From: <ikr...@us...> - 2012-02-16 19:07:53
|
Revision: 3613 http://archive-access.svn.sourceforge.net/archive-access/?rev=3613&view=rev Author: ikreymer Date: 2012-02-16 19:07:40 +0000 (Thu, 16 Feb 2012) Log Message: ----------- VERSION: Increment to 1.7.1-SNAPSHOT FEATURE: Adding CompositeAccessPoint and related classes (org.archive.wayback.accesspoint) which allow for a single access point to be used with different AccessPointConfigs. This AccessPoint is useful when there is a single WaybackCollection but multiple, possibly dynamic ways to access it. Each config consists mainly of file prefixes that can pass through that access point, as well as user properties. FEATURE: There is also a ProxyAccessPoint and related classes (org.archive.wayback.accesspoint.proxy) which allow Wayback to serve as a proxy on any config that is part of a CompositeAccessPoint. Which access point is the active proxy can be configured in a number of ways, including basic authentication, just IP checking or a cookie (not completed in this update). FIXES/UPDATE: FastArchivalUrlReplayParseEventHandler includes support for inserting JSP into <head> block, as well as improved checking for beginning of <body> block when inserting body JSP (including skipping through any <head>, <script> or <style> blocks that may have spurious tags). Also adds the ability to check and respond to the rewrite policy from CustomPolicyOracleFilter. FEATURE: CustomPolicyOracleFilter extends OracleExclusionFilter to provide custom policies beyond allow, block, robots. The policy is stored in the CaptureSearchResult and may be available during parsing.
FIX: JSPExecutor now has getUiResults() ADDED: ArchivalUrlResultURIConverterFactory added for dynamic URI converters that need to change per AccessPointConfig Modified Paths: -------------- trunk/archive-access/projects/wayback/dist/pom.xml trunk/archive-access/projects/wayback/pom.xml trunk/archive-access/projects/wayback/wayback-core/pom.xml trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPExecutor.java trunk/archive-access/projects/wayback/wayback-hadoop/pom.xml trunk/archive-access/projects/wayback/wayback-hadoop-java/pom.xml trunk/archive-access/projects/wayback/wayback-webapp/pom.xml Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/CustomPolicyOracleFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/CustomPolicyOracleFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointConfig.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointConfigs.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/CompositeAccessPoint.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/AuthProxyConfigSelector.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/IPProxyConfigSelector.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/ProxyAccessPoint.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/ProxyConfigSelector.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverterFactory.java Modified: trunk/archive-access/projects/wayback/dist/pom.xml =================================================================== --- trunk/archive-access/projects/wayback/dist/pom.xml 2012-02-07 19:36:59 UTC (rev 3612) +++ trunk/archive-access/projects/wayback/dist/pom.xml 2012-02-16 19:07:40 UTC (rev 3613) @@ -7,7 +7,7 @@ <parent> <artifactId>wayback</artifactId> <groupId>org.archive.wayback</groupId> - <version>1.7.0-SNAPSHOT</version> + <version>1.7.1-SNAPSHOT</version> </parent> <artifactId>dist</artifactId> Modified: trunk/archive-access/projects/wayback/pom.xml =================================================================== --- trunk/archive-access/projects/wayback/pom.xml 2012-02-07 19:36:59 UTC (rev 3612) +++ trunk/archive-access/projects/wayback/pom.xml 2012-02-16 19:07:40 UTC (rev 3613) @@ -7,7 +7,7 @@ <groupId>org.archive.wayback</groupId> <artifactId>wayback</artifactId> <packaging>pom</packaging> - <version>1.7.0-SNAPSHOT</version> + <version>1.7.1-SNAPSHOT</version> <name>Wayback</name> <modules> Modified: trunk/archive-access/projects/wayback/wayback-core/pom.xml =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/pom.xml 2012-02-07 19:36:59 UTC (rev 3612) +++ trunk/archive-access/projects/wayback/wayback-core/pom.xml 2012-02-16 
19:07:40 UTC (rev 3613) @@ -8,7 +8,7 @@ <parent> <artifactId>wayback</artifactId> <groupId>org.archive.wayback</groupId> - <version>1.7.0-SNAPSHOT</version> + <version>1.7.1-SNAPSHOT</version> </parent> <artifactId>wayback-core</artifactId> Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/CustomPolicyOracleFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/CustomPolicyOracleFilter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/CustomPolicyOracleFilter.java 2012-02-16 19:07:40 UTC (rev 3613) @@ -0,0 +1,91 @@ +/* + * Oracle Filter Implementation that supports custom policies in addition to + * allow, block, block-message and robots + * + * The policy is stored in the CaptureSearchResult + */ + +package org.archive.wayback.accesscontrol.oracleclient; + +import java.util.Date; +import java.util.logging.Logger; + +import org.archive.accesscontrol.RobotsUnavailableException; +import org.archive.accesscontrol.RuleOracleUnavailableException; +import org.archive.util.ArchiveUtils; +import org.archive.wayback.core.CaptureSearchResult; + +public class CustomPolicyOracleFilter extends OracleExclusionFilter { + + public static final String CAPTURE_ORACLE_POLICY = "oracle-policy"; + + private static final Logger LOGGER = Logger.getLogger( + CustomPolicyOracleFilter.class.getName()); + + enum Policy { + ALLOW("allow"), + BLOCK_HIDDEN("block"), + BLOCK_MESSAGE("block-message"), + ROBOTS("robots"); + + Policy(String policy) { + this.policy = policy; + } + + boolean matches(String other) + { + return (other.equals(this.policy)); + } + + String policy; + } + + protected int defaultFilter = FILTER_INCLUDE; + + public CustomPolicyOracleFilter(String oracleUrl, String accessGroup, String 
proxyHostPort) { + super(oracleUrl, accessGroup, proxyHostPort); + } + + @Override + public int filterObject(CaptureSearchResult o) { + String url = o.getOriginalUrl(); + Date captureDate = o.getCaptureDate(); + Date retrievalDate = new Date(); + + String policy; + try { + policy = client.getPolicy(ArchiveUtils.addImpliedHttpIfNecessary(url), captureDate, retrievalDate, accessGroup); + + o.put(CAPTURE_ORACLE_POLICY, policy); + + if (policy == null) { + return defaultFilter; + } + + if (Policy.ALLOW.matches(policy)) { + return handleAllow(); + } + + // Block page but silently, as if it wasn't found + if (Policy.BLOCK_HIDDEN.matches(policy)) { + return FILTER_EXCLUDE; + } + + // Block page bit and display "access blocked" message + if (Policy.BLOCK_MESSAGE.matches(policy)) { + return handleBlock(); + } + + if (Policy.ROBOTS.matches("policy")) { + return handleRobots(); + } + + } catch (RobotsUnavailableException e) { + e.printStackTrace(); + } catch (RuleOracleUnavailableException e) { + LOGGER.warning("Oracle Unavailable/not running, default to allow all until it responds. 
Details: " + e.toString()); + } + + return defaultFilter; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/CustomPolicyOracleFilter.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/CustomPolicyOracleFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/CustomPolicyOracleFilterFactory.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/CustomPolicyOracleFilterFactory.java 2012-02-16 19:07:40 UTC (rev 3613) @@ -0,0 +1,10 @@ +package org.archive.wayback.accesscontrol.oracleclient; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; + +public class CustomPolicyOracleFilterFactory extends OracleExclusionFilterFactory { + + @Override + public ExclusionFilter get() { + return new CustomPolicyOracleFilter(this.getOracleUrl(), this.getAccessGroup(), this.getProxyHostPort()); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/CustomPolicyOracleFilterFactory.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java 2012-02-07 19:36:59 UTC (rev 3612) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java 2012-02-16 19:07:40 UTC (rev 3613) @@ -1,5 +1,4 @@ -/* - * This file is part of the Wayback archival access software +/* This file is part of the Wayback archival access software * (http://archive-access.sourceforge.net/projects/wayback/). * * Licensed to the Internet Archive (IA) by one or more individual @@ -33,8 +32,8 @@ * */ public class OracleExclusionFilter extends ExclusionFilter { - AccessControlClient client = null; - private String accessGroup = null; + protected AccessControlClient client = null; + protected String accessGroup = null; private final static String POLICY_ALLOW = "allow"; private final static String POLICY_BLOCK = "block"; @@ -70,7 +69,57 @@ this.accessGroup = accessGroup; } + protected int handleAllow() + { + if(!notifiedAdminSeen) { + notifiedAdminSeen = true; + if(filterGroup != null) { + filterGroup.setSawAdministrative(); + } + } + if(!notifiedAdminPassed) { + notifiedAdminPassed = true; + if(filterGroup != null) { + filterGroup.setPassedAdministrative(); + } + } + return FILTER_INCLUDE; + } + protected int handleBlock() + { + if(!notifiedAdminSeen) { + notifiedAdminSeen = true; + if(filterGroup != null) { + filterGroup.setSawAdministrative(); + } + } + return FILTER_EXCLUDE; + } + + protected int handleRobots() + { + if(!notifiedRobotSeen) { + notifiedRobotSeen = true; + if(filterGroup != null) { + filterGroup.setSawRobots(); + } + } + return FILTER_INCLUDE; +// if(robotFilter != null) { +// if(!notifiedRobotPassed) { +// notifiedRobotPassed = true; +// if(filterGroup != null) { +// filterGroup.setPassedRobot(); +// } +// } +// return robotFilter.filterObject(o); +// } else { +// return FILTER_EXCLUDE; +// } + } + + public int filterObject(CaptureSearchResult o) { String url = o.getOriginalUrl(); Date captureDate = o.getCaptureDate(); @@ -82,46 +131,11 @@ accessGroup); if(policy != null) { 
if(policy.equals(POLICY_ALLOW)) { - if(!notifiedAdminSeen) { - notifiedAdminSeen = true; - if(filterGroup != null) { - filterGroup.setSawAdministrative(); - } - } - if(!notifiedAdminPassed) { - notifiedAdminPassed = true; - if(filterGroup != null) { - filterGroup.setPassedAdministrative(); - } - } - return FILTER_INCLUDE; + return handleAllow(); } else if(policy.equals(POLICY_BLOCK)) { - if(!notifiedAdminSeen) { - notifiedAdminSeen = true; - if(filterGroup != null) { - filterGroup.setSawAdministrative(); - } - } - return FILTER_EXCLUDE; + return handleBlock(); } else if(policy.equals(POLICY_ROBOT)) { - if(!notifiedRobotSeen) { - notifiedRobotSeen = true; - if(filterGroup != null) { - filterGroup.setSawRobots(); - } - } - return FILTER_INCLUDE; -// if(robotFilter != null) { -// if(!notifiedRobotPassed) { -// notifiedRobotPassed = true; -// if(filterGroup != null) { -// filterGroup.setPassedRobot(); -// } -// } -// return robotFilter.filterObject(o); -// } else { -// return FILTER_EXCLUDE; -// } + return handleRobots(); } } } catch (RobotsUnavailableException e) { Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointAdapter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointAdapter.java 2012-02-16 19:07:40 UTC (rev 3613) @@ -0,0 +1,325 @@ +/* + * This file is part of the Wayback archival access software + * (http://archive-access.sourceforge.net/projects/wayback/). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.wayback.accesspoint; + +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Properties; + +import javax.servlet.ServletContext; + +import org.archive.wayback.ExceptionRenderer; +import org.archive.wayback.QueryRenderer; +import org.archive.wayback.ReplayDispatcher; +import org.archive.wayback.RequestParser; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.accesscontrol.CompositeExclusionFilterFactory; +import org.archive.wayback.accesscontrol.ExclusionFilterFactory; +import org.archive.wayback.accesscontrol.oracleclient.CustomPolicyOracleFilter; +import org.archive.wayback.accesspoint.proxy.ProxyAccessPoint; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.replay.html.ContextResultURIConverterFactory; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; +import org.archive.wayback.util.operator.BooleanOperator; +import org.archive.wayback.webapp.AccessPoint; +import org.archive.wayback.webapp.CustomResultFilterFactory; +import org.archive.wayback.webapp.WaybackCollection; + +public class AccessPointAdapter extends AccessPoint { + + private CompositeAccessPoint baseAccessPoint; + private AccessPointConfig config; + private ExclusionFilterFactory exclusionFactory; + private ResultURIConverter cacheUriConverter; + + private boolean switchable = false; + + private class DynamicExclusionFactory implements ExclusionFilterFactory + { + public ExclusionFilter get() + { + return new 
CustomPolicyOracleFilter(baseAccessPoint.getOracleUrl(), config.getBeanName(), null); + } + + public void shutdown() { + + } + } + + public AccessPointAdapter(CompositeAccessPoint baseAccessPoint, AccessPointConfig config) + { + this.baseAccessPoint = baseAccessPoint; + this.config = config; + this.exclusionFactory = null; + + this.switchable = true; + } + + public AccessPointAdapter(String accessPointName, CompositeAccessPoint baseAccessPoint) + { + this.baseAccessPoint = baseAccessPoint; + this.exclusionFactory = null; + this.config = baseAccessPoint.getAccessPointConfigs().getAccessPointConfigs().get(accessPointName); + + this.switchable = false; + } + + public CompositeAccessPoint getBaseAccessPoint() + { + return baseAccessPoint; + } + + public boolean isProxyMode() + { + return (baseAccessPoint instanceof ProxyAccessPoint); + } + + public boolean isProxySwitchable() + { + return switchable && isProxyMode(); + } + + public String getSwitchCollPath() + { + return ProxyAccessPoint.SWITCH_COLLECTION_PATH; + } + + public AccessPointConfig getAccessPointConfig() + { + return config; + } + + public Map<String, Object> getUserProps() + { + return baseAccessPoint.getUserProps(); + } + + @Override + public List<String> getFileIncludePrefixes() { + return config.getFileIncludePrefixes(); + } + + @Override + public List<String> getFileExcludePrefixes() { + return config.getFileExcludePrefixes(); + } + + @Override + public Properties getConfigs() { + // TODO Auto-generated method stub + return config.getConfigs(); + } + + @Override + public ExclusionFilterFactory getExclusionFactory() { + if (exclusionFactory == null) { + exclusionFactory = buildExclusionFactory(); + } + + return exclusionFactory; + } + + protected ExclusionFilterFactory buildExclusionFactory() + { + ArrayList<ExclusionFilterFactory> staticExclusions = baseAccessPoint.getStaticExclusions(); + + if (staticExclusions == null) { + return new DynamicExclusionFactory(); + } else { + 
CompositeExclusionFilterFactory composite = new CompositeExclusionFilterFactory(); + ArrayList<ExclusionFilterFactory> allExclusions = new ArrayList<ExclusionFilterFactory>(); + allExclusions.addAll(staticExclusions); + allExclusions.add(new DynamicExclusionFactory()); + composite.setFactories(allExclusions); + return composite; + } + } + + protected String getPrefix(String basePrefix) + { + if (isProxyMode()) { + return basePrefix; + } else { + return basePrefix + config.getBeanName() + "/"; + } + } + + @Override + public String getStaticPrefix() { + // TODO Auto-generated method stub + return getPrefix(baseAccessPoint.getStaticPrefix()); + } + + @Override + public String getReplayPrefix() { + // TODO Auto-generated method stub + return getPrefix(baseAccessPoint.getReplayPrefix()); + } + + @Override + public String getQueryPrefix() { + // TODO Auto-generated method stub + return getPrefix(baseAccessPoint.getQueryPrefix()); + } + + @Override + public boolean isExactHostMatch() { + // TODO Auto-generated method stub + return baseAccessPoint.isExactHostMatch(); + } + + @Override + public boolean isExactSchemeMatch() { + // TODO Auto-generated method stub + return baseAccessPoint.isExactSchemeMatch(); + } + + @Override + public boolean isUseAnchorWindow() { + // TODO Auto-generated method stub + return baseAccessPoint.isUseAnchorWindow(); + } + + @Override + public boolean isServeStatic() { + // TODO Auto-generated method stub + return baseAccessPoint.isServeStatic(); + } + + @Override + public ServletContext getServletContext() { + return baseAccessPoint.getServletContext(); + } + + @Override + public String getLiveWebPrefix() { + // TODO Auto-generated method stub + return baseAccessPoint.getLiveWebPrefix(); + } + + @Override + public String getInterstitialJsp() { + // TODO Auto-generated method stub + return baseAccessPoint.getInterstitialJsp(); + } + + @Override + public Locale getLocale() { + // TODO Auto-generated method stub + return 
baseAccessPoint.getLocale(); + } + + @Override + public List<String> getFilePatterns() { + // TODO Auto-generated method stub + return baseAccessPoint.getFilePatterns(); + } + + @Override + public WaybackCollection getCollection() { + // TODO Auto-generated method stub + return baseAccessPoint.getCollection(); + } + + @Override + public ExceptionRenderer getException() { + // TODO Auto-generated method stub + return baseAccessPoint.getException(); + } + + @Override + public QueryRenderer getQuery() { + // TODO Auto-generated method stub + return baseAccessPoint.getQuery(); + } + + @Override + public RequestParser getParser() { + // TODO Auto-generated method stub + return baseAccessPoint.getParser(); + } + + @Override + public ReplayDispatcher getReplay() { + // TODO Auto-generated method stub + return baseAccessPoint.getReplay(); + } + + @Override + public ResultURIConverter getUriConverter() { + + if (cacheUriConverter == null) { + ContextResultURIConverterFactory factory = baseAccessPoint.getUriConverterFactory(); + + if (factory != null) { + cacheUriConverter = factory.getContextConverter(getReplayPrefix()); + } else { + cacheUriConverter = baseAccessPoint.getUriConverter(); + } + } + + return cacheUriConverter; + } + + @Override + public BooleanOperator<WaybackRequest> getAuthentication() { + // TODO Auto-generated method stub + return baseAccessPoint.getAuthentication(); + } + + @Override + public String getRefererAuth() { + // TODO Auto-generated method stub + return baseAccessPoint.getRefererAuth(); + } + + @Override + public boolean isBounceToReplayPrefix() { + // TODO Auto-generated method stub + return baseAccessPoint.isBounceToReplayPrefix(); + } + + @Override + public boolean isBounceToQueryPrefix() { + // TODO Auto-generated method stub + return baseAccessPoint.isBounceToQueryPrefix(); + } + + @Override + public long getEmbargoMS() { + // TODO Auto-generated method stub + return baseAccessPoint.getEmbargoMS(); + } + + @Override + public boolean 
isForceCleanQueries() { + // TODO Auto-generated method stub + return baseAccessPoint.isForceCleanQueries(); + } + + @Override + public CustomResultFilterFactory getFilterFactory() { + // TODO Auto-generated method stub + return baseAccessPoint.getFilterFactory(); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointAdapter.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointConfig.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointConfig.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointConfig.java 2012-02-16 19:07:40 UTC (rev 3613) @@ -0,0 +1,41 @@ +package org.archive.wayback.accesspoint; + +import java.util.List; +import java.util.Properties; + +import org.springframework.beans.factory.BeanNameAware; + +public class AccessPointConfig implements BeanNameAware { + + private Properties configs = null; + private List<String> fileIncludePrefixes = null; + private List<String> fileExcludePrefixes = null; + private String beanName; + + public Properties getConfigs() { + return configs; + } + public void setConfigs(Properties configs) { + this.configs = configs; + } + public List<String> getFileIncludePrefixes() { + return fileIncludePrefixes; + } + public void setFileIncludePrefixes(List<String> fileIncludePrefixes) { + this.fileIncludePrefixes = fileIncludePrefixes; + } + public List<String> getFileExcludePrefixes() { + return fileExcludePrefixes; + } + public void setFileExcludePrefixes(List<String> fileExcludePrefixes) { + this.fileExcludePrefixes = fileExcludePrefixes; + } + + public void setBeanName(String 
beanName) { + this.beanName = beanName; + } + + public String getBeanName() { + return this.beanName; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointConfig.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointConfigs.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointConfigs.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointConfigs.java 2012-02-16 19:07:40 UTC (rev 3613) @@ -0,0 +1,34 @@ +/* + * This file is part of the Wayback archival access software + * (http://archive-access.sourceforge.net/projects/wayback/). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.wayback.accesspoint; + +import java.util.HashMap; + +public class AccessPointConfigs { + private HashMap<String, AccessPointConfig> configs; + + public HashMap<String, AccessPointConfig> getAccessPointConfigs() { + return configs; + } + public void setAccessPointConfigs(HashMap<String, AccessPointConfig> configs) { + this.configs = configs; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/AccessPointConfigs.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/CompositeAccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/CompositeAccessPoint.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/CompositeAccessPoint.java 2012-02-16 19:07:40 UTC (rev 3613) @@ -0,0 +1,162 @@ +/* + * This file is part of the Wayback archival access software + * (http://archive-access.sourceforge.net/projects/wayback/). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.wayback.accesspoint; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.accesscontrol.ExclusionFilterFactory; +import org.archive.wayback.replay.html.ContextResultURIConverterFactory; +import org.archive.wayback.webapp.AccessPoint; + +public class CompositeAccessPoint extends AccessPoint { + + protected final static String REQUEST_CONTEXT_PREFIX = + "webapp-request-context-path-prefix"; + + protected enum Status + { + ConfigNotFound, + ConfigHandled, + ConfigNotHandled, + } + + private HashMap<String, AccessPointAdapter> accessPointCache; + + public CompositeAccessPoint() + { + accessPointCache = new HashMap<String, AccessPointAdapter>(); + } + + @Override + public boolean handleRequest(HttpServletRequest request, + HttpServletResponse response) throws ServletException, + IOException { + + String configName = request.getRequestURI(); + + if (!configName.isEmpty() && (configName.charAt(0) == '/')) { + configName = configName.substring(1); + } + + int slash = configName.indexOf('/'); + + if (slash >= 0) { + configName = configName.substring(0, slash); + } + + request.setAttribute(REQUEST_CONTEXT_PREFIX, "/" + configName + "/"); + + Status status = handleRequest(configName, request, response); + return (status == Status.ConfigHandled); + } + + protected Status handleRequest(String realAccessPoint, HttpServletRequest request, + HttpServletResponse response) throws ServletException, + IOException { + + // First, check cached accessPoint + AccessPointAdapter adapter = accessPointCache.get(realAccessPoint); + + if ((adapter == null) && (accessPointConfigs != null)) { + AccessPointConfig config = accessPointConfigs.getAccessPointConfigs().get(realAccessPoint); + + if (config != null) { + adapter = new 
AccessPointAdapter(this, config); + accessPointCache.put(realAccessPoint, adapter); + } + } + + if (adapter == null) { + return Status.ConfigNotFound; + } + + boolean handled = adapter.handleRequest(request, response); + return (handled ? Status.ConfigHandled : Status.ConfigNotHandled); + } + + private String oracleUrl; + private ArrayList<ExclusionFilterFactory> staticExclusions; + + private ContextResultURIConverterFactory uriConverterFactory; + + public ContextResultURIConverterFactory getUriConverterFactory() { + return uriConverterFactory; + } + + public void setUriConverterFactory(ContextResultURIConverterFactory uriConverterFactory) { + this.uriConverterFactory = uriConverterFactory; + } + + public ArrayList<ExclusionFilterFactory> getStaticExclusions() { + return staticExclusions; + } + + public void setStaticExclusions( + ArrayList<ExclusionFilterFactory> staticExclusions) { + this.staticExclusions = staticExclusions; + } + + private Map<String, Object> userProps; + + private AccessPointConfigs accessPointConfigs; + + public String getOracleUrl() { + return oracleUrl; + } + public void setOracleUrl(String oracleUrl) { + this.oracleUrl = oracleUrl; + } + public Map<String, Object> getUserProps() { + return userProps; + } + + public void setUserProps(Map<String, Object> userProps) { + this.userProps = userProps; + } + + public AccessPointConfigs getAccessPointConfigs() { + return accessPointConfigs; + } + + public void setAccessPointConfigs(AccessPointConfigs accessPointConfigs) { + this.accessPointConfigs = accessPointConfigs; + } + + public AccessPointConfig findConfigForFile(String file) + { + for (AccessPointConfig config : accessPointConfigs.getAccessPointConfigs().values()) { + for (String prefix : config.getFileIncludePrefixes()) { + if (file.startsWith(prefix)) { + return config; + } + } + } + + return null; + } +} Property changes on: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/CompositeAccessPoint.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/AuthProxyConfigSelector.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/AuthProxyConfigSelector.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/AuthProxyConfigSelector.java 2012-02-16 19:07:40 UTC (rev 3613) @@ -0,0 +1,142 @@ +/* + * This file is part of the Wayback archival access software + * (http://archive-access.sourceforge.net/projects/wayback/). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.wayback.accesspoint.proxy; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import javax.servlet.http.HttpSession; + +import org.apache.commons.codec.binary.Base64; +import org.archive.wayback.replay.StringHttpServletResponseWrapper; + +public class AuthProxyConfigSelector implements ProxyConfigSelector { + + public final static String PROXY_REFERRER_KEY = "wayback-wombat-proxy-referrer"; + + private String proxyInfoJsp = "/WEB-INF/replay/ProxyInfo.jsp"; + + private String authMsg = "Please enter the collection number to see Wayback content from that collection. (You can leave the password blank)"; + + public String getProxyInfoJsp() { + return proxyInfoJsp; + } + + public void setProxyInfoJsp(String proxyInfoJsp) { + this.proxyInfoJsp = proxyInfoJsp; + } + + public String getAuthMsg() { + return authMsg; + } + + public void setAuthMsg(String authMsg) { + this.authMsg = authMsg; + } + + public String resolveConfig(HttpServletRequest request) { + String authenticate = request.getHeader("Proxy-Authorization"); + + if (authenticate != null) { + String auth = decodeBasic(authenticate); + if (auth != null) { + int userEnd = auth.indexOf(':'); + return auth.substring(0, userEnd); + } + } + + return null; + } + + public boolean selectConfigHandler(HttpServletRequest request, HttpServletResponse response, ProxyAccessPoint proxy) throws IOException + { + response.setStatus(HttpServletResponse.SC_PROXY_AUTHENTICATION_REQUIRED); //407 + response.setHeader("Proxy-Authenticate", "Basic realm=\"" + authMsg + "\""); + response.setContentType("text/html"); + + //TODO: Better way to pass this to jsp? 
+ request.setAttribute("proxyAccessPoint", proxy); + + StringHttpServletResponseWrapper wrappedResponse = + new StringHttpServletResponseWrapper(response); + RequestDispatcher dispatcher = request.getRequestDispatcher(proxyInfoJsp); + + try { + dispatcher.forward(request, wrappedResponse); + } catch (ServletException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + PrintWriter writer = response.getWriter(); + writer.println(wrappedResponse.getStringResponse()); + return true; + } + + private String decodeBasic(String authHeaderValue) { + if(authHeaderValue != null) { + if(authHeaderValue.startsWith("Basic ")) { + String b64 = authHeaderValue.substring(6); + byte[] decoded = Base64.decodeBase64(b64.getBytes()); + try { + return new String(decoded,"utf-8"); + } catch (UnsupportedEncodingException e) { + // really?... + return new String(decoded); + } + } + } + return null; + + } + + public void handleSwitch(HttpServletRequest request, + HttpServletResponse response, ProxyAccessPoint proxy) throws IOException { + + // Check reset cookie... 
+ HttpSession sess = request.getSession(); + String referrer = (String)sess.getAttribute(PROXY_REFERRER_KEY); + + // If referrer not set, we're sending the switch request + if (referrer == null) { + String httpReferrer = request.getHeader("Referer"); + if (httpReferrer == null) { + httpReferrer = proxy.getReplayPrefix(); + } + sess.setAttribute(PROXY_REFERRER_KEY, httpReferrer); + + selectConfigHandler(request, response, proxy); + } else { + sess.removeAttribute(PROXY_REFERRER_KEY); + response.sendRedirect(referrer); + } + } + + public void handleProxyPac(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) { + //No Special Handling for Proxy Pac request + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/AuthProxyConfigSelector.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/IPProxyConfigSelector.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/IPProxyConfigSelector.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/IPProxyConfigSelector.java 2012-02-16 19:07:40 UTC (rev 3613) @@ -0,0 +1,116 @@ +/* + * This file is part of the Wayback archival access software + * (http://archive-access.sourceforge.net/projects/wayback/). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.wayback.accesspoint.proxy; + +import java.io.IOException; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.util.bdb.BDBMap; + +public class IPProxyConfigSelector implements ProxyConfigSelector { + + protected String proxyInfoJsp = "/WEB-INF/replay/ProxyInfo.jsp"; + + public String resolveConfig(HttpServletRequest request) { + String context = request.getContextPath(); + BDBMap bdbMap = BDBMap.getContextMap(context); + + String key = genKey(request); + String coll = bdbMap.get(key); + return coll; + } + + protected String genKey(HttpServletRequest request) + { + return request.getRemoteAddr() + "$coll"; + } + + public boolean selectConfigHandler(HttpServletRequest request, + HttpServletResponse response, ProxyAccessPoint proxy) throws IOException { + + request.setAttribute("proxyAccessPoint", proxy); + + RequestDispatcher dispatcher = request.getRequestDispatcher(proxyInfoJsp); + + try { + dispatcher.forward(request, response); + } catch (ServletException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + return true; + } + + public void handleSwitch(HttpServletRequest request, + HttpServletResponse response, ProxyAccessPoint proxy) + throws IOException { + + String config = request.getParameter("config"); + + if (config == null) { + selectConfigHandler(request, response, proxy); + return; + } + + setConfig(request, config); + + String referrer = 
request.getHeader("Referer"); + if (referrer == null) { + referrer = proxy.getReplayPrefix(); + } + response.sendRedirect(referrer); + } + + protected void setConfig(HttpServletRequest request, String config) + { + String context = request.getContextPath(); + BDBMap bdbMap = BDBMap.getContextMap(context); + + String key = genKey(request); + bdbMap.put(key, config); + } + + public void handleProxyPac(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) { + + String uri = httpRequest.getRequestURI(); + int pacStrIndex = uri.indexOf(ProxyAccessPoint.PROXY_PAC_PATH); + + if (pacStrIndex >= 0) { + String config = uri.substring(1, pacStrIndex); + //System.out.println("config: " + config); + setConfig(httpRequest, config); + } + } + + public String getProxyInfoJsp() { + return proxyInfoJsp; + } + + public void setProxyInfoJsp(String proxyInfoJsp) { + this.proxyInfoJsp = proxyInfoJsp; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/IPProxyConfigSelector.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/ProxyAccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/ProxyAccessPoint.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/ProxyAccessPoint.java 2012-02-16 19:07:40 UTC (rev 3613) @@ -0,0 +1,190 @@ +/* + * This file is part of the Wayback archival access software + * (http://archive-access.sourceforge.net/projects/wayback/). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. 
+ * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.wayback.accesspoint.proxy; + +import java.io.IOException; +import java.io.PrintWriter; +import java.util.List; +import java.util.logging.Logger; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.accesspoint.CompositeAccessPoint; +import org.archive.wayback.webapp.AccessPoint; + +public class ProxyAccessPoint extends CompositeAccessPoint { + + private static final Logger LOGGER = + Logger.getLogger(ProxyAccessPoint.class.getName()); + + public final static String SWITCH_COLLECTION_PATH = "switchCollection"; + public final static String PROXY_PAC_PATH = "/proxy.pac"; + + private List<String> directHosts; + private AccessPoint nonProxyAccessPoint; + + private ProxyConfigSelector configSelector; + + public ProxyConfigSelector getConfigSelector() { + return configSelector; + } + + public void setConfigSelector(ProxyConfigSelector configSelector) { + this.configSelector = configSelector; + } + + public List<String> getDirectHosts() { + return directHosts; + } + + public void setDirectHosts(List<String> directHosts) { + this.directHosts = directHosts; + } + + public AccessPoint getNonProxyAccessPoint() { + return nonProxyAccessPoint; + } + + public void setNonProxyAccessPoint(AccessPoint nonProxyAccessPoint) { + this.nonProxyAccessPoint = 
nonProxyAccessPoint; + } + + @Override + public boolean handleRequest(HttpServletRequest request, + HttpServletResponse response) throws ServletException, + IOException { + + boolean isProxyReq = (request.getHeader("Proxy-Connection") != null); + + if (!isProxyReq) { + return handleNonProxy(request, response); + } else { + return handleProxy(request, response); + } + } + + protected boolean handleNonProxy(HttpServletRequest request, + HttpServletResponse response) throws ServletException, IOException + { + String uri = request.getRequestURI(); + + if (uri.endsWith(PROXY_PAC_PATH)) { + this.writeProxyPac(request, response); + return true; + } + + return baseHandleRequest(request, response); + } + + protected boolean handleProxy(HttpServletRequest request, + HttpServletResponse response) throws ServletException, IOException + { + StringBuffer urlBuff = request.getRequestURL(); + String url = urlBuff.toString(); + + boolean isProxyHost = url.startsWith(getReplayPrefix()); + + if (isProxyHost) { + // Special reset link + if (url.endsWith(SWITCH_COLLECTION_PATH)) { + configSelector.handleSwitch(request, response, this); + return true; + } + } + + String realAccessPoint = configSelector.resolveConfig(request); + + if (realAccessPoint != null) { + + // See if the archival url form was included and redirect to strip it + if (isProxyHost) { + String prefix = "/" + realAccessPoint + "/"; + String uri = request.getRequestURI(); + + + if (uri.length() > prefix.length()) { + String requestUrl = uri.substring(prefix.length()); + + // If matches this config, simply redirect and strip + if (uri.startsWith(prefix)) { + response.sendRedirect("/" + requestUrl); + return true; + } + } + + //If archival url with any *different* config, force a selection + //if (ReplayRequestParser.WB_REQUEST_REGEX.matcher(requestUrl).matches()) { + // return configSelector.selectConfigHandler(request, response, this); + //} + } + + Status status = handleRequest(realAccessPoint, request, response); + + 
switch (status) { + case ConfigHandled: + return true; + + case ConfigNotHandled: + return false; + + case ConfigNotFound: + break; + } + } + + return configSelector.selectConfigHandler(request, response, this); + } + + protected boolean baseHandleRequest(HttpServletRequest request, + HttpServletResponse response) throws ServletException, IOException { + + if (nonProxyAccessPoint != null) { + return nonProxyAccessPoint.handleRequest(request, response); + } else { + return super.handleRequest(request, response); + } + } + + protected void writeProxyPac(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) throws ServletException, + IOException { + + configSelector.handleProxyPac(httpRequest, httpResponse); + + String hostName = httpRequest.getServerName(); + int port = httpRequest.getServerPort(); + + httpResponse.setContentType("application/x-ns-proxy-autoconfig"); + + LOGGER.fine("updating proxy .pac"); + + PrintWriter writer = httpResponse.getWriter(); + writer.println("function FindProxyForURL (url, host) {"); + + for (String host : directHosts) { + writer.println(" if (shExpMatch(host, \"" + host + "\")) { return \"DIRECT\"; }"); + } + + writer.println(" return \"PROXY " + hostName + ":" + port + "\";\n}"); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/ProxyAccessPoint.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/ProxyConfigSelector.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/ProxyConfigSelector.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/ProxyConfigSelector.java 2012-02-16 19:07:40 UTC 
(rev 3613) @@ -0,0 +1,36 @@ +/* + * This file is part of the Wayback archival access software + * (http://archive-access.sourceforge.net/projects/wayback/). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.wayback.accesspoint.proxy; + +import java.io.IOException; + +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +public interface ProxyConfigSelector { + String resolveConfig(HttpServletRequest request); + + boolean selectConfigHandler(HttpServletRequest request, HttpServletResponse response, ProxyAccessPoint proxy) throws IOException; + + void handleSwitch(HttpServletRequest request, HttpServletResponse response, ProxyAccessPoint proxy) throws IOException; + + void handleProxyPac(HttpServletRequest httpRequest, HttpServletResponse httpResponse); +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesspoint/proxy/ProxyConfigSelector.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverterFactory.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverterFactory.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverterFactory.java 2012-02-16 19:07:40 UTC (rev 3613) @@ -0,0 +1,35 @@ +/* + * This file is part of the Wayback archival access software + * (http://archive-access.sourceforge.net/projects/wayback/). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.wayback.archivalurl; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.replay.html.ContextResultURIConverterFactory; + +public class ArchivalUrlResultURIConverterFactory implements + ContextResultURIConverterFactory { + + public ResultURIConverter getContextConverter(String flags) { + ArchivalUrlResultURIConverter converter = new ArchivalUrlResultURIConverter(); + converter.setReplayURIPrefix(flags); + return converter; + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverterFactory.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java 2012-02-07 19:36:59 UTC (rev 3612) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java 2012-02-16 19:07:40 UTC (rev 3613) @@ -23,11 +23,11 @@ import java.io.OutputStream; import java.net.MalformedURLException; import java.net.URL; -import java.nio.charset.Charset; import java.util.HashMap; import javax.servlet.ServletException; +import org.archive.wayback.accesscontrol.oracleclient.CustomPolicyOracleFilter; import org.archive.wayback.replay.html.ReplayParseContext; import org.archive.wayback.replay.html.StringTransformer; import org.archive.wayback.replay.html.transformer.BlockCSSStringTransformer; @@ -39,6 +39,7 @@ import org.archive.wayback.util.htmllex.ParseContext; import org.archive.wayback.util.htmllex.ParseEventHandler; import org.htmlparser.Node; +import 
org.htmlparser.nodes.RemarkNode; import org.htmlparser.nodes.TagNode; import org.htmlparser.nodes.TextNode; @@ -55,6 +56,8 @@ public final static String FERRET_DONE_KEY = FastArchivalUrlReplayParseEventHandler.class.toString(); + + protected final static String FERRET_IN_HEAD = "FERRET_IN_HEAD"; private String jspInsertPath = "/WEB-INF/replay/DisclaimChooser.jsp"; private String endJsp = "/WEB-INF/replay/ArchiveComment.jsp"; @@ -67,6 +70,8 @@ private final static String FRAMESET_TAG = "FRAMESET"; private final static String BODY_TAG = "BODY"; + protected static final String FERRET_HEAD_INSERTED = "FERRET_HEAD_INSERTED"; + private BlockCSSStringTransformer cssBlockTrans = new BlockCSSStringTransformer(); private InlineCSSStringTransformer cssInlineTrans = @@ -76,6 +81,9 @@ private MetaRefreshUrlStringTransformer metaRefreshTrans = new MetaRefreshUrlStringTransformer(); private URLStringTransformer anchorUrlTrans = new URLStringTransformer(); + + protected String headInsertJsp = null; + // static { // anchorUrlTrans = new URLStringTransformer(); // anchorUrlTrans.setJsTransformer(jsBlockTrans); @@ -105,8 +113,8 @@ throws IOException { ReplayParseContext context = (ReplayParseContext) pContext; if(NodeUtils.isRemarkNode(node)) { -// RemarkNode remarkNode = (RemarkNode) node; -// handleRemarkTextNode(context,remarkNode); + RemarkNode remarkNode = (RemarkNode) node; + remarkNode.setText(jsBlockTrans.transform(context, remarkNode.getText())); emit(context,null,node,null); } else if(NodeUtils.isTextNode(node)) { @@ -122,8 +130,19 @@ } } else if(NodeUtils.isTagNode(node)) { TagNode tagNode = (TagNode) node; - if(tagNode.isEndTag()) { - emit(context,null,tagNode,null); + + if (NodeUtils.isOpenTagNodeNamed(tagNode, NodeUtils.SCRIPT_TAG_NAME)) { + handleJSIncludeNode(context, tagNode); + } else if(tagNode.isEndTag()) { + + if (tagNode.getTagName().equals("HEAD")) { + context.putData(FERRET_IN_HEAD, null); + } + + if (checkAllowTag(pContext, tagNode)) { + 
emit(context,null,tagNode,null); + } + // handleCloseTagNode(context,tagNode); } else { // assume start, possibly empty: @@ -152,17 +171,64 @@ textNode.setText(jsBlockTrans.transform(context, textNode.getText())); emit(context,null,textNode,null); } + + private void handleJSIncludeNode(ReplayParseContext context, TagNode tagNode) throws IOException { + String file = tagNode.getAttribute("SRC"); + if (file != null) { + //TODO: This is hacky.. fix it + // This is used to check if the file should be skipped... + //from a custom rule.. + String result = jsBlockTrans.transform(context, file); + //The rewriting is done by the js_ rewriter + if ((result != null) && !result.isEmpty()) { + tagNode.setAttribute("SRC", jsUrlTrans.transform(context, file)); + } else { + file = ""; + tagNode.setAttribute("SRC", jsUrlTrans.transform(context, file)); + } + } + + emit(context,null,tagNode,null); + } private void handleOpenTagNode(ReplayParseContext context, TagNode tagNode) throws IOException { boolean insertedJsp = context.getData(FERRET_DONE_KEY) != null; + String preEmit = null; String postEmit = null; String tagName = tagNode.getTagName(); + + boolean alreadyInsertedHead = (context.getData(FERRET_HEAD_INSERTED) != null); + + if (!alreadyInsertedHead) { + // If we're at the beginning of a <head> tag, and haven't inserted yet, + // insert right AFTER head tag + if (tagName.equals("HEAD")) { + emitHeadInsert(context, tagNode, true); + context.putData(FERRET_IN_HEAD, FERRET_IN_HEAD); + return; + } + + + // If we're at the beginning of any tag, other than <html>, + // (including <body>) and haven't inserted yet, + // insert right BEFORE the next tag, also continue other default processing + // of the tag + if (!tagName.equals("HTML") && !tagName.equals("!DOCTYPE")) { + emitHeadInsert(context, null, false); + // Don't return continue to further processing + } + } + + + boolean inHead = (context.getData(FERRET_IN_HEAD) != null); + // Time to insert the JSP header? 
- if(!insertedJsp) { + //IK added check to avoid inserting inside css or script + if(!insertedJsp && !context.isInCSS() && !context.isInScriptText() && !inHead) { if(!okHeadTagMap.containsKey(tagName)) { if(tagName.equals(FRAMESET_TAG)) { // don't put the insert in framsets: @@ -189,6 +255,7 @@ context.putData(FERRET_DONE_KEY,""); } } + // now do all the usual attribute rewriting: // this could be slightly optimized by moving tags more likely to occur // to the front of the if/else if/else if routing... @@ -255,6 +322,10 @@ } else if(tagName.equals("SCRIPT")) { transformAttr(context, tagNode, "SRC", jsUrlTrans); + } else { + if (!checkAllowTag(context, tagNode)) { + return; + } } // now, for *all* tags... transformAttr(context,tagNode,"BACKGROUND", imageUrlTrans); @@ -263,8 +334,31 @@ emit(context,preEmit,tagNode,postEmit); } - - private void emit(ReplayParseContext context, String pre, Node node, + + protected boolean checkAllowTag(ParseContext context, TagNode tagNode) + { + String tagName = tagNode.getTagName(); + + // Check the NOSCRIPT tag, if force-noscript is set, + // then skip the NOSCRIPT tags and include contents explicitly + if (tagName.equals("NOSCRIPT")) { + String allPolic... [truncated message content] |