From: <bra...@us...> - 2011-02-06 14:31:52
|
Revision: 3388
          http://archive-access.svn.sourceforge.net/archive-access/?rev=3388&view=rev
Author:   bradtofel
Date:     2011-02-06 14:31:46 +0000 (Sun, 06 Feb 2011)

Log Message:
-----------
Now blocks from excludes prior to lookup

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java	2011-02-06 14:31:04 UTC (rev 3387)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java	2011-02-06 14:31:46 UTC (rev 3388)
@@ -22,6 +22,7 @@
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.logging.Logger;
 
 import javax.servlet.ServletException;
 import javax.servlet.http.HttpServletRequest;
@@ -29,9 +30,11 @@
 
 import org.archive.io.arc.ARCRecord;
 import org.archive.wayback.accesscontrol.robotstxt.RobotExclusionFilterFactory;
+import org.archive.wayback.accesscontrol.staticmap.StaticMapExclusionFilterFactory;
 import org.archive.wayback.core.CaptureSearchResult;
 import org.archive.wayback.core.CaptureSearchResults;
 import org.archive.wayback.core.WaybackRequest;
+import org.archive.wayback.exception.AdministrativeAccessControlException;
 import org.archive.wayback.exception.BadQueryException;
 import org.archive.wayback.exception.ResourceNotInArchiveException;
 import org.archive.wayback.exception.RobotAccessControlException;
@@ -39,6 +42,7 @@
 import org.archive.wayback.liveweb.LiveWebCache;
 import org.archive.wayback.resourceindex.filters.ExclusionFilter;
 import org.archive.wayback.resourcestore.resourcefile.ArcResource;
+import org.archive.wayback.util.url.UrlOperations;
 import org.archive.wayback.util.webapp.AbstractRequestHandler;
 
 /**
@@ -49,9 +53,14 @@
  *
  */
 public class LiveWebAccessPoint extends AbstractRequestHandler {
+	private static final Logger LOGGER = Logger.getLogger(
+			LiveWebAccessPoint.class.getName());
+
 	private AccessPoint inner = null;
 	private LiveWebCache cache = null;
 	private RobotExclusionFilterFactory robotFactory = null;
+	private StaticMapExclusionFilterFactory adminFactory = null;
+
 	private long maxCacheMS = 86400000;
 
 	public boolean handleRequest(HttpServletRequest httpRequest,
@@ -59,7 +68,7 @@
 			throws ServletException, IOException {
 
 		String urlString = translateRequestPathQuery(httpRequest);
-		
+		urlString = UrlOperations.fixupHTTPUrlWithOneSlash(urlString);
 		boolean handled = true;
 		WaybackRequest wbRequest = new WaybackRequest();
 		wbRequest.setAccessPoint(inner);
@@ -84,6 +93,17 @@
 					throw new RobotAccessControlException(urlString + "is blocked by robots.txt");
 				}
 			}
+			if(adminFactory != null) {
+				ExclusionFilter f = adminFactory.get();
+				if(f == null) {
+					LOGGER.severe("Unable to get administrative exclusion filter!");
+					throw new AdministrativeAccessControlException(urlString + "is blocked.");
+				}
+				int ruling = f.filterObject(result);
+				if(ruling == ExclusionFilter.FILTER_EXCLUDE) {
+					throw new AdministrativeAccessControlException(urlString + "is blocked.");
+				}
+			}
 			// no robots check, or robots.txt says GO:
 			ArcResource r = (ArcResource) cache.getCachedResource(url, maxCacheMS , false);
 			ARCRecord ar = (ARCRecord) r.getArcRecord();
@@ -151,4 +171,12 @@
 	public void setInner(AccessPoint inner) {
 		this.inner = inner;
 	}
+
+	public StaticMapExclusionFilterFactory getAdminFactory() {
+		return adminFactory;
+	}
+
+	public void setAdminFactory(StaticMapExclusionFilterFactory adminFactory) {
+		this.adminFactory = adminFactory;
+	}
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |