From: <bra...@us...> - 2010-03-26 03:22:11
|
Revision: 3006 http://archive-access.svn.sourceforge.net/archive-access/?rev=3006&view=rev Author: bradtofel Date: 2010-03-26 03:22:05 +0000 (Fri, 26 Mar 2010) Log Message: ----------- FEATURE: Added new subclasses of AccessControlException, Robot and Administrative... Required defining the new abstract ExclusionFilter, which is aware of the ExclusionFilterGroup, so it can apprise the filtergroup of blocked and passed robots and admin filtering. All Exclusion-related Filters now have to extend the ExclusionFilter. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/WindowFilterGroup.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RobotAccessControlException.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -29,6 +29,7 @@ import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.filters.CompositeExclusionFilter; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** @@ -54,7 +55,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<CaptureSearchResult> get() { + public ExclusionFilter get() { Iterator<ExclusionFilterFactory> 
itr = factories.iterator(); CompositeExclusionFilter filter = new CompositeExclusionFilter(); while(itr.hasNext()) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -25,6 +25,7 @@ package org.archive.wayback.accesscontrol; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** * @@ -37,7 +38,7 @@ * @return an ObjectFilter object that filters records based on * some set of exclusion rules */ - public ObjectFilter<CaptureSearchResult> get(); + public ExclusionFilter get(); /** * close any resources used by this ExclusionFilter system. 
*/ Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -31,21 +31,25 @@ import org.archive.accesscontrol.RuleOracleUnavailableException; import org.archive.util.ArchiveUtils; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** * @author brad * */ -public class OracleExclusionFilter implements ObjectFilter<CaptureSearchResult> { +public class OracleExclusionFilter extends ExclusionFilter { AccessControlClient client = null; private String accessGroup = null; private final static String POLICY_ALLOW = "allow"; private final static String POLICY_BLOCK = "block"; private final static String POLICY_ROBOT = "robots"; + private boolean notifiedRobotSeen = false; + private boolean notifiedRobotPassed = false; + private boolean notifiedAdminSeen = false; + private boolean notifiedAdminPassed = false; - /** * @param oracleUrl String URL prefix for the Oracle HTTP server * @param accessGroup String group to use with requests to the Oracle @@ -84,12 +88,32 @@ accessGroup); if(policy != null) { if(policy.equals(POLICY_ALLOW)) { + if(!notifiedAdminSeen) { + notifiedAdminSeen = true; + filterGroup.setSawAdministrative(); + } + if(!notifiedAdminPassed) { + notifiedAdminPassed = true; + filterGroup.setPassedAdministrative(); + } return FILTER_INCLUDE; } else if(policy.equals(POLICY_BLOCK)) { + if(!notifiedAdminSeen) { + notifiedAdminSeen = true; + 
filterGroup.setSawAdministrative(); + } return FILTER_EXCLUDE; } else if(policy.equals(POLICY_ROBOT)) { + if(!notifiedRobotSeen) { + notifiedRobotSeen = true; + filterGroup.setSawRobots(); + } return FILTER_INCLUDE; // if(robotFilter != null) { +// if(!notifiedRobotPassed) { +// notifiedRobotPassed = true; +// filterGroup.setPassedRobot(); +// } // return robotFilter.filterObject(o); // } else { // return FILTER_EXCLUDE; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilterFactory.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilterFactory.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -26,6 +26,7 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** @@ -40,7 +41,7 @@ private String accessGroup = null; private String proxyHostPort = null; - public ObjectFilter<CaptureSearchResult> get() { + public ExclusionFilter get() { OracleExclusionFilter filter = new OracleExclusionFilter(oracleUrl, accessGroup, proxyHostPort); return filter; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java 2010-03-24 01:08:53 UTC (rev 3005) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -33,7 +33,7 @@ import org.apache.log4j.Logger; import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.util.ObjectFilter; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; /** * SearchResultFilter which uses remote access control/exclusion service to @@ -43,7 +43,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class RemoteExclusionFilter implements ObjectFilter<CaptureSearchResult> { +public class RemoteExclusionFilter extends ExclusionFilter { private static final Logger LOGGER = Logger.getLogger(RemoteExclusionFilter.class .getName()); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -26,6 +26,7 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** @@ -44,7 +45,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<CaptureSearchResult> get() { + public ExclusionFilter get() { return new RemoteExclusionFilter(exclusionUrlPrefix, exclusionUserAgent); } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -41,6 +41,7 @@ import org.archive.wayback.exception.LiveDocumentNotAvailableException; import org.archive.wayback.exception.LiveWebCacheUnavailableException; import org.archive.wayback.liveweb.LiveWebCache; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** @@ -58,7 +59,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class RobotExclusionFilter implements ObjectFilter<CaptureSearchResult> { +public class RobotExclusionFilter extends ExclusionFilter { private final static Logger LOGGER = Logger.getLogger(RobotExclusionFilter.class.getName()); @@ -74,6 +75,8 @@ private String userAgent = null; private StringBuilder sb = null; private final static RobotRules emptyRules = new RobotRules(); + private boolean notifiedSeen = false; + private boolean notifiedPassed = false; /** * Construct a new RobotExclusionFilter that uses webCache to pull @@ -226,7 +229,10 @@ * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) */ public int filterObject(CaptureSearchResult r) { - + if(!notifiedSeen) { + filterGroup.setSawRobots(); + notifiedSeen = true; + } int filterResult = ObjectFilter.FILTER_EXCLUDE; RobotRules rules = getRules(r); if(rules != null) { @@ -235,6 +241,10 @@ try { url = new URL(ArchiveUtils.addImpliedHttpIfNecessary(resultURL)); if(!rules.blocksPathForUA(url.getPath(), userAgent)) { 
+ if(!notifiedPassed) { + filterGroup.setPassedRobots(); + notifiedPassed = true; + } filterResult = ObjectFilter.FILTER_INCLUDE; LOGGER.fine("ROBOT: ALLOWED("+resultURL+")"); } else { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -27,6 +27,7 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.liveweb.LiveWebCache; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** @@ -44,7 +45,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<CaptureSearchResult> get() { + public ExclusionFilter get() { return new RobotExclusionFilter(webCache,userAgent,maxCacheMS); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -30,6 +30,7 @@ import org.apache.commons.httpclient.URIException; import 
org.apache.log4j.Logger; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.surt.SURTTokenizer; import org.archive.wayback.util.ObjectFilter; @@ -39,12 +40,14 @@ * @author brad * @version $Date$, $Revision$ */ -public class StaticMapExclusionFilter implements ObjectFilter<CaptureSearchResult> { +public class StaticMapExclusionFilter extends ExclusionFilter { private static final Logger LOGGER = Logger.getLogger( StaticMapExclusionFilter.class.getName()); private String lastChecked = null; private boolean lastCheckedExcluded = false; + private boolean notifiedSeen = false; + private boolean notifiedPassed = false; Map<String,Object> exclusionMap = null; /** * @param map where each String key is a SURT that is blocked. @@ -77,18 +80,33 @@ * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) */ public int filterObject(CaptureSearchResult r) { + if(!notifiedSeen) { + filterGroup.setSawAdministrative(); + notifiedSeen = true; + } String url = r.getOriginalUrl(); if(lastChecked != null) { if(lastChecked.equals(url)) { - return lastCheckedExcluded ? - ObjectFilter.FILTER_EXCLUDE : - ObjectFilter.FILTER_INCLUDE; + if(lastCheckedExcluded) { + return ObjectFilter.FILTER_EXCLUDE; + } else { + // don't need to: already did last time... + //filterGroup.setPassedAdministrative(); + return ObjectFilter.FILTER_INCLUDE; + } } } lastChecked = url; lastCheckedExcluded = isExcluded(url); - return lastCheckedExcluded ? 
- ObjectFilter.FILTER_EXCLUDE : - ObjectFilter.FILTER_INCLUDE; + if(lastCheckedExcluded) { + return ObjectFilter.FILTER_EXCLUDE; + } else { + if(!notifiedPassed) { + filterGroup.setPassedAdministrative(); + notifiedPassed = true; + } + return ObjectFilter.FILTER_INCLUDE; + } + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -33,6 +33,7 @@ import org.apache.log4j.Logger; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.surt.SURTTokenizer; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.ObjectFilter; @@ -110,7 +111,7 @@ * @return ObjectFilter which blocks CaptureSearchResults in the * exclusion file. 
*/ - public ObjectFilter<CaptureSearchResult> get() { + public ExclusionFilter get() { if(currentMap == null) { return null; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -36,6 +36,7 @@ import javax.servlet.http.HttpServletRequest; import org.archive.wayback.requestparser.OpenSearchRequestParser; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.resourceindex.filters.HostMatchFilter; import org.archive.wayback.resourceindex.filters.SchemeMatchFilter; import org.archive.wayback.util.ObjectFilter; @@ -84,7 +85,7 @@ * resultFilters, if these filters redact all results, then an * AccessControlException will be thrown. */ - private ObjectFilter<CaptureSearchResult> exclusionFilter = null; + private ExclusionFilter exclusionFilter = null; /** * custom CaptureSearchResult Filter to use for this specific request. 
Can @@ -486,11 +487,11 @@ this.accessPoint = accessPoint; } - public ObjectFilter<CaptureSearchResult> getExclusionFilter() { + public ExclusionFilter getExclusionFilter() { return exclusionFilter; } - public void setExclusionFilter(ObjectFilter<CaptureSearchResult> exclusionFilter) { + public void setExclusionFilter(ExclusionFilter exclusionFilter) { this.exclusionFilter = exclusionFilter; } Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -0,0 +1,40 @@ +/* AdminstrativeAccessControlException + * + * $Id$: + * + * Created on Mar 25, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.exception; + +/** + * @author brad + * + */ +public class AdminstrativeAccessControlException extends AccessControlException { + + /** + * @param message + */ + public AdminstrativeAccessControlException(String message) { + super("Blocked Site Error",message); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RobotAccessControlException.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RobotAccessControlException.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RobotAccessControlException.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -0,0 +1,40 @@ +/* RobotAccessControlException + * + * $Id$: + * + * Created on Mar 25, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.exception; + +/** + * @author brad + * + */ +public class RobotAccessControlException extends AccessControlException { + + /** + * @param message + */ + public RobotAccessControlException(String message) { + super("Blocked By Robots",message); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RobotAccessControlException.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -30,8 +30,11 @@ import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; +import org.archive.wayback.exception.AdminstrativeAccessControlException; import org.archive.wayback.exception.ResourceNotInArchiveException; +import org.archive.wayback.exception.RobotAccessControlException; import org.archive.wayback.resourceindex.filters.CounterFilter; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.ObjectFilterChain; @@ -41,23 +44,27 @@ private 
CounterFilter preCounter = null; private CounterFilter postCounter = null; String requestUrl = null; + private boolean sawRobots = false; + private boolean passedRobots = false; + private boolean sawAdministrative = false; + private boolean passedAdministrative = false; public ExclusionCaptureFilterGroup(WaybackRequest request) { // checks an exclusion service for every matching record - ObjectFilter<CaptureSearchResult> exclusion = - request.getExclusionFilter(); + ExclusionFilter exclusion = request.getExclusionFilter(); chain = new ObjectFilterChain<CaptureSearchResult>(); if(exclusion != null) { - preCounter = new CounterFilter(); - // count how many results got to the ExclusionFilter: - chain.addFilter(preCounter); + exclusion.setFilterGroup(this); +// preCounter = new CounterFilter(); +// // count how many results got to the ExclusionFilter: +// chain.addFilter(preCounter); chain.addFilter(exclusion); // count how many results got past the ExclusionFilter: requestUrl = request.getRequestUrl(); } - postCounter = new CounterFilter(); - chain.addFilter(postCounter); +// postCounter = new CounterFilter(); +// chain.addFilter(postCounter); } public List<ObjectFilter<CaptureSearchResult>> getFilters() { @@ -66,20 +73,38 @@ public void annotateResults(SearchResults results) throws AccessControlException, ResourceNotInArchiveException { - if(postCounter.getNumMatched() == 0) { + if(sawRobots && !passedRobots) { + throw new RobotAccessControlException("The URL " + requestUrl + + " is blocked by the sites robots.txt file"); + } + if(sawAdministrative && !passedAdministrative) { + throw new AdminstrativeAccessControlException(requestUrl + + " is not available in the Wayback Machine."); + } - // nothing got to the counter after exclusions. If we have - // exclusions (detected by preCounter being non-null, and the - // preCounter passed any results, then they were all filtered by - // the exclusions filter. 
- if(preCounter != null && preCounter.getNumMatched() > 0) { - throw new AccessControlException("All results Excluded"); - } - ResourceNotInArchiveException e = - new ResourceNotInArchiveException("the URL " + requestUrl - + " is not in the archive."); - e.setCloseMatches(results.getCloseMatches()); - throw e; - } +// if(postCounter.getNumMatched() == 0) { +// +// // nothing got to the counter after exclusions. If we have +// // exclusions (detected by preCounter being non-null, and the +// // preCounter passed any results, then they were all filtered by +// // the exclusions filter. +// if(preCounter != null && preCounter.getNumMatched() > 0) { +// throw new AccessControlException("All results Excluded"); +// } +// } } + + public void setPassedRobots() { + passedRobots = true; + } + public void setSawRobots() { + sawRobots = true; + } + + public void setPassedAdministrative() { + passedAdministrative = true; + } + public void setSawAdministrative() { + sawAdministrative = true; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/WindowFilterGroup.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/WindowFilterGroup.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/WindowFilterGroup.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -29,6 +29,7 @@ import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.resourceindex.LocalResourceIndex; import org.archive.wayback.resourceindex.filters.WindowEndFilter; import org.archive.wayback.resourceindex.filters.WindowStartFilter; @@ 
-42,9 +43,10 @@ ObjectFilterChain<T> windowFilters; WindowStartFilter<T> startFilter; WindowEndFilter<T> endFilter; + private String requestUrl = null; public WindowFilterGroup(WaybackRequest request, LocalResourceIndex index) throws BadQueryException { - + requestUrl = request.getRequestUrl(); windowFilters = new ObjectFilterChain<T>(); // first grab all the info from the WaybackRequest, and validate it: resultsPerPage = request.getResultsPerPage(); @@ -71,13 +73,24 @@ } public void annotateResults(SearchResults results) - throws BadQueryException { + throws BadQueryException, ResourceNotInArchiveException { results.setFirstReturned(startResult); results.setNumRequested(resultsPerPage); + int startSeen = startFilter.getNumSeen(); + if(startSeen == 0) { + ResourceNotInArchiveException e = + new ResourceNotInArchiveException("the URL " + requestUrl + + " is not in the archive."); + e.setCloseMatches(results.getCloseMatches()); + throw e; + } + int numSeen = endFilter.getNumSeen(); if(numSeen == 0) { throw new BadQueryException("No results in requested window"); } + + // how many went by the filters: results.setMatchingCount(startFilter.getNumSeen()); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -28,6 +28,7 @@ import java.util.Iterator; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filterfactory.ExclusionCaptureFilterGroup; import org.archive.wayback.util.ObjectFilter; /** @@ -38,22 +39,29 @@ * @author 
brad * @version $Date$, $Revision$ */ -public class CompositeExclusionFilter implements ObjectFilter<CaptureSearchResult> { +public class CompositeExclusionFilter extends ExclusionFilter { + //implements ObjectFilter<CaptureSearchResult> { - private ArrayList<ObjectFilter<CaptureSearchResult>> filters = - new ArrayList<ObjectFilter<CaptureSearchResult>>(); + private ArrayList<ExclusionFilter> filters = + new ArrayList<ExclusionFilter>(); /** * @param filter to be added to the composite. */ - public void addComponent(ObjectFilter<CaptureSearchResult> filter) { + public void addComponent(ExclusionFilter filter) { filters.add(filter); } + public void setFilterGroup(ExclusionCaptureFilterGroup filterGroup) { + this.filterGroup = filterGroup; + for(ExclusionFilter filter : filters) { + filter.setFilterGroup(filterGroup); + } + } /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) */ public int filterObject(CaptureSearchResult r) { - Iterator<ObjectFilter<CaptureSearchResult>> itr = filters.iterator(); + Iterator<ExclusionFilter> itr = filters.iterator(); while(itr.hasNext()) { ObjectFilter<CaptureSearchResult> filter = itr.next(); if(filter == null) { Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -0,0 +1,41 @@ +/* ExclusionFilter + * + * $Id$: + * + * Created on Mar 25, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. 
+ * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.resourceindex.filters; + +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filterfactory.ExclusionCaptureFilterGroup; +import org.archive.wayback.util.ObjectFilter; + +/** + * @author brad + * + */ +public abstract class ExclusionFilter implements ObjectFilter<CaptureSearchResult> { + protected ExclusionCaptureFilterGroup filterGroup; + public void setFilterGroup(ExclusionCaptureFilterGroup filterGroup) { + this.filterGroup = filterGroup; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |