From: <bra...@us...> - 2007-09-18 23:46:10
|
Revision: 1996 http://archive-access.svn.sourceforge.net/archive-access/?rev=1996&view=rev Author: bradtofel Date: 2007-09-18 16:46:12 -0700 (Tue, 18 Sep 2007) Log Message: ----------- REFACTOR: moved exclusion mechanism from resourceIndex to WaybackContext, which is clearer conceptually, and will allow a common resource index to be shared across multiple WaybackContexts which may have different exclusion policies. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExternalExcluder.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -28,7 +28,6 @@ import java.util.Iterator; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.resourceindex.filters.CompositeExclusionFilter; import org.archive.wayback.util.ObjectFilter; @@ -55,11 +54,11 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<SearchResult> get(WaybackRequest wbRequest) { + public ObjectFilter<SearchResult> get() { Iterator<ExclusionFilterFactory> itr = factories.iterator(); CompositeExclusionFilter filter = new CompositeExclusionFilter(); while(itr.hasNext()) { - filter.addComponent(itr.next().get(wbRequest)); + filter.addComponent(itr.next().get()); } return filter; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -25,7 +25,6 @@ package org.archive.wayback.accesscontrol; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.util.ObjectFilter; /** * @@ -35,11 +34,10 @@ */ public interface ExclusionFilterFactory { /** - * @param wbRequest * @return an ObjectFilter object that filters records based on - * some set of exclusion rules appropriate to the wbRequest. + * some set of exclusion rules */ - public ObjectFilter<SearchResult> get(WaybackRequest wbRequest); + public ObjectFilter<SearchResult> get(); /** * close any resources used by this ExclusionFilter system. */ Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExternalExcluder.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExternalExcluder.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExternalExcluder.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -28,7 +28,6 @@ import org.archive.net.LaxURI; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.util.ObjectFilter; import org.springframework.beans.factory.xml.XmlBeanFactory; import org.springframework.core.io.FileSystemResource; @@ -94,8 +93,7 @@ * at configPath */ public static ExternalExcluder getExcluder(String configPath) { - WaybackRequest wbRequest = null; - return new ExternalExcluder(getFactory(configPath).get(wbRequest)); + return new ExternalExcluder(getFactory(configPath).get()); } /** * shutdown underlying resources. Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -26,7 +26,6 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.util.ObjectFilter; /** @@ -44,7 +43,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<SearchResult> get(WaybackRequest wbRequest) { + public ObjectFilter<SearchResult> get() { return new RemoteExclusionFilter(exclusionUrlPrefix, exclusionUserAgent); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -26,7 +26,6 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.liveweb.LiveWebCache; import org.archive.wayback.util.ObjectFilter; @@ -45,7 +44,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<SearchResult> get(WaybackRequest wbRequest) { + public ObjectFilter<SearchResult> get() { return new RobotExclusionFilter(webCache,userAgent,maxCacheMS); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -58,7 +58,9 @@ if(nextSearch == null) { break; } + System.err.println("EXCLUSION-MAP:Checking " + nextSearch); if(exclusionMap.containsKey(nextSearch)) { + System.err.println("EXCLUSION-MAP: EXCLUDED: \"" + nextSearch + "\" (" + url +")"); return true; } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -32,7 +32,6 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.surt.SURTTokenizer; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.ObjectFilter; @@ -48,7 +47,7 @@ private static final Logger LOGGER = Logger.getLogger(StaticMapExclusionFilterFactory.class.getName()); - private int checkInterval = 10; + private int checkInterval = 0; private Map<String,Object> currentMap = null; private File file = null; long lastUpdated = 0; @@ -64,7 +63,9 @@ */ public void init() throws IOException { reloadFile(); - startUpdateThread(); + if(checkInterval > 0) { + startUpdateThread(); + } } protected void reloadFile() throws IOException { @@ -95,6 +96,7 @@ } String surt = line.startsWith("(") ? line : SURTTokenizer.prefixKey(line); + System.err.println("EXCLUSION-MAP: adding " + surt); newMap.put(surt, null); } itr.close(); @@ -105,7 +107,7 @@ * @param wbRequest * @return SearchResultFilter */ - public ObjectFilter<SearchResult> get(WaybackRequest wbRequest) { + public ObjectFilter<SearchResult> get() { if(currentMap == null) { return null; } @@ -166,14 +168,14 @@ } /** - * @return the checkInterval + * @return the checkInterval in seconds */ public int getCheckInterval() { return checkInterval; } /** - * @param checkInterval the checkInterval to set + * @param checkInterval the checkInterval in seconds to set */ public void setCheckInterval(int checkInterval) { this.checkInterval = checkInterval; @@ -197,7 +199,6 @@ * @see org.archive.wayback.accesscontrol.ExclusionFilterFactory#shutdown() */ public void shutdown() { - // TODO Auto-generated method stub stopUpdateThread(); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -37,6 +37,7 @@ import org.archive.net.UURIFactory; import org.archive.wayback.WaybackConstants; import org.archive.wayback.requestparser.OpenSearchRequestParser; +import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.StringFormatter; import org.archive.wayback.webapp.WaybackContext; @@ -56,6 +57,7 @@ private String contextPrefix = null; private String serverPrefix = null; private WaybackContext context = null; + private ObjectFilter<SearchResult> exclusionFilter = null; private HashMap<String,String> filters = new HashMap<String,String>(); @@ -391,4 +393,12 @@ public void setContext(WaybackContext context) { this.context = context; } + + public ObjectFilter<SearchResult> getExclusionFilter() { + return exclusionFilter; + } + + public void setExclusionFilter(ObjectFilter<SearchResult> exclusionFilter) { + this.exclusionFilter = exclusionFilter; + } } \ No newline at end of file Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -43,7 +43,6 @@ import org.archive.wayback.resourceindex.filters.UrlPrefixMatchFilter; import org.archive.wayback.resourceindex.filters.WindowEndFilter; import org.archive.wayback.resourceindex.filters.WindowStartFilter; -import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; @@ -76,24 +75,8 @@ protected SearchResultSource source; - private ExclusionFilterFactory exclusionFactory = null; - private UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); - private ObjectFilter<SearchResult> getExclusionFilter( - WaybackRequest wbRequest) - throws ResourceIndexNotAvailableException { - ObjectFilter<SearchResult> filter = null; - if(exclusionFactory != null) { - filter = exclusionFactory.get(wbRequest); - if(filter == null) { - throw new ResourceIndexNotAvailableException("Exclusion " + - "Service Unavailable"); - } - } - return filter; - } - private void filterRecords(CloseableIterator<SearchResult> itr, ObjectFilter<SearchResult> filter, SearchResults results, boolean forwards) throws IOException { @@ -211,7 +194,7 @@ GuardRailFilter guardrail = new GuardRailFilter(maxRecords); // checks an exclusion service for every matching record - ObjectFilter<SearchResult> exclusion = getExclusionFilter(wbRequest); + ObjectFilter<SearchResult> exclusion = wbRequest.getExclusionFilter(); // count how many results got to the ExclusionFilter: CounterFilter preExCounter = new CounterFilter(); @@ -267,18 +250,15 @@ reverseFilters.addFilter(selfRedirectFilter); // possibly filter via exclusions: - if(exclusion == null) { - forwardFilters.addFilter(finalCounter); - reverseFilters.addFilter(finalCounter); - } else { + if(exclusion != null) { forwardFilters.addFilter(preExCounter); forwardFilters.addFilter(exclusion); - forwardFilters.addFilter(finalCounter); reverseFilters.addFilter(preExCounter); reverseFilters.addFilter(exclusion); - reverseFilters.addFilter(finalCounter); } + forwardFilters.addFilter(finalCounter); + reverseFilters.addFilter(finalCounter); int resultsPerDirection = (int) Math.floor(resultsPerPage / 2); if (resultsPerDirection * 2 == resultsPerPage) { @@ -318,13 +298,11 @@ } filters.addFilter(new EndDateFilter(endDate)); // possibly filter via exclusions: - if (exclusion == null) { - filters.addFilter(finalCounter); - } else { + if (exclusion != null) { filters.addFilter(preExCounter); filters.addFilter(exclusion); - filters.addFilter(finalCounter); } + filters.addFilter(finalCounter); startKey = keyUrl + " " + startDate; // add the start and end windowing filters: @@ -355,13 +333,12 @@ // possibly filter via exclusions: if (exclusion == null) { filters.addFilter(new CaptureToUrlResultFilter()); - filters.addFilter(finalCounter); } else { filters.addFilter(preExCounter); filters.addFilter(exclusion); filters.addFilter(new CaptureToUrlResultFilter()); - filters.addFilter(finalCounter); } + filters.addFilter(finalCounter); startKey = keyUrl; // add the start and end windowing filters: @@ -431,12 +408,4 @@ public void setSource(SearchResultSource source) { this.source = source; } - - /** - * @param exclusionFactory the exclusionFactory to set - */ - public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) { - this.exclusionFactory = exclusionFactory; - } - } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java 2007-09-18 23:44:09 UTC (rev 1995) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/WaybackContext.java 2007-09-18 23:46:12 UTC (rev 1996) @@ -39,20 +39,23 @@ import org.archive.wayback.ResourceStore; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.UIResults; import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.AuthenticationControlException; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.ResourceNotAvailableException; import org.archive.wayback.exception.WaybackException; +import org.archive.wayback.util.operator.BooleanOperator; import org.springframework.beans.factory.BeanNameAware; /** * Retains all information about a particular Wayback configuration - * withing a ServletContext, including holding references to the + * within a ServletContext, including holding references to the * implementation instances of the primary Wayback classes: * * ResourceIndex @@ -75,6 +78,8 @@ private RequestParser parser = null; private ResultURIConverter uriConverter = null; private Properties configs = null; + private ExclusionFilterFactory exclusionFactory = null; + private BooleanOperator<WaybackRequest> authentication = null; /** * @@ -255,7 +260,15 @@ wbRequest.setContext(this); handled = true; wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); - + if(authentication != null) { + if(!authentication.isTrue(wbRequest)) { + throw new AuthenticationControlException("Not authorized"); + } + } + + if(exclusionFactory != null) { + wbRequest.setExclusionFilter(exclusionFactory.get()); + } if(wbRequest.isReplayRequest()) { handleReplay(wbRequest,httpRequest,httpResponse); @@ -270,9 +283,10 @@ } catch (BadQueryException e) { query.renderException(httpRequest, httpResponse, wbRequest, e); + } catch (AuthenticationControlException e) { + query.renderException(httpRequest, httpResponse, wbRequest, e); } - return handled; } @@ -414,4 +428,20 @@ public void setUseServerName(boolean useServerName) { this.useServerName = useServerName; } + + public ExclusionFilterFactory getExclusionFactory() { + return exclusionFactory; + } + + public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) { + this.exclusionFactory = exclusionFactory; + } + + public BooleanOperator<WaybackRequest> getAuthentication() { + return authentication; + } + + public void setAuthentication(BooleanOperator<WaybackRequest> authentication) { + this.authentication = authentication; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |