From: <bra...@us...> - 2008-07-01 23:25:46
|
Revision: 2357
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2357&view=rev
Author:   bradtofel
Date:     2008-07-01 16:25:55 -0700 (Tue, 01 Jul 2008)

Log Message:
-----------
REFACTOR: SearchResult => (Url|Capture)SearchResult

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java	2008-07-01 23:25:27 UTC (rev 2356)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java	2008-07-01 23:25:55 UTC (rev 2357)
@@ -35,15 +35,14 @@
 import java.util.regex.Pattern;

 import org.archive.util.ArchiveUtils;
-import org.archive.wayback.WaybackConstants;
 import org.archive.wayback.core.Resource;
-import org.archive.wayback.core.SearchResult;
+import org.archive.wayback.core.CaptureSearchResult;
 import org.archive.wayback.exception.LiveDocumentNotAvailableException;
 import org.archive.wayback.liveweb.LiveWebCache;
 import org.archive.wayback.util.ObjectFilter;

 /**
- * SearchResultFilter that uses a LiveWebCache to retrieve robots.txt documents
+ * CaptureSearchResult Filter that uses a LiveWebCache to retrieve robots.txt documents
  * from the live web, and filters SearchResults based on the rules therein.
  *
  * This class caches parsed RobotRules that are retrieved, so using the same
@@ -56,7 +55,7 @@
  * @author brad
  * @version $Date$, $Revision$
  */
-public class RobotExclusionFilter implements ObjectFilter<SearchResult> {
+public class RobotExclusionFilter implements ObjectFilter<CaptureSearchResult> {

 	private final static String HTTP_PREFIX = "http://";
 	private final static String ROBOT_SUFFIX = "/robots.txt";
@@ -127,10 +126,10 @@
 		return list;
 	}

-	private RobotRules getRules(SearchResult result) {
+	private RobotRules getRules(CaptureSearchResult result) {
 		RobotRules rules = null;
 		RobotRules tmpRules = null;
-		String host = result.get(WaybackConstants.RESULT_ORIG_HOST);
+		String host = result.getOriginalHost();
 		List<String> urlStrings = searchResultToRobotUrlStrings(host);
 		Iterator<String> itr = urlStrings.iterator();
 		String firstUrlString = null;
@@ -174,12 +173,12 @@
 	/* (non-Javadoc)
 	 * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult)
 	 */
-	public int filterObject(SearchResult r) {
+	public int filterObject(CaptureSearchResult r) {
 		int filterResult = ObjectFilter.FILTER_EXCLUDE;
 		RobotRules rules = getRules(r);
 		if(rules != null) {
-			String resultURL = r.get(WaybackConstants.RESULT_URL);
+			String resultURL = r.getOriginalUrl();
 			URL url;
 			try {
 				url = new URL(ArchiveUtils.addImpliedHttpIfNecessary(resultURL));

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java	2008-07-01 23:25:27 UTC (rev 2356)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java	2008-07-01 23:25:55 UTC (rev 2357)
@@ -25,7 +25,7 @@
 package org.archive.wayback.accesscontrol.robotstxt;

 import org.archive.wayback.accesscontrol.ExclusionFilterFactory;
-import org.archive.wayback.core.SearchResult;
+import org.archive.wayback.core.CaptureSearchResult;
 import org.archive.wayback.liveweb.LiveWebCache;
 import org.archive.wayback.util.ObjectFilter;

@@ -44,7 +44,7 @@
 	/* (non-Javadoc)
 	 * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get()
 	 */
-	public ObjectFilter<SearchResult> get() {
+	public ObjectFilter<CaptureSearchResult> get() {
 		return new RobotExclusionFilter(webCache,userAgent,maxCacheMS);
 	}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|