Revision: 3453 http://archive-access.svn.sourceforge.net/archive-access/?rev=3453&view=rev Author: bradtofel Date: 2011-05-25 01:40:30 +0000 (Wed, 25 May 2011) Log Message: ----------- OPTIMIZ: now uses UrlOperations.getURLPath() instead of constructing a URL object when determining if URLs are /robots.txt Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2011-05-25 01:37:48 UTC (rev 3452) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2011-05-25 01:40:30 UTC (rev 3453) @@ -39,6 +39,7 @@ import org.archive.wayback.liveweb.LiveWebCache; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; +import org.archive.wayback.util.url.UrlOperations; /** * CaptureSearchResult Filter that uses a LiveWebCache to retrieve robots.txt @@ -230,6 +231,17 @@ } notifiedSeen = true; } + String resultURL = r.getOriginalUrl(); + String path = UrlOperations.getURLPath(resultURL); + if(path.equals(ROBOT_SUFFIX)) { + if(!notifiedPassed) { + if(filterGroup != null) { + filterGroup.setPassedRobots(); + } + notifiedPassed = true; + } + return ObjectFilter.FILTER_INCLUDE; + } int filterResult = ObjectFilter.FILTER_EXCLUDE; RobotRules rules = getRules(r); if(rules == null) { @@ -237,26 +249,17 @@ return ObjectFilter.FILTER_ABORT; } } else { - String resultURL = r.getOriginalUrl(); - URL url; - try { - url = new URL(ArchiveUtils.addImpliedHttpIfNecessary(resultURL)); - String path = 
url.getPath(); - if(path.equals(ROBOT_SUFFIX) || - !rules.blocksPathForUA(path, userAgent)) { - if(!notifiedPassed) { - if(filterGroup != null) { - filterGroup.setPassedRobots(); - } - notifiedPassed = true; + if(!rules.blocksPathForUA(path, userAgent)) { + if(!notifiedPassed) { + if(filterGroup != null) { + filterGroup.setPassedRobots(); } - filterResult = ObjectFilter.FILTER_INCLUDE; - LOGGER.fine("ROBOT: ALLOWED("+resultURL+")"); - } else { - LOGGER.info("ROBOT: BLOCKED("+resultURL+")"); + notifiedPassed = true; } - } catch (MalformedURLException e) { - e.printStackTrace(); + filterResult = ObjectFilter.FILTER_INCLUDE; + LOGGER.fine("ROBOT: ALLOWED("+resultURL+")"); + } else { + LOGGER.info("ROBOT: BLOCKED("+resultURL+")"); } } return filterResult; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |