From: <bra...@us...> - 2011-02-06 14:54:09
|
Revision: 3409
          http://archive-access.svn.sourceforge.net/archive-access/?rev=3409&view=rev
Author:   bradtofel
Date:     2011-02-06 14:54:02 +0000 (Sun, 06 Feb 2011)

Log Message:
-----------
BUGFIX: was not canonicalizing URLs prior to lookup..

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2011-02-06 14:52:48 UTC (rev 3408)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2011-02-06 14:54:02 UTC (rev 3409)
@@ -23,10 +23,12 @@
 import java.util.logging.Logger;
 import org.apache.commons.httpclient.URIException;
+import org.archive.wayback.UrlCanonicalizer;
 import org.archive.wayback.core.CaptureSearchResult;
 import org.archive.wayback.resourceindex.filters.ExclusionFilter;
 import org.archive.wayback.surt.SURTTokenizer;
 import org.archive.wayback.util.ObjectFilter;
+import org.archive.wayback.util.url.AggressiveUrlCanonicalizer;
 /**
  *
@@ -43,11 +45,13 @@
     private boolean notifiedSeen = false;
     private boolean notifiedPassed = false;
     Map<String,Object> exclusionMap = null;
+    UrlCanonicalizer canonicalizer = new AggressiveUrlCanonicalizer();
     /**
      * @param map where each String key is a SURT that is blocked.
      */
-    public StaticMapExclusionFilter(Map<String,Object> map) {
+    public StaticMapExclusionFilter(Map<String,Object> map, UrlCanonicalizer canonicalizer) {
         exclusionMap = map;
+        this.canonicalizer = canonicalizer;
     }
     protected boolean isExcluded(String url) {
@@ -80,7 +84,14 @@
             }
             notifiedSeen = true;
         }
-        String url = r.getOriginalUrl();
+        String url;
+        try {
+            url = canonicalizer.urlStringToKey(r.getOriginalUrl());
+        } catch (URIException e) {
+
+            //e.printStackTrace();
+            return FILTER_EXCLUDE;
+        }
         if(lastChecked != null) {
             if(lastChecked.equals(url)) {
                 if(lastCheckedExcluded) {

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2011-02-06 14:52:48 UTC (rev 3408)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2011-02-06 14:54:02 UTC (rev 3409)
@@ -25,11 +25,13 @@
 import java.util.Map;
 import java.util.logging.Logger;
+import org.archive.wayback.UrlCanonicalizer;
 import org.archive.wayback.accesscontrol.ExclusionFilterFactory;
 import org.archive.wayback.resourceindex.filters.ExclusionFilter;
 import org.archive.wayback.surt.SURTTokenizer;
 import org.archive.wayback.util.CloseableIterator;
 import org.archive.wayback.util.flatfile.FlatFile;
+import org.archive.wayback.util.url.AggressiveUrlCanonicalizer;
 /**
  *
@@ -45,6 +47,8 @@
     private Map<String,Object> currentMap = null;
     private File file = null;
     long lastUpdated = 0;
+    UrlCanonicalizer canonicalizer = new AggressiveUrlCanonicalizer();
+
     /**
      * Thread object of update thread -- also is flag indicating if the thread
      * has already been started -- static, and access to it is synchronized.
@@ -93,6 +97,7 @@
                 if(line.length() == 0) {
                     continue;
                 }
+                line = canonicalizer.urlStringToKey(line);
                 String surt = line.startsWith("(") ? line : SURTTokenizer.prefixKey(line);
                 LOGGER.fine("EXCLUSION-MAP: adding " + surt);
@@ -110,7 +115,7 @@
         if(currentMap == null) {
             return null;
         }
-        return new StaticMapExclusionFilter(currentMap);
+        return new StaticMapExclusionFilter(currentMap, canonicalizer);
     }
     private synchronized void startUpdateThread() {

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|