Revision: 3390 http://archive-access.svn.sourceforge.net/archive-access/?rev=3390&view=rev Author: bradtofel Date: 2011-02-06 14:33:44 +0000 (Sun, 06 Feb 2011) Log Message: ----------- Now eating StringIndexOutOfBoundsException in UURIFactory Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2011-02-06 14:32:33 UTC (rev 3389) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2011-02-06 14:33:44 UTC (rev 3390) @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; +import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -41,6 +42,9 @@ */ public class AggressiveUrlCanonicalizer implements UrlCanonicalizer { + private static final Logger LOGGER = Logger.getLogger( + AggressiveUrlCanonicalizer.class.getName()); + private static final String CDX_PREFIX = " CDX "; /** * Strip leading 'www.' @@ -213,7 +217,16 @@ // as building UURIs is *not* a cheap operation. 
// unescape anything that can be: - UURI tmpURI = UURIFactory.getInstance(searchUrl); + UURI tmpURI = null; + try { + tmpURI = UURIFactory.getInstance(searchUrl); + } catch (StringIndexOutOfBoundsException e) { + LOGGER.warning(e.getMessage() + ": " + searchUrl); + return searchUrl; +// } catch(URIException e) { +// LOGGER.warning(e.getMessage() + ": " + searchUrl); +// return searchUrl; + } tmpURI.setPath(tmpURI.getPath()); // convert to UURI to perform required URI fixup: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |