Revision: 2709 http://archive-access.svn.sourceforge.net/archive-access/?rev=2709&view=rev Author: bradtofel Date: 2009-05-20 01:40:58 +0000 (Wed, 20 May 2009) Log Message: ----------- TWEAK: removed commented code Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2009-05-20 00:44:04 UTC (rev 2708) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2009-05-20 01:40:58 UTC (rev 2709) @@ -219,19 +219,6 @@ searchUrl = scheme + searchUrl; } - // TODO: this will only work with http:// scheme. should work with all? - // force add of scheme and possible add '/' with empty path: -// if (searchUrl.startsWith("http://")) { -// if (-1 == searchUrl.indexOf('/', 8)) { -// searchUrl = searchUrl + "/"; -// } -// } else { -// if (-1 == searchUrl.indexOf("/")) { -// searchUrl = searchUrl + "/"; -// } -// searchUrl = "http://" + searchUrl; -// } - // TODO: These next few lines look crazy -- need to be reworked.. This // was the only easy way I could find to get the correct unescaping // out of UURIs, possible a bug. Definitely needs some TLC in any case, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3067 http://archive-access.svn.sourceforge.net/archive-access/?rev=3067&view=rev Author: bradtofel Date: 2010-04-27 20:51:27 +0000 (Tue, 27 Apr 2010) Log Message: ----------- JAVADOC Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2010-04-24 01:07:50 UTC (rev 3066) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2010-04-27 20:51:27 UTC (rev 3067) @@ -193,13 +193,6 @@ return false; } - /** - * return the canonical string key for the URL argument. - * - * @param urlString - * @return String lookup key for URL argument. - * @throws URIException - */ public String urlStringToKey(final String urlString) throws URIException { if(urlString.startsWith("dns:")) { @@ -323,7 +316,7 @@ System.exit(3); } /** - * @param args + * @param args program arguments */ public static void main(String[] args) { AggressiveUrlCanonicalizer canonicalizer = new AggressiveUrlCanonicalizer(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3390 http://archive-access.svn.sourceforge.net/archive-access/?rev=3390&view=rev Author: bradtofel Date: 2011-02-06 14:33:44 +0000 (Sun, 06 Feb 2011) Log Message: ----------- Now eating StringIndexOutOfBoundsException in UURIFactory Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2011-02-06 14:32:33 UTC (rev 3389) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2011-02-06 14:33:44 UTC (rev 3390) @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; +import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -41,6 +42,9 @@ */ public class AggressiveUrlCanonicalizer implements UrlCanonicalizer { + private static final Logger LOGGER = Logger.getLogger( + AggressiveUrlCanonicalizer.class.getName()); + private static final String CDX_PREFIX = " CDX "; /** * Strip leading 'www.' @@ -213,7 +217,16 @@ // as building UURIs is *not* a cheap operation. 
// unescape anything that can be: - UURI tmpURI = UURIFactory.getInstance(searchUrl); + UURI tmpURI = null; + try { + tmpURI = UURIFactory.getInstance(searchUrl); + } catch (StringIndexOutOfBoundsException e) { + LOGGER.warning(e.getMessage() + ": " + searchUrl); + return searchUrl; +// } catch(URIException e) { +// LOGGER.warning(e.getMessage() + ": " + searchUrl); +// return searchUrl; + } tmpURI.setPath(tmpURI.getPath()); // convert to UURI to perform required URI fixup: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |