From: <nl...@us...> - 2012-02-28 22:58:18
|
Revision: 3622
          http://archive-access.svn.sourceforge.net/archive-access/?rev=3622&view=rev
Author:   nlevitt
Date:     2012-02-28 22:58:11 +0000 (Tue, 28 Feb 2012)
Log Message:
-----------
BUGFIX: avoid NPE on urls with null host, e.g. dns:archive.org

Modified Paths:
--------------
    trunk/archive-access/projects/archive-commons/src/main/java/org/archive/url/GoogleURLCanonicalizer.java

Modified: trunk/archive-access/projects/archive-commons/src/main/java/org/archive/url/GoogleURLCanonicalizer.java
===================================================================
--- trunk/archive-access/projects/archive-commons/src/main/java/org/archive/url/GoogleURLCanonicalizer.java	2012-02-28 04:09:45 UTC (rev 3621)
+++ trunk/archive-access/projects/archive-commons/src/main/java/org/archive/url/GoogleURLCanonicalizer.java	2012-02-28 22:58:11 UTC (rev 3622)
@@ -23,19 +23,22 @@
         url.setQuery(minimalEscape(url.getQuery()));
         String hostE = unescapeRepeatedly(url.getHost());
         String host = null;
-        try {
-            host = IDN.toASCII(hostE);
-        } catch(IllegalArgumentException e) {
-            if(!e.getMessage().contains("A prohibited code point was found")) {
-                // TODO: What to do???
-//              throw e;
+        if (hostE != null) {
+            try {
+                host = IDN.toASCII(hostE);
+            } catch(IllegalArgumentException e) {
+                if(!e.getMessage().contains("A prohibited code point was found")) {
+                    // TODO: What to do???
+                    // throw e;
+                }
+                host = hostE;
+
             }
-            host = hostE;
-
-        }
-        host = host.replaceAll("^\\.+", "").
+            host = host.replaceAll("^\\.+", "").
                 replaceAll("\\.\\.+", ".").
                 replaceAll("\\.$", "");
+        }
+
         String ip = null;
 //      try {
         ip = attemptIPFormats(host);
@@ -44,7 +47,7 @@
 //      }
         if(ip != null) {
             host = ip;
-        } else {
+        } else if (host != null) {
             host = escapeOnce(host.toLowerCase());
         }
         url.setHost(host);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|