From: <vin...@us...> - 2012-01-12 22:21:21
|
Revision: 3595
          http://archive-access.svn.sourceforge.net/archive-access/?rev=3595&view=rev
Author:   vinaygoel
Date:     2012-01-12 22:21:14 +0000 (Thu, 12 Jan 2012)
Log Message:
-----------
BUGFIX: Fixed CSS bug with URL too short (String out of bounds exception) in ExtractingParseObserver.java. Added NumberFormatException handling in GoogleURLCanonicalizer.java

Modified Paths:
--------------
    trunk/archive-access/projects/archive-commons/src/main/java/org/archive/resource/html/ExtractingParseObserver.java
    trunk/archive-access/projects/archive-commons/src/main/java/org/archive/url/GoogleURLCanonicalizer.java

Modified: trunk/archive-access/projects/archive-commons/src/main/java/org/archive/resource/html/ExtractingParseObserver.java
===================================================================
--- trunk/archive-access/projects/archive-commons/src/main/java/org/archive/resource/html/ExtractingParseObserver.java 2012-01-12 04:57:09 UTC (rev 3594)
+++ trunk/archive-access/projects/archive-commons/src/main/java/org/archive/resource/html/ExtractingParseObserver.java 2012-01-12 22:21:14 UTC (rev 3595)
@@ -394,6 +394,8 @@
                     url = url.substring(1, origUrlLength - 1);
                     urlStart += 1;
                 } else if (url.charAt(0) == '\\') {
+                    if(origUrlLength == 2)
+                        continue;
                     url = url.substring(2, origUrlLength - 2);
                     urlStart += 2;
                 }

Modified: trunk/archive-access/projects/archive-commons/src/main/java/org/archive/url/GoogleURLCanonicalizer.java
===================================================================
--- trunk/archive-access/projects/archive-commons/src/main/java/org/archive/url/GoogleURLCanonicalizer.java 2012-01-12 04:57:09 UTC (rev 3594)
+++ trunk/archive-access/projects/archive-commons/src/main/java/org/archive/url/GoogleURLCanonicalizer.java 2012-01-12 22:21:14 UTC (rev 3595)
@@ -152,8 +152,12 @@
             if(m2Group == null)
                 return null;
             //int octet = Integer.parseInt(m2.group(i+1).substring((i==0)?0:1));
-            int octet = Integer.parseInt(m2Group.substring((i==0)?0:1));
-
+            int octet;
+            try {
+                octet = Integer.parseInt(m2Group.substring((i==0)?0:1));
+            } catch (NumberFormatException e){
+                return null;
+            }
             if((octet < 0) || (octet > 255)) {
                 return null;
                 // throw new URIException("Bad Host("+host+")");

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|