From: <bra...@us...> - 2010-03-20 01:05:45
|
Revision: 2988
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2988&view=rev
Author:   bradtofel
Date:     2010-03-20 01:05:39 +0000 (Sat, 20 Mar 2010)

Log Message:
-----------
BUGFIX(unreported): replaced URL to host processing with REGEX, to better handle URLs with freakish illegal characters before the port/path start.

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java	2010-03-20 01:02:49 UTC (rev 2987)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java	2010-03-20 01:05:39 UTC (rev 2988)
@@ -92,6 +92,9 @@
         Pattern.compile("(([0-9a-z_.-]+)\\.(" + ALL_TLD_PATTERN + "))|" +
                 "(" + IP_PATTERN + ")");
 
+    private static final Pattern AUTHORITY_REGEX_SIMPLE =
+        Pattern.compile("([0-9a-z_.-]++)");
+
     /**
      * @param urlPart
      * @return boolean indicating whether urlPart might be an Authority.
@@ -186,22 +189,11 @@
         for(String scheme : ALL_SCHEMES) {
             if(url.startsWith(scheme)) {
                 int hostIdx = scheme.length();
-                int portIdx = url.indexOf(PORT_SEPARATOR, hostIdx + 1);
-                int pathIdx = url.indexOf(PATH_START, hostIdx + 1);
-                if(portIdx == -1 && pathIdx == -1) {
-                    return url.substring(hostIdx);
+
+                Matcher m = AUTHORITY_REGEX_SIMPLE.matcher(url.substring(hostIdx));
+                if(m.find()) {
+                    return m.group(0);
                 }
-                if(portIdx == -1) {
-                    return url.substring(hostIdx,pathIdx);
-                }
-                if(pathIdx == -1) {
-                    return url.substring(hostIdx,portIdx);
-                }
-                if(pathIdx > portIdx) {
-                    return url.substring(hostIdx,portIdx);
-                } else {
-                    return url.substring(hostIdx,pathIdx);
-                }
             }
         }
         return url;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|