|
From: <bra...@us...> - 2010-03-20 01:05:45
|
Revision: 2988
http://archive-access.svn.sourceforge.net/archive-access/?rev=2988&view=rev
Author: bradtofel
Date: 2010-03-20 01:05:39 +0000 (Sat, 20 Mar 2010)
Log Message:
-----------
BUGFIX(unreported): replaced the URL-to-host processing with a regex, to better handle URLs with freakish illegal characters before the port/path start.
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-03-20 01:02:49 UTC (rev 2987)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-03-20 01:05:39 UTC (rev 2988)
@@ -92,6 +92,9 @@
Pattern.compile("(([0-9a-z_.-]+)\\.(" + ALL_TLD_PATTERN + "))|" +
"(" + IP_PATTERN + ")");
+ private static final Pattern AUTHORITY_REGEX_SIMPLE =
+ Pattern.compile("([0-9a-z_.-]++)");
+
/**
* @param urlPart
* @return boolean indicating whether urlPart might be an Authority.
@@ -186,22 +189,11 @@
for(String scheme : ALL_SCHEMES) {
if(url.startsWith(scheme)) {
int hostIdx = scheme.length();
- int portIdx = url.indexOf(PORT_SEPARATOR, hostIdx + 1);
- int pathIdx = url.indexOf(PATH_START, hostIdx + 1);
- if(portIdx == -1 && pathIdx == -1) {
- return url.substring(hostIdx);
+
+ Matcher m = AUTHORITY_REGEX_SIMPLE.matcher(url.substring(hostIdx));
+ if(m.find()) {
+ return m.group(0);
}
- if(portIdx == -1) {
- return url.substring(hostIdx,pathIdx);
- }
- if(pathIdx == -1) {
- return url.substring(hostIdx,portIdx);
- }
- if(pathIdx > portIdx) {
- return url.substring(hostIdx,portIdx);
- } else {
- return url.substring(hostIdx,pathIdx);
- }
}
}
return url;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|