From: <bra...@us...> - 2008-07-01 23:29:23
|
Revision: 2361 http://archive-access.svn.sourceforge.net/archive-access/?rev=2361&view=rev Author: bradtofel Date: 2008-07-01 16:29:31 -0700 (Tue, 01 Jul 2008) Log Message: ----------- FEATURE: added urlToHost() Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2008-07-01 23:27:49 UTC (rev 2360) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2008-07-01 23:29:31 UTC (rev 2361) @@ -16,6 +16,29 @@ */ public class UrlOperations { + public final static String DNS_SCHEME = "dns:"; + public final static String HTTP_SCHEME = "http://"; + public final static String HTTPS_SCHEME = "https://"; + public final static String FTP_SCHEME = "ftp://"; + public final static String MMS_SCHEME = "mms://"; + public final static String RTSP_SCHEME = "rtsp://"; + // go brewster + public final static String WAIS_SCHEME = "wais://"; + + public final static String ALL_SCHEMES[] = { + HTTP_SCHEME, + HTTPS_SCHEME, + FTP_SCHEME, + MMS_SCHEME, + RTSP_SCHEME, + WAIS_SCHEME + }; + + + public final static char PORT_SEPARATOR = ':'; + public final static char PATH_START = '/'; + + private static final String CC_TLDS = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq" + "|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs" + "|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cx" + @@ -73,4 +96,32 @@ } return resolvedURI.getEscapedURI(); } + + public static String urlToHost(String url) { + if(url.startsWith("dns:")) { + return url.substring(4); + } + for(String scheme : ALL_SCHEMES) { + if(url.startsWith(scheme)) { + int hostIdx = scheme.length(); + int portIdx = url.indexOf(PORT_SEPARATOR, hostIdx + 1); + int pathIdx = url.indexOf(PATH_START, hostIdx + 1); + if(portIdx == -1 && pathIdx == -1) { + return url.substring(hostIdx); + } + if(portIdx == -1) { + return url.substring(hostIdx,pathIdx); + } + if(pathIdx == -1) { + return url.substring(hostIdx,portIdx); + } + if(pathIdx > portIdx) { + return url.substring(hostIdx,portIdx); + } else { + return url.substring(hostIdx,pathIdx); + } + } + } + return url; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2008-07-01 23:27:49 UTC (rev 2360) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2008-07-01 23:29:31 UTC (rev 2361) @@ -27,5 +27,42 @@ } else { assertTrue("String("+s+") is not an Authority",want == got); } - } + } + public void testUrlToHost() { + assertEquals("foo.com",UrlOperations.urlToHost("dns:foo.com")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com/")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190/")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120/path")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180/path")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190/path")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120/path/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180/path/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190/path/")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120/path:/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180/path:/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190/path:/")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com/path:/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com/path:/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com/path:/")); + + + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |