From: <bra...@us...> - 2010-08-10 21:24:53
|
Revision: 3224
          http://archive-access.svn.sourceforge.net/archive-access/?rev=3224&view=rev
Author:   bradtofel
Date:     2010-08-10 21:24:46 +0000 (Tue, 10 Aug 2010)

Log Message:
-----------
BUGFIX(ARI-2473): fixed parse bug extracting userinfo from an url.
TWEAK: replaced "dns:" with reference to existing static variable

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java
    trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizerTest.java
    trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-08-10 20:41:33 UTC (rev 3223)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-08-10 21:24:46 UTC (rev 3224)
@@ -131,7 +131,7 @@
     private static final Pattern HOST_REGEX_SIMPLE =
         Pattern.compile("(?:[0-9a-z_.:-]+@)?([0-9a-z_.-]++)");
     private static final Pattern USERINFO_REGEX_SIMPLE =
-        Pattern.compile("([0-9a-z_.:-]+)(?:@[0-9a-z_.-]++)");
+        Pattern.compile("^([0-9a-z_.:-]+)(?:@[0-9a-z_.-]++)");
 
     /**
      * Tests if the String argument looks like it could be a legitimate
@@ -292,8 +292,8 @@
      */
     public static String urlToHost(String url) {
         String lcUrl = url.toLowerCase();
-        if(lcUrl.startsWith("dns:")) {
-            return lcUrl.substring(4);
+        if(lcUrl.startsWith(DNS_SCHEME)) {
+            return lcUrl.substring(DNS_SCHEME.length());
         }
         for(String scheme : ALL_SCHEMES) {
             if(lcUrl.startsWith(scheme)) {
@@ -318,7 +318,7 @@
      */
     public static String urlToUserInfo(String url) {
         String lcUrl = url.toLowerCase();
-        if(lcUrl.startsWith("dns:")) {
+        if(lcUrl.startsWith(DNS_SCHEME)) {
             return null;
         }
         for(String scheme : ALL_SCHEMES) {

Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizerTest.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizerTest.java 2010-08-10 20:41:33 UTC (rev 3223)
+++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizerTest.java 2010-08-10 21:24:46 UTC (rev 3224)
@@ -181,6 +181,10 @@
                 "http://legislature.mi.gov/(a(4hqa0555fwsecu455xqckv45)S(4hqa0555fwsecu455xqckv45)f(4hqa0555fwsecu455xqckv45))/mileg.aspx?page=sessionschedules",
                 "legislature.mi.gov/mileg.aspx?page=sessionschedules");
 
+        // '@' in path:
+        checkCanonicalization(
+                "http://www.flickr.com/photos/36050182@N05/",
+                "flickr.com/photos/36050182@n05/");

Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-08-10 20:41:33 UTC (rev 3223)
+++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-08-10 21:24:46 UTC (rev 3224)
@@ -91,7 +91,9 @@
         assertEquals(null,UrlOperations.urlToUserInfo("http://foo.com:80/"));
         assertEquals(null,UrlOperations.urlToUserInfo("http://foo.com:80"));
         assertEquals(null,UrlOperations.urlToUserInfo("http://www.foo.com:80\\"));
+        assertEquals(null,UrlOperations.urlToUserInfo("http://www.flickr.com/photos/36050182@N05/"));
+
         assertEquals("user",UrlOperations.urlToUserInfo("http://us...@fo..."));
         assertEquals("user",UrlOperations.urlToUserInfo("http://us...@ww..."));
         assertEquals("user:pass",UrlOperations.urlToUserInfo("http://user:pa...@ww..."));
@@ -190,6 +192,8 @@
         assertSDP("http://b:80...@fo.../bar","http://b:80...@fo.../bar");
         assertSDP("http://b:80...@fo.../bar","http://b:80...@fo...:80/bar");
         assertSDP("http://b:80...@fo...:8080/ba","http://b:80...@fo...:8080/ba");
+        assertSDP("http://www.flickr.com/photos/36050182@N05/","http://www.flickr.com/photos/36050182@N05/");
+
     }
     private void assertSDP(String want, String orig) {
         String got = UrlOperations.stripDefaultPortFromUrl(orig);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|