From: <bra...@us...> - 2010-09-28 21:22:45
|
Revision: 3259 http://archive-access.svn.sourceforge.net/archive-access/?rev=3259&view=rev Author: bradtofel Date: 2010-09-28 21:22:39 +0000 (Tue, 28 Sep 2010) Log Message: ----------- Fixed URL canonicalization test case and underlying code Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-09-28 21:22:20 UTC (rev 3258) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-09-28 21:22:39 UTC (rev 3259) @@ -30,6 +30,7 @@ import java.util.Map; import org.apache.commons.httpclient.URIException; +import org.apache.commons.lang.StringEscapeUtils; import org.archive.net.UURI; import org.archive.net.UURIFactory; import org.htmlparser.util.Translate; @@ -98,7 +99,10 @@ * @throws URISyntaxException if the input URL is malformed */ public String resolve(String url) throws URISyntaxException { - url = Translate.decode(url); + // BUG in Translate.decode(): "foo?a=b&lang=en" acts as if it + // was "&lang;" +// url = Translate.decode(url); + url = StringEscapeUtils.unescapeHtml(url); int hashIdx = url.indexOf('#'); String frag = ""; if(hashIdx != -1) { @@ -111,7 +115,8 @@ System.err.println("No url to resolve!"); return url; } - return baseUrl.resolve(url,false).toString() + frag; + return baseUrl.resolve(url,true).toString() + frag; +// return baseUrl.resolve(url,false).toString() + frag; // return UURIFactory.getInstance(baseUrl, url).toString() + frag; } catch (URIException e) { e.printStackTrace(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source 
development site. |