From: <asa...@us...> - 2012-12-22 11:32:03
|
Revision: 7899 http://sourceforge.net/p/htmlunit/code/7899 Author: asashour Date: 2012-12-22 11:31:58 +0000 (Sat, 22 Dec 2012) Log Message: ----------- - Fix the encoding of clicked links to match the enclosing page (FF). - UrlUtils: deprecate encodeUrl(URL, boolean). Issue 1402 Modified Paths: -------------- trunk/htmlunit/src/changes/changes.xml trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlAnchor.java trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/UrlUtils.java trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/javascript/host/html/HTMLDocumentTest.java trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/util/UrlUtilsTest.java Modified: trunk/htmlunit/src/changes/changes.xml =================================================================== --- trunk/htmlunit/src/changes/changes.xml 2012-12-22 06:13:04 UTC (rev 7898) +++ trunk/htmlunit/src/changes/changes.xml 2012-12-22 11:31:58 UTC (rev 7899) @@ -9,6 +9,12 @@ <body> <release version="2.12" date="???" description="Bugfixes"> <action type="update" dev="asashour"> + UrlUtils: deprecate encodeUrl(URL, boolean). + </action> + <action type="fix" dev="asashour" issue="1402"> + Fix the encoding of clicked links to match the enclosing page (FF). + </action> + <action type="update" dev="asashour"> Upper-case the charset detected from "meta" tag. </action> <action type="update" dev="mguillem"> Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java 2012-12-22 06:13:04 UTC (rev 7898) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java 2012-12-22 11:31:58 UTC (rev 7899) @@ -241,7 +241,7 @@ // URLs; because of this we allow some Unicode chars in URLs. However, at this point we're // handing things over the HttpClient, and HttpClient will blow up if we leave these Unicode // chars in the URL. - final URL url = UrlUtils.encodeUrl(webRequest.getUrl(), false); + final URL url = UrlUtils.encodeUrl(webRequest.getUrl(), false, webRequest.getCharset()); final String charset = webRequest.getCharset(); // URIUtils.createURI is deprecated but as of httpclient-4.2.1, URIBuilder doesn't work here as it encodes path Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java 2012-12-22 06:13:04 UTC (rev 7898) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java 2012-12-22 11:31:58 UTC (rev 7899) @@ -1422,7 +1422,8 @@ WebAssert.notNull("method", method); WebAssert.notNull("parameters", parameters); - url = UrlUtils.encodeUrl(url, getBrowserVersion().hasFeature(URL_MINIMAL_QUERY_ENCODING)); + url = UrlUtils.encodeUrl(url, getBrowserVersion().hasFeature(URL_MINIMAL_QUERY_ENCODING), + webRequest.getCharset()); webRequest.setUrl(url); if (LOG.isDebugEnabled()) { Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlAnchor.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlAnchor.java 2012-12-22 06:13:04 UTC (rev 7898) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlAnchor.java 2012-12-22 11:31:58 UTC (rev 7899) @@ -116,6 +116,7 @@ } final WebRequest webRequest = new WebRequest(url); + webRequest.setCharset(page.getPageEncoding()); webRequest.setAdditionalHeader("Referer", page.getUrl().toExternalForm()); if (LOG.isDebugEnabled()) { LOG.debug( Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/UrlUtils.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/UrlUtils.java 2012-12-22 06:13:04 UTC (rev 7898) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/UrlUtils.java 2012-12-22 11:31:58 UTC (rev 7899) @@ -22,6 +22,7 @@ import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.net.URLCodec; +import com.gargoylesoftware.htmlunit.TextUtil; import com.gargoylesoftware.htmlunit.WebAssert; /** @@ -203,8 +204,25 @@ * @param url the URL to encode * @param minimalQueryEncoding whether or not to perform minimal query encoding, like IE does * @return the encoded URL + * @deprecated as of 2.12, please use {@link #encodeUrl(URL, boolean, String)} instead */ + @Deprecated public static URL encodeUrl(final URL url, final boolean minimalQueryEncoding) { + return encodeUrl(url, minimalQueryEncoding, TextUtil.DEFAULT_CHARSET); + } + + /** + * <p>Encodes illegal characters in the specified URL's path, query string and anchor according to the URL + * encoding rules observed in real browsers.</p> + * + * <p>For example, this method changes <tt>"http://first/?a=b c"</tt> to <tt>"http://first/?a=b%20c"</tt>.</p> + * + * @param url the URL to encode + * @param minimalQueryEncoding whether or not to perform minimal query encoding, like IE does + * @param charset the charset + * @return the encoded URL + */ + public static URL encodeUrl(final URL url, final boolean minimalQueryEncoding, final String charset) { final String p = url.getProtocol(); if ("javascript".equalsIgnoreCase(p) || "about".equalsIgnoreCase(p) || "data".equalsIgnoreCase(p)) { // Special exception. @@ -213,7 +231,7 @@ try { String path = url.getPath(); if (path != null) { - path = encode(path, PATH_ALLOWED_CHARS, "utf-8"); + path = encode(path, PATH_ALLOWED_CHARS, "UTF-8"); } String query = url.getQuery(); if (query != null) { @@ -221,12 +239,12 @@ query = org.apache.commons.lang3.StringUtils.replace(query, " ", "%20"); } else { - query = encode(query, QUERY_ALLOWED_CHARS, "windows-1252"); + query = encode(query, QUERY_ALLOWED_CHARS, charset); } } String anchor = url.getRef(); if (anchor != null) { - anchor = encode(anchor, ANCHOR_ALLOWED_CHARS, "utf-8"); + anchor = encode(anchor, ANCHOR_ALLOWED_CHARS, "UTF-8"); } return createNewUrl(url.getProtocol(), url.getHost(), url.getPort(), path, anchor, query); } @@ -244,7 +262,7 @@ */ public static String encodeAnchor(String anchor) { if (anchor != null) { - anchor = encode(anchor, ANCHOR_ALLOWED_CHARS, "utf-8"); + anchor = encode(anchor, ANCHOR_ALLOWED_CHARS, "UTF-8"); } return anchor; } Modified: trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/javascript/host/html/HTMLDocumentTest.java =================================================================== --- trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/javascript/host/html/HTMLDocumentTest.java 2012-12-22 06:13:04 UTC (rev 7898) +++ trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/javascript/host/html/HTMLDocumentTest.java 2012-12-22 11:31:58 UTC (rev 7899) @@ -1606,4 +1606,30 @@ final WebDriver driver = loadPage2(html, URL_FIRST, "text/html", "UTF-8"); verifyAlerts(DEFAULT_WAIT_TIME, expectedAlerts, driver); } + + /** + * @throws Exception if the test fails + */ + @Test + public void encoding7() throws Exception { + final String html = "<html>\n" + + "<head>\n" + + "<meta charset='UTF-8'>\n" + + "</head><body>\n" + + " <a id='myId' href='test?\u00E8=\u00E8'>test</a>" + + "</body></html>"; + + final WebDriver driver = loadPage2(html, URL_FIRST, "text/html", "UTF-8"); + driver.findElement(By.id("myId")).click(); + String actualQuery = driver.getCurrentUrl(); + actualQuery = actualQuery.substring(actualQuery.indexOf('?')); + final String expectedQuery; + if (getBrowserVersion().isIE()) { + expectedQuery = "?\u00E8=\u00E8"; + } + else { + expectedQuery = "?%C3%A8=%C3%A8"; + } + assertTrue(actualQuery.endsWith(expectedQuery)); + } } Modified: trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/util/UrlUtilsTest.java =================================================================== --- trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/util/UrlUtilsTest.java 2012-12-22 06:13:04 UTC (rev 7898) +++ trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/util/UrlUtilsTest.java 2012-12-22 11:31:58 UTC (rev 7899) @@ -19,6 +19,7 @@ import org.junit.Test; import com.gargoylesoftware.htmlunit.SimpleWebTestCase; +import com.gargoylesoftware.htmlunit.TextUtil; /** * Tests for {@link UrlUtils}. @@ -230,7 +231,8 @@ @Test public void percent() throws Exception { final URL url = new URL("http://localhost/bug%21.html"); - assertEquals("http://localhost/bug%21.html", UrlUtils.encodeUrl(url, false).toExternalForm()); + assertEquals("http://localhost/bug%21.html", + UrlUtils.encodeUrl(url, false, TextUtil.DEFAULT_CHARSET).toExternalForm()); } /** |