From: <rb...@us...> - 2018-07-18 06:15:28
|
Revision: 15475 http://sourceforge.net/p/htmlunit/code/15475 Author: rbri Date: 2018-07-18 06:15:19 +0000 (Wed, 18 Jul 2018) Log Message: ----------- next step in our endless encoding fight - back to the more explicit approach Modified Paths: -------------- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebRequest.java trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlAnchor.java trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlLink.java Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java 2018-07-17 19:37:11 UTC (rev 15474) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/HttpWebConnection.java 2018-07-18 06:15:19 UTC (rev 15475) @@ -287,20 +287,17 @@ throws URISyntaxException { final HttpContext httpContext = getHttpContext(); - final Charset encCharset = webRequest.getUrlEncodingCharset(); - + final Charset charset = webRequest.getCharset(); // Make sure that the URL is fully encoded. IE actually sends some Unicode chars in request // URLs; because of this we allow some Unicode chars in URLs. However, at this point we're // handing things over the HttpClient, and HttpClient will blow up if we leave these Unicode // chars in the URL. - final URL url = UrlUtils.encodeUrl(webRequest.getUrl(), false, encCharset); + final URL url = UrlUtils.encodeUrl(webRequest.getUrl(), false, charset); URI uri = UrlUtils.toURI(url, escapeQuery(url.getQuery())); if (getVirtualHost() != null) { uri = URI.create(getVirtualHost()); } - - final Charset charset = webRequest.getCharset(); final HttpRequestBase httpMethod = buildHttpMethod(webRequest.getHttpMethod(), uri); setProxy(httpMethod, webRequest); @@ -309,7 +306,7 @@ if (!webRequest.getRequestParameters().isEmpty()) { final List<NameValuePair> pairs = webRequest.getRequestParameters(); final org.apache.http.NameValuePair[] httpClientPairs = NameValuePair.toHttpClient(pairs); - final String query = URLEncodedUtils.format(Arrays.asList(httpClientPairs), encCharset); + final String query = URLEncodedUtils.format(Arrays.asList(httpClientPairs), charset); uri = UrlUtils.toURI(url, query); httpMethod.setURI(uri); } Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java 2018-07-17 19:37:11 UTC (rev 15474) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java 2018-07-18 06:15:19 UTC (rev 15475) @@ -36,6 +36,7 @@ import java.net.URLConnection; import java.net.URLDecoder; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; @@ -461,6 +462,8 @@ @SuppressWarnings("unchecked") public <P extends Page> P getPage(final URL url) throws IOException, FailingHttpStatusCodeException { final WebRequest request = new WebRequest(url, getBrowserVersion().getHtmlAcceptHeader()); + request.setCharset(StandardCharsets.UTF_8); + return (P) getPage(getCurrentWindow().getTopWindow(), request); } @@ -881,6 +884,7 @@ if (url != null) { try { final WebRequest request = new WebRequest(url, getBrowserVersion().getHtmlAcceptHeader()); + request.setCharset(StandardCharsets.UTF_8); if (getBrowserVersion().hasFeature(DIALOGWINDOW_REFERER) && openerPage != null) { @@ -1012,6 +1016,7 @@ final HtmlPage openerPage = (HtmlPage) opener.getEnclosedPage(); final WebRequest request = new WebRequest(url, getBrowserVersion().getHtmlAcceptHeader()); + request.setCharset(StandardCharsets.UTF_8); if (getBrowserVersion().hasFeature(DIALOGWINDOW_REFERER) && openerPage != null) { final String referer = openerPage.getUrl().toExternalForm(); @@ -1337,7 +1342,7 @@ WebAssert.notNull("parameters", parameters); url = UrlUtils.encodeUrl(url, getBrowserVersion().hasFeature(URL_MINIMAL_QUERY_ENCODING), - webRequest.getUrlEncodingCharset()); + webRequest.getCharset()); webRequest.setUrl(url); if (LOG.isDebugEnabled()) { Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebRequest.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebRequest.java 2018-07-17 19:37:11 UTC (rev 15474) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebRequest.java 2018-07-18 06:15:19 UTC (rev 15475) @@ -23,7 +23,6 @@ import java.net.IDN; import java.net.URL; import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -64,7 +63,6 @@ private Credentials urlCredentials_; private Credentials credentials_; private transient Charset charset_ = ISO_8859_1; - private transient Charset urlEncodingCharset_; /* These two are mutually exclusive; additionally, requestBody_ should only be set for POST requests. */ private List<NameValuePair> requestParameters_ = Collections.emptyList(); @@ -432,31 +430,6 @@ } /** - * @param charset the character set to use for url (param) encoding - */ - public void setUrlEncodingCharset(final Charset charset) { - urlEncodingCharset_ = charset; - } - - /** - * @return the character set to use to encode the url (params) - */ - public Charset getUrlEncodingCharset() { - if (urlEncodingCharset_ != null) { - return urlEncodingCharset_; - } - - if (HttpMethod.GET == getHttpMethod() - || HttpMethod.DELETE == getHttpMethod() - || HttpMethod.HEAD == getHttpMethod() - || HttpMethod.OPTIONS == getHttpMethod() - || HttpMethod.TRACE == getHttpMethod()) { - return StandardCharsets.UTF_8; - } - return charset_; - } - - /** * Returns a string representation of this object. * @return a string representation of this object */ Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlAnchor.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlAnchor.java 2018-07-17 19:37:11 UTC (rev 15474) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlAnchor.java 2018-07-18 06:15:19 UTC (rev 15475) @@ -169,9 +169,9 @@ } final WebRequest webRequest = new WebRequest(url, browser.getHtmlAcceptHeader()); + // use the page encoding even if this is a GET requests webRequest.setCharset(page.getCharset()); - // use the page encoding even if this is a GET requests - webRequest.setUrlEncodingCharset(page.getCharset()); + webRequest.setAdditionalHeader(HttpHeader.REFERER, page.getUrl().toExternalForm()); if (LOG.isDebugEnabled()) { LOG.debug( Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlLink.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlLink.java 2018-07-17 19:37:11 UTC (rev 15474) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlLink.java 2018-07-18 06:15:19 UTC (rev 15475) @@ -228,7 +228,7 @@ final WebRequest request = new WebRequest(url); // use the page encoding even if this is a GET requests - request.setUrlEncodingCharset(page.getCharset()); + request.setCharset(page.getCharset()); request.setAdditionalHeader(HttpHeader.REFERER, page.getUrl().toExternalForm()); |