From: <rb...@us...> - 2017-08-01 08:51:24
|
Revision: 14746 http://sourceforge.net/p/htmlunit/code/14746 Author: rbri Date: 2017-08-01 08:51:21 +0000 (Tue, 01 Aug 2017) Log Message: ----------- fix most IE cases regarding encoding of external js Modified Paths: -------------- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/BrowserVersionFeatures.java trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebResponse.java trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlPage.java trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlScript3Test.java Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/BrowserVersionFeatures.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/BrowserVersionFeatures.java 2017-07-31 15:53:36 UTC (rev 14745) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/BrowserVersionFeatures.java 2017-08-01 08:51:21 UTC (rev 14746) @@ -848,14 +848,14 @@ @BrowserFeature(CHROME) JS_DOMTOKENLIST_LENGTH_IGNORES_DUPLICATES, + /** DOMTokenList removed all whitespace chars during add. */ + @BrowserFeature(CHROME) + JS_DOMTOKENLIST_REMOVE_WHITESPACE_CHARS_ON_ADD, + /** DOMTokenList removed all whitespace chars during edit. */ @BrowserFeature({CHROME, IE}) JS_DOMTOKENLIST_REMOVE_WHITESPACE_CHARS_ON_EDIT, - /** DOMTokenList removed all whitespace chars during add. */ - @BrowserFeature(CHROME) - JS_DOMTOKENLIST_REMOVE_WHITESPACE_CHARS_ON_ADD, - /** DOMTokenList removed all whitespace chars during remove. */ @BrowserFeature({CHROME, FF52}) JS_DOMTOKENLIST_REMOVE_WHITESPACE_CHARS_ON_REMOVE, @@ -945,6 +945,10 @@ @BrowserFeature(IE) JS_IGNORES_LAST_LINE_CONTAINING_UNCOMMENTED, + /** Ignore the UTF8 BOM header when loading external js in some situations. */ + @BrowserFeature(IE) + JS_IGNORES_UTF8_BOM_SOMETIMES, + /** * The complete property returns also true, if the image download was failing * or if there was no src at all. Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebResponse.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebResponse.java 2017-07-31 15:53:36 UTC (rev 14745) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebResponse.java 2017-08-01 08:51:21 UTC (rev 14746) @@ -208,6 +208,20 @@ * @return the response content as a string or null if the content retrieval was failing */ public String getContentAsString(final Charset encoding) { + return getContentAsString(encoding, false); + } + + /** + * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br> + * + * Returns the response content as a string, using the specified charset, + * rather than the charset/encoding specified in the server response. + * If there is a bom header the charset parameter will be overwritten by the bom. + * @param encoding the charset/encoding to use to convert the response content into a string + * @param ignoreUtf8Bom if true utf8 bom header will be ignored + * @return the response content as a string or null if the content retrieval was failing + */ + public String getContentAsString(final Charset encoding, final boolean ignoreUtf8Bom) { if (responseData_ != null) { try (InputStream in = responseData_.getInputStream()) { if (in != null) { @@ -215,7 +229,7 @@ // there seems to be a bug in BOMInputStream // we have to call this before hasBOM(ByteOrderMark) if (bomIn.hasBOM()) { - if (bomIn.hasBOM(ByteOrderMark.UTF_8)) { + if (!ignoreUtf8Bom && bomIn.hasBOM(ByteOrderMark.UTF_8)) { return IOUtils.toString(bomIn, StandardCharsets.UTF_8); } if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) { Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlPage.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlPage.java 2017-07-31 15:53:36 UTC (rev 14745) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlPage.java 2017-08-01 08:51:21 UTC (rev 14746) @@ -19,6 +19,7 @@ import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.FOCUS_BODY_ELEMENT_AT_START; import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_CALL_RESULT_IS_LAST_RETURN_VALUE; import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_DEFERRED; +import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.JS_IGNORES_UTF8_BOM_SOMETIMES; import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.PAGE_SELECTION_RANGE_FROM_SELECTABLE_TEXT_INPUT; import static com.gargoylesoftware.htmlunit.BrowserVersionFeatures.URL_MISSING_SLASHES; import static java.nio.charset.StandardCharsets.ISO_8859_1; @@ -1070,18 +1071,29 @@ } Charset scriptEncoding = Charset.forName("windows-1252"); + boolean ignoreBom = false; final Charset contentCharset = EncodingSniffer.sniffEncodingFromHttpHeaders(response.getResponseHeaders()); if (contentCharset == null) { // use info from script tag or fall back to utf-8 - if (scriptCharset != null && scriptCharset != ISO_8859_1) { + if (scriptCharset != null && ISO_8859_1 != scriptCharset) { + ignoreBom = true; scriptEncoding = scriptCharset; } + else { + ignoreBom = ISO_8859_1 != scriptCharset; + } } - else if (contentCharset != ISO_8859_1) { + else if (ISO_8859_1 != contentCharset) { + ignoreBom = true; scriptEncoding = contentCharset; } + else { + ignoreBom = true; + } - final String scriptCode = response.getContentAsString(scriptEncoding); + final String scriptCode = response.getContentAsString(scriptEncoding, + ignoreBom + && getWebClient().getBrowserVersion().hasFeature(JS_IGNORES_UTF8_BOM_SOMETIMES)); if (null != scriptCode) { final AbstractJavaScriptEngine<?> javaScriptEngine = client.getJavaScriptEngine(); final Object script = javaScriptEngine.compile(this, scriptCode, url.toExternalForm(), 1); Modified: trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlScript3Test.java =================================================================== --- trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlScript3Test.java 2017-07-31 15:53:36 UTC (rev 14745) +++ trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlScript3Test.java 2017-08-01 08:51:21 UTC (rev 14746) @@ -287,7 +287,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _ISO88591__ISO88591__BOMUTF8() throws Exception { charset(TestCharset.ISO88591, null, TestCharset.ISO88591, null, BOM_UTF_8); } @@ -307,7 +306,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _ISO88591__ISO88591_UTF8_BOMUTF8() throws Exception { charset(TestCharset.ISO88591, null, TestCharset.ISO88591, TestCharset.UTF8, BOM_UTF_8); } @@ -390,7 +388,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _ISO88591_UTF8_ISO88591__BOMUTF8() throws Exception { charset(TestCharset.ISO88591, TestCharset.UTF8, TestCharset.ISO88591, null, BOM_UTF_8); } @@ -410,7 +407,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _ISO88591_UTF8_ISO88591_UTF8_BOMUTF8() throws Exception { charset(TestCharset.ISO88591, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.UTF8, BOM_UTF_8); } @@ -520,7 +516,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _ISO88591_ISO88591_ISO88591__BOMUTF8() throws Exception { charset(TestCharset.ISO88591, TestCharset.ISO88591, TestCharset.ISO88591, null, BOM_UTF_8); } @@ -540,7 +535,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _ISO88591_ISO88591_ISO88591_UTF8_BOMUTF8() throws Exception { charset(TestCharset.ISO88591, TestCharset.ISO88591, TestCharset.ISO88591, TestCharset.UTF8, BOM_UTF_8); } @@ -740,7 +734,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _UTF8_ISO88591_ISO88591_UTF8_BOMUTF8() throws Exception { charset(TestCharset.UTF8, TestCharset.ISO88591, TestCharset.ISO88591, TestCharset.UTF8, BOM_UTF_8); } @@ -751,7 +744,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _UTF8_ISO88591_ISO88591__BOMUTF8() throws Exception { charset(TestCharset.UTF8, TestCharset.ISO88591, TestCharset.ISO88591, null, BOM_UTF_8); } @@ -762,7 +754,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _UTF8_UTF8_ISO88591_UTF8_BOMUTF8() throws Exception { charset(TestCharset.UTF8, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.UTF8, BOM_UTF_8); } @@ -773,7 +764,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _UTF8_UTF8_ISO88591__BOMUTF8() throws Exception { charset(TestCharset.UTF8, TestCharset.UTF8, TestCharset.ISO88591, null, BOM_UTF_8); } @@ -784,7 +774,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _UTF8__ISO88591_UTF8_BOMUTF8() throws Exception { charset(TestCharset.UTF8, null, TestCharset.ISO88591, TestCharset.UTF8, BOM_UTF_8); } @@ -795,7 +784,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _UTF8__ISO88591__BOMUTF8() throws Exception { charset(TestCharset.UTF8, null, TestCharset.ISO88591, null, BOM_UTF_8); } @@ -977,7 +965,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void __ISO88591_ISO88591_UTF8_BOMUTF8() throws Exception { charset(null, TestCharset.ISO88591, TestCharset.ISO88591, TestCharset.UTF8, BOM_UTF_8); } @@ -988,7 +975,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void __ISO88591_ISO88591__BOMUTF8() throws Exception { charset(null, TestCharset.ISO88591, TestCharset.ISO88591, null, BOM_UTF_8); } @@ -999,7 +985,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void __UTF8_ISO88591_UTF8_BOMUTF8() throws Exception { charset(null, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.UTF8, BOM_UTF_8); } @@ -1010,7 +995,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void __UTF8_ISO88591__BOMUTF8() throws Exception { charset(null, TestCharset.UTF8, TestCharset.ISO88591, null, BOM_UTF_8); } @@ -1021,7 +1005,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void ___ISO88591_UTF8_BOMUTF8() throws Exception { charset(null, null, TestCharset.ISO88591, TestCharset.UTF8, BOM_UTF_8); } @@ -1032,7 +1015,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void ___ISO88591__BOMUTF8() throws Exception { charset(null, null, TestCharset.ISO88591, null, BOM_UTF_8); } @@ -1216,7 +1198,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _GB2312_ISO88591_ISO88591_UTF8_BOMUTF8() throws Exception { charset(TestCharset.GB2312, TestCharset.ISO88591, TestCharset.ISO88591, TestCharset.UTF8, BOM_UTF_8); } @@ -1227,7 +1208,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _GB2312_ISO88591_ISO88591__BOMUTF8() throws Exception { charset(TestCharset.GB2312, TestCharset.ISO88591, TestCharset.ISO88591, null, BOM_UTF_8); } @@ -1238,7 +1218,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _GB2312_UTF8_ISO88591_UTF8_BOMUTF8() throws Exception { charset(TestCharset.GB2312, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.UTF8, BOM_UTF_8); } @@ -1249,7 +1228,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _GB2312_UTF8_ISO88591__BOMUTF8() throws Exception { charset(TestCharset.GB2312, TestCharset.UTF8, TestCharset.ISO88591, null, BOM_UTF_8); } @@ -1260,7 +1238,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _GB2312__ISO88591_UTF8_BOMUTF8() throws Exception { charset(TestCharset.GB2312, null, TestCharset.ISO88591, TestCharset.UTF8, BOM_UTF_8); } @@ -1271,7 +1248,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _GB2312__ISO88591__BOMUTF8() throws Exception { charset(TestCharset.GB2312, null, TestCharset.ISO88591, null, BOM_UTF_8); } @@ -1282,7 +1258,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _GB2312_ISO88591_ISO88591_ISO88591_BOMUTF8() throws Exception { charset(TestCharset.GB2312, TestCharset.ISO88591, TestCharset.ISO88591, TestCharset.ISO88591, BOM_UTF_8); } @@ -1293,7 +1268,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _GB2312_UTF8_ISO88591_ISO88591_BOMUTF8() throws Exception { charset(TestCharset.GB2312, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.ISO88591, BOM_UTF_8); } @@ -1304,7 +1278,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _GB2312__ISO88591_ISO88591_BOMUTF8() throws Exception { charset(TestCharset.GB2312, null, TestCharset.ISO88591, TestCharset.ISO88591, BOM_UTF_8); } @@ -1315,7 +1288,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _ISO88591_ISO88591_ISO88591_ISO88591_BOMUTF8() throws Exception { charset(TestCharset.ISO88591, TestCharset.ISO88591, TestCharset.ISO88591, TestCharset.ISO88591, BOM_UTF_8); } @@ -1326,7 +1298,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _ISO88591_UTF8_ISO88591_ISO88591_BOMUTF8() throws Exception { charset(TestCharset.ISO88591, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.ISO88591, BOM_UTF_8); } @@ -1337,7 +1308,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _ISO88591__ISO88591_ISO88591_BOMUTF8() throws Exception { charset(TestCharset.ISO88591, null, TestCharset.ISO88591, TestCharset.ISO88591, BOM_UTF_8); } @@ -1348,7 +1318,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _UTF8_ISO88591_ISO88591_ISO88591_BOMUTF8() throws Exception { charset(TestCharset.UTF8, TestCharset.ISO88591, TestCharset.ISO88591, TestCharset.ISO88591, BOM_UTF_8); } @@ -1359,7 +1328,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _UTF8_UTF8_ISO88591_ISO88591_BOMUTF8() throws Exception { charset(TestCharset.UTF8, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.ISO88591, BOM_UTF_8); } @@ -1370,7 +1338,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void _UTF8__ISO88591_ISO88591_BOMUTF8() throws Exception { charset(TestCharset.UTF8, null, TestCharset.ISO88591, TestCharset.ISO88591, BOM_UTF_8); } @@ -1381,7 +1348,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void __ISO88591_ISO88591_ISO88591_BOMUTF8() throws Exception { charset(null, TestCharset.ISO88591, TestCharset.ISO88591, TestCharset.ISO88591, BOM_UTF_8); } @@ -1392,7 +1358,6 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void __UTF8_ISO88591_ISO88591_BOMUTF8() throws Exception { charset(null, TestCharset.UTF8, TestCharset.ISO88591, TestCharset.ISO88591, BOM_UTF_8); } @@ -1403,8 +1368,37 @@ @Test @Alerts(DEFAULT = {"a", "ä", "أهلاً", "мир", "房间"}, IE = {"a", "ä", "أهلاً", "мир", "房间"}) - @NotYetImplemented(IE) public void ___ISO88591_ISO88591_BOMUTF8() throws Exception { charset(null, null, TestCharset.ISO88591, TestCharset.ISO88591, BOM_UTF_8); } + + /** + * @throws Exception if the test fails + */ + @Test + @Alerts({"a", "ä", "أهلاً", "мир", "房间"}) + @NotYetImplemented(IE) + public void _GB2312____BOMUTF8() throws Exception { + charset(TestCharset.GB2312, null, null, null, BOM_UTF_8); + } + + /** + * @throws Exception if the test fails + */ + @Test + @Alerts({"a", "ä", "أهلاً", "мир", "房间"}) + @NotYetImplemented(IE) + public void _GB2312___UTF8_BOMUTF8() throws Exception { + charset(TestCharset.GB2312, null, null, TestCharset.UTF8, BOM_UTF_8); + } + + /** + * @throws Exception if the test fails + */ + @Test + @Alerts({"a", "ä", "أهلاً", "мир", "房间"}) + @NotYetImplemented(IE) + public void _GB2312___ISO88591_BOMUTF8() throws Exception { + charset(TestCharset.GB2312, null, null, TestCharset.ISO88591, BOM_UTF_8); + } } |