From: <asa...@us...> - 2012-12-20 03:12:30
|
Revision: 7892 http://sourceforge.net/p/htmlunit/code/7892 Author: asashour Date: 2012-12-20 03:12:25 +0000 (Thu, 20 Dec 2012) Log Message: ----------- Upper-case the charset detected from "meta" tag. Modified Paths: -------------- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlForm.java trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/EncodingSniffer.java trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/WebResponse2Test.java trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlFormTest.java trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlPage3Test.java trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlPageTest.java trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/javascript/host/html/HTMLDocumentTest.java trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/util/EncodingSnifferTest.java Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlForm.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlForm.java 2012-12-19 17:55:04 UTC (rev 7891) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HtmlForm.java 2012-12-20 03:12:25 UTC (rev 7892) @@ -230,7 +230,7 @@ */ private String getSubmitCharset() { if (getAcceptCharsetAttribute().length() > 0) { - return SUBMIT_CHARSET_PATTERN.matcher(getAcceptCharsetAttribute().trim()).replaceAll(""); + return SUBMIT_CHARSET_PATTERN.matcher(getAcceptCharsetAttribute().trim()).replaceAll("").toUpperCase(); } return getPage().getPageEncoding(); } Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/EncodingSniffer.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/EncodingSniffer.java 2012-12-19 17:55:04 UTC (rev 7891) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/EncodingSniffer.java 2012-12-20 03:12:25 UTC (rev 7892) @@ -295,6 +295,7 @@ final String value = pair.getValue(); encoding = extractEncodingFromContentType(value); if (encoding != null) { + encoding = encoding.toUpperCase(); break; } } Modified: trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/WebResponse2Test.java =================================================================== --- trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/WebResponse2Test.java 2012-12-19 17:55:04 UTC (rev 7891) +++ trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/WebResponse2Test.java 2012-12-20 03:12:25 UTC (rev 7892) @@ -40,7 +40,7 @@ + "<body>foo</body>\n" + "</html>"; final HtmlPage page = loadPage(html); - assertEquals("utf-8", page.getWebResponse().getContentCharsetOrNull()); + assertEquals("UTF-8", page.getWebResponse().getContentCharsetOrNull()); assertEquals(html, page.getWebResponse().getContentAsString()); } Modified: trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlFormTest.java =================================================================== --- trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlFormTest.java 2012-12-19 17:55:04 UTC (rev 7891) +++ trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlFormTest.java 2012-12-20 03:12:25 UTC (rev 7892) @@ -1149,12 +1149,12 @@ */ @Test public void submitRequestCharset() throws Exception { - submitRequestCharset("utf-8", null, null, "utf-8"); - submitRequestCharset(null, "utf-8", null, "utf-8"); - submitRequestCharset("iso-8859-1", null, "utf-8", "utf-8"); - submitRequestCharset("iso-8859-1", null, "utf-8, iso-8859-1", "utf-8"); - submitRequestCharset("utf-8", null, "iso-8859-1 utf-8", "iso-8859-1"); - submitRequestCharset("iso-8859-1", null, "utf-8, iso-8859-1", "utf-8"); + submitRequestCharset("utf-8", null, null, "UTF-8"); + submitRequestCharset(null, "utf-8", null, "UTF-8"); + submitRequestCharset("iso-8859-1", null, "utf-8", "UTF-8"); + submitRequestCharset("iso-8859-1", null, "utf-8, iso-8859-1", "UTF-8"); + submitRequestCharset("utf-8", null, "iso-8859-1 utf-8", "ISO-8859-1"); + submitRequestCharset("iso-8859-1", null, "utf-8, iso-8859-1", "UTF-8"); } /** @@ -1207,7 +1207,7 @@ webConnection.setDefaultResponse(html, 200, "ok", contentType); final HtmlPage page = client.getPage(getDefaultUrl()); - final String firstPageEncoding = StringUtils.defaultString(metaCharset, headerCharset); + final String firstPageEncoding = StringUtils.defaultString(metaCharset, headerCharset).toUpperCase(); assertEquals(firstPageEncoding, page.getPageEncoding()); final HtmlForm form = page.getFormByName("form1"); Modified: trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlPage3Test.java =================================================================== --- trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlPage3Test.java 2012-12-19 17:55:04 UTC (rev 7891) +++ trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlPage3Test.java 2012-12-20 03:12:25 UTC (rev 7892) @@ -24,6 +24,7 @@ import org.openqa.selenium.WebElement; import com.gargoylesoftware.htmlunit.BrowserRunner; +import com.gargoylesoftware.htmlunit.BrowserRunner.Alerts; import com.gargoylesoftware.htmlunit.WebDriverTestCase; /** @@ -93,4 +94,36 @@ Assert.assertEquals("js", input.getAttribute("addedBy")); Assert.assertEquals("js", input.getAttribute("addedby")); } + + /** + * @throws Exception if the test fails + */ + @Test + @Alerts(IE = { "undefined", "undefined", "iso-8859-1", "windows-1252" }, + FF = { "ISO-8859-1", "ISO-8859-1", "undefined", "undefined" }, + CHROME = { "ISO-8859-1", "ISO-8859-1", "ISO-8859-1", "ISO-8859-1" }) + public void getPageEncoding() throws Exception { + final String htmlContent = "<html><head>\n" + + " <title>foo</title>\n" + + " <meta http-equiv='Content-Type' content='text/html; charset=Shift_JIS'>\n" + + " <script>\n" + + " function test() {\n" + + " alert(document.inputEncoding);\n" + + " alert(document.characterSet);\n" + + " alert(document.charset);\n" + + " alert(document.defaultCharset);\n" + + " }\n" + + " </script>\n" + + "</head><body onload='test()'>\n" + + "<table><tr><td>\n" + + "<meta name=vs_targetSchema content=\"http://schemas.microsoft.com/intellisense/ie5\">\n" + + "<form name='form1'>\n" + + " <input type='text' name='textfield1' id='textfield1' value='foo' />\n" + + " <input type='text' name='textfield2' id='textfield2'/>\n" + + "</form>\n" + + "</td></tr></table>\n" + + "</body></html>"; + loadPageWithAlerts2(htmlContent); + } + } Modified: trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlPageTest.java =================================================================== --- trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlPageTest.java 2012-12-19 17:55:04 UTC (rev 7891) +++ trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HtmlPageTest.java 2012-12-20 03:12:25 UTC (rev 7892) @@ -607,28 +607,6 @@ } /** - * @throws Exception if the test fails - */ - @Test - public void testGetPageEncoding() throws Exception { - final String htmlContent = "<html><head>\n" - + "<title>foo</title>\n" - + "<meta http-equiv='Content-Type' content='text/html ;charset=Shift_JIS'>\n" - + "</head><body>\n" - + "<table><tr><td>\n" - + "<meta name=vs_targetSchema content=\"http://schemas.microsoft.com/intellisense/ie5\">\n" - + "<form name='form1'>\n" - + " <input type='text' name='textfield1' id='textfield1' value='foo' />\n" - + " <input type='text' name='textfield2' id='textfield2'/>\n" - + "</form>\n" - + "</td></tr></table>\n" - + "</body></html>"; - final HtmlPage page = loadPage(htmlContent); - - assertEquals("shift_jis", page.getPageEncoding()); - } - - /** * Verifies that an empty charset in a content-type meta tag is ignored. See bug 2484753. * @throws Exception if an error occurs */ Modified: trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/javascript/host/html/HTMLDocumentTest.java =================================================================== --- trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/javascript/host/html/HTMLDocumentTest.java 2012-12-19 17:55:04 UTC (rev 7891) +++ trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/javascript/host/html/HTMLDocumentTest.java 2012-12-20 03:12:25 UTC (rev 7892) @@ -1562,7 +1562,7 @@ public void encoding5() throws Exception { final String html = "<html>\n" + "<head>\n" - + "<meta charset='UTF-8'>\n" + + " <meta http-equiv='Content-Type' content='text/html; charset=iso-8859-1'>\n" + " <script>\n" + " function test() {\n" + " alert(document.inputEncoding);\n" @@ -1572,6 +1572,33 @@ + " }\n" + " </script>\n" + "</head><body onload='test()'>\n" + + "</body></html>"; + + final String[] expectedAlerts = getExpectedAlerts(); + final WebDriver driver = loadPage2(html, URL_FIRST, "text/html;charset=utf-8", "ISO-8859-1"); + verifyAlerts(DEFAULT_WAIT_TIME, expectedAlerts, driver); + } + + /** + * @throws Exception if the test fails + */ + @Test + @Alerts(IE = { "undefined", "undefined", "utf-8", "windows-1252" }, + FF = { "UTF-8", "UTF-8", "undefined", "undefined" }, + CHROME = { "UTF-8", "UTF-8", "UTF-8", "ISO-8859-1" }) + public void encoding6() throws Exception { + final String html = "<html>\n" + + "<head>\n" + + " <meta charset='UTF-8'>\n" + + " <script>\n" + + " function test() {\n" + + " alert(document.inputEncoding);\n" + + " alert(document.characterSet);\n" + + " alert(document.charset);\n" + + " alert(document.defaultCharset);\n" + + " }\n" + + " </script>\n" + + "</head><body onload='test()'>\n" + " <a id='myId' href='test?è=è'>test</a>" + "</body></html>"; Modified: trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/util/EncodingSnifferTest.java =================================================================== --- trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/util/EncodingSnifferTest.java 2012-12-19 17:55:04 UTC (rev 7891) +++ trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/util/EncodingSnifferTest.java 2012-12-20 03:12:25 UTC (rev 7892) @@ -41,8 +41,8 @@ testHeader(null, "foo", "bar"); testHeader(null, "Content-Type", "blah"); testHeader(null, "Content-Type", "text/html;charset=blah"); - testHeader("utf-8", "Content-Type", "text/html;charset=utf-8"); - testHeader("utf-8", "Content-Type", "text/html;charset=utf-8;"); + testHeader("UTF-8", "Content-Type", "text/html;charset=utf-8"); + testHeader("UTF-8", "Content-Type", "text/html;charset=utf-8;"); } private void testHeader(final String expectedEncoding, final String headerName, final String headerValue) { @@ -72,10 +72,10 @@ testMeta(null, "<meta a='b'"); testMeta(null, "<meta a='b' c=d e=\"f\"/>"); testMeta(null, "<meta a='b' c=d e=\"f\" content='text/html; charset=blah' />"); - testMeta("utf-8", "<meta a='b' c=d e=\"f\" content='text/html; charset=utf-8' />"); - testMeta("utf-8", "abc <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>"); - testMeta("utf-8", "abc <meta http-equiv='Content-Type' content='text/html; CHARSET=UTF-8'/>"); - testMeta("utf-8", "abc <meta http-equiv='Content-Type' content='text/html; chArsEt=UtF-8'/>"); + testMeta("UTF-8", "<meta a='b' c=d e=\"f\" content='text/html; charset=utf-8' />"); + testMeta("UTF-8", "abc <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>"); + testMeta("UTF-8", "abc <meta http-equiv='Content-Type' content='text/html; CHARSET=UTF-8'/>"); + testMeta("UTF-8", "abc <meta http-equiv='Content-Type' content='text/html; chArsEt=UtF-8'/>"); } private void testMeta(final String expectedEncoding, final String content) { |