From: <rb...@us...> - 2014-01-31 21:55:19
|
Revision: 9095
http://sourceforge.net/p/htmlunit/code/9095
Author: rbri
Date: 2014-01-31 21:55:16 +0000 (Fri, 31 Jan 2014)
Log Message:
-----------
HtmlParser fixed for pages with scripts outside the html tag

Modified Paths:
--------------
trunk/htmlunit/src/changes/changes.xml
trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HTMLParser.java
trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HTMLParser4Test.java
trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/MalformedHtmlTest.java

Modified: trunk/htmlunit/src/changes/changes.xml
===================================================================
--- trunk/htmlunit/src/changes/changes.xml 2014-01-31 15:07:40 UTC (rev 9094)
+++ trunk/htmlunit/src/changes/changes.xml 2014-01-31 21:55:16 UTC (rev 9095)
@@ -8,6 +8,9 @@
 <body>
 <release version="2.14" date="???" description="FF24, Bugfixes, initial work on IE11">
+ <action type="fix" dev="rbri">
+ HtmlParser fixed for pages with scripts outside the html tag.
+ </action>
 <action type="add" dev="asashour" issue="215" system="features">
 HtmlFileInput: ability to upload multiple files using setValueAttribute(String []).
</action> Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HTMLParser.java =================================================================== --- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HTMLParser.java 2014-01-31 15:07:40 UTC (rev 9094) +++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/html/HTMLParser.java 2014-01-31 21:55:16 UTC (rev 9095) @@ -382,6 +382,9 @@ */ static final class HtmlUnitDOMBuilder extends AbstractSAXParser implements ContentHandler, LexicalHandler, HTMLTagBalancingListener { + + private enum HeadParsed { YES, SYNTESIZED, NO }; + private final HtmlPage page_; private Locator locator_; @@ -389,7 +392,7 @@ private DomNode currentNode_; private StringBuilder characters_; - private boolean headParsed_ = false; + private HeadParsed headParsed_ = HeadParsed.NO; private boolean parsingInnerHead_ = false; private HtmlElement head_; private HtmlElement body_; @@ -498,6 +501,15 @@ public void startDocument() throws SAXException { } + /** {@inheritDoc} */ + @Override + public void startElement(final QName element, final XMLAttributes attributes, final Augmentations augs) + throws XNIException { + // just to have local access to the augmentations. A better way? + augmentations_ = augs; + super.startElement(element, attributes, augs); + } + /** {@inheritDoc ContentHandler#startElement(String,String,String,Attributes)} */ public void startElement( String namespaceURI, final String localName, @@ -519,18 +531,19 @@ namespaceURI = namespaceURI.trim(); } if ("head".equals(tagLower)) { - if (headParsed_ || page_.isParsingHtmlSnippet()) { + if (headParsed_ == HeadParsed.YES || page_.isParsingHtmlSnippet()) { parsingInnerHead_ = true; return; } - headParsed_ = true; + + headParsed_ = isSynthesized(augmentations_) ? 
HeadParsed.SYNTESIZED : HeadParsed.YES; } // add a head if none was there - else if (!headParsed_ && ("body".equals(tagLower) || "frameset".equals(tagLower))) { + else if (headParsed_ == HeadParsed.NO && ("body".equals(tagLower) || "frameset".equals(tagLower))) { final ElementFactory factory = getElementFactory(page_, namespaceURI, "head"); final DomElement newElement = factory.createElement(page_, "head", null); currentNode_.appendChild(newElement); - headParsed_ = true; + headParsed_ = HeadParsed.SYNTESIZED; } // If we're adding a body element, keep track of any temporary synthetic ones @@ -898,7 +911,7 @@ } } - if (headParsed_ && "head".equalsIgnoreCase(elem.localpart)) { + if (headParsed_ == HeadParsed.YES && "head".equalsIgnoreCase(elem.localpart)) { parsingInnerHead_ = true; } } @@ -917,6 +930,12 @@ page_.setBuilder(oldBuilder); } } + + private boolean isSynthesized(final Augmentations augs) { + final HTMLEventInfo info = (augs == null) ? null + : (HTMLEventInfo) augs.getItem(FEATURE_AUGMENTATIONS); + return info != null ? 
info.isSynthesized() : false; + } } } Modified: trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HTMLParser4Test.java =================================================================== --- trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HTMLParser4Test.java 2014-01-31 15:07:40 UTC (rev 9094) +++ trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/HTMLParser4Test.java 2014-01-31 21:55:16 UTC (rev 9095) @@ -17,6 +17,7 @@ import static com.gargoylesoftware.htmlunit.BrowserRunner.Browser.CHROME; import static com.gargoylesoftware.htmlunit.BrowserRunner.Browser.FF; import static com.gargoylesoftware.htmlunit.BrowserRunner.Browser.IE11; +import static com.gargoylesoftware.htmlunit.BrowserRunner.Browser.IE8; import org.junit.Test; import org.junit.runner.RunWith; @@ -90,6 +91,97 @@ } /** + * @throws Exception failure + */ + @Test + @Alerts(DEFAULT = { "4", "[object HTMLScriptElement]", "[object Text]", + "[object HTMLTitleElement]", "[object Text]" }, + IE8 = { "2", "[object HTMLTitleElement]", "[object HTMLScriptElement]", "undefined", "undefined" }) + @NotYetImplemented(IE8) + public void badlyFormedHTML_scriptBeforeHead() throws Exception { + final String html = HtmlPageTest.STANDARDS_MODE_PREFIX_ + + "<script>var i=7;</script>\n" + + "<html>\n" + + " <head>\n" + + " <title>first</title>\n" + + " </head>\n" + + " <body>\n" + + " <script>\n" + + " var headchilds = document.getElementsByTagName('head')[0].childNodes;\n" + + " alert(headchilds.length);\n" + + " alert(headchilds[0]);\n" + + " alert(headchilds[1]);\n" + + " alert(headchilds[2]);\n" + + " alert(headchilds[3]);\n" + + " </script>\n" + + " </body>\n" + + "</html>"; + + loadPageWithAlerts2(html); + } + + /** + * @throws Exception failure + */ + @Test + @Alerts(DEFAULT = { "4", "[object HTMLScriptElement]", "[object Text]", + "[object HTMLTitleElement]", "[object Text]" }, + IE8 = { "3", "[object]", "[object]", "[object]", "undefined" }) + @NotYetImplemented(IE8) + 
public void badlyFormedHTML_scriptBeforeDoctype() throws Exception { + final String html = "<script>var i=7;</script>\n" + + HtmlPageTest.STANDARDS_MODE_PREFIX_ + + "<html>\n" + + " <head>\n" + + " <title>first</title>\n" + + " </head>\n" + + " <body>\n" + + " <script>\n" + + " var headchilds = document.getElementsByTagName('head')[0].childNodes;\n" + + " alert(headchilds.length);\n" + + " alert(headchilds[0]);\n" + + " alert(headchilds[1]);\n" + + " alert(headchilds[2]);\n" + + " alert(headchilds[3]);\n" + + " </script>\n" + + " </body>\n" + + "</html>"; + + loadPageWithAlerts2(html); + } + + /** + * @throws Exception failure + */ + @Test + @Alerts(DEFAULT = { "4", "[object HTMLParagraphElement]", "[object Text]", + "[object HTMLScriptElement]", "[object Text]" }, + IE8 = { "3", "[object]", "[object]", "[object]", "undefined" }) + @NotYetImplemented + public void badlyFormedHTML_scriptAfterHtml() throws Exception { + final String html = HtmlPageTest.STANDARDS_MODE_PREFIX_ + + "<html>\n" + + " <head>\n" + + " <title>first</title>\n" + + " <script>\n" + + " function test(){\n" + + " var headchilds = document.getElementsByTagName('body')[0].childNodes;\n" + + " alert(headchilds.length);\n" + + " alert(headchilds[0]);\n" + + " alert(headchilds[1]);\n" + + " alert(headchilds[2]);\n" + + " alert(headchilds[3]);\n" + + " }\n" + + " </script>\n" + + " </head>\n" + + " <body onload='test()'><p>HtmlUnit</p></body>\n" + + "</html>" + + "<script>var i=7;</script>\n"; + + loadPageWithAlerts2(html); + } + + /** * Test when an illegal tag is found in head as some websites do. 
* @throws Exception failure */ Modified: trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/MalformedHtmlTest.java =================================================================== --- trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/MalformedHtmlTest.java 2014-01-31 15:07:40 UTC (rev 9094) +++ trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/html/MalformedHtmlTest.java 2014-01-31 21:55:16 UTC (rev 9095) @@ -162,8 +162,6 @@ */ @Test @Alerts("12345") - @NotYetImplemented(IE8) - // Our HTML fixed done during parsing create an HTML different from the IE8 one public void testWrongHtml_TagBeforeHtml() throws Exception { final String html = "<div>\n" + "<html>\n" |