From: <mgu...@us...> - 2009-02-09 13:11:42
|
Revision: 222 http://nekohtml.svn.sourceforge.net/nekohtml/?rev=222&view=rev Author: mguillem Date: 2009-02-09 13:11:19 +0000 (Mon, 09 Feb 2009) Log Message: ----------- Add HEAD and BODY when it is missing Issue #1898038 Modified Paths: -------------- trunk/data/canonical/test-augmentations-following-cdata.html trunk/data/canonical/test-entities.html trunk/data/canonical/test-newline-in-attribute-crlf.html trunk/data/canonical/test-newline-in-attribute-lf.html trunk/data/canonical/test-newline-in-pi-crlf.html trunk/data/canonical/test-newline-in-pi-lf.html trunk/data/canonical/test-xmldec-malformed.html trunk/data/canonical/test006.html trunk/data/canonical/test021.html trunk/data/canonical/test022.html trunk/data/canonical/test023.html trunk/data/canonical/test025.html trunk/data/canonical/test026.html trunk/data/canonical/test027.html trunk/data/canonical/test028.html trunk/data/canonical/test029.html trunk/data/canonical/test044.html trunk/data/canonical/test045.html trunk/data/canonical/test046.html trunk/data/canonical/test047.html trunk/data/canonical/test049.html trunk/data/canonical/test065.html trunk/data/canonical/test066.html trunk/data/canonical/test067.html trunk/data/canonical/test068.html trunk/data/canonical/test069.html trunk/data/canonical/test070.html trunk/data/canonical/test071.html trunk/data/canonical/test078.html trunk/data/canonical/test085.html trunk/data/canonical/test086.html trunk/data/canonical/test087.html trunk/data/canonical/test088.html trunk/data/canonical/test089.html trunk/data/canonical/test090.html trunk/data/canonical/test098.html trunk/data/canonical/test101.html trunk/doc/changes.html trunk/src/org/cyberneko/html/HTMLTagBalancer.java Added Paths: ----------- trunk/data/canonical/test-empty.html trunk/data/canonical/test-only-text.html trunk/data/test-empty.html trunk/data/test-only-text.html Removed Paths: ------------- trunk/data/canonical/test000.html trunk/data/canonical/test001.html trunk/data/test000.html trunk/data/test001.html Modified: trunk/data/canonical/test-augmentations-following-cdata.html =================================================================== --- trunk/data/canonical/test-augmentations-following-cdata.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test-augmentations-following-cdata.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ [synth](HTML +[synth](HEAD +[synth])HEAD [synth](BODY [1,1,0;1,13,12]"Text before [1,13,12;1,39,38]#[CDATA[ text in CDATA]] Copied: trunk/data/canonical/test-empty.html (from rev 219, trunk/data/canonical/test000.html) =================================================================== --- trunk/data/canonical/test-empty.html (rev 0) +++ trunk/data/canonical/test-empty.html 2009-02-09 13:11:19 UTC (rev 222) @@ -0,0 +1,6 @@ +(HTML +(HEAD +)HEAD +(BODY +)BODY +)HTML \ No newline at end of file Modified: trunk/data/canonical/test-entities.html =================================================================== --- trunk/data/canonical/test-entities.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test-entities.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ (HTML +(HEAD +)HEAD (BODY "&unknown1; & &unknown2; &unknown3; (BR Modified: trunk/data/canonical/test-newline-in-attribute-crlf.html =================================================================== --- trunk/data/canonical/test-newline-in-attribute-crlf.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test-newline-in-attribute-crlf.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ [synth](HTML +[synth](HEAD +[synth])HEAD [synth](BODY [1,1,0;1,11,10]"some text [1,11,10;2,23,52](SPAN Modified: trunk/data/canonical/test-newline-in-attribute-lf.html =================================================================== --- trunk/data/canonical/test-newline-in-attribute-lf.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test-newline-in-attribute-lf.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ [synth](HTML +[synth](HEAD +[synth])HEAD [synth](BODY [1,1,0;1,11,10]"some text [1,11,10;2,23,51](SPAN Modified: trunk/data/canonical/test-newline-in-pi-crlf.html =================================================================== --- trunk/data/canonical/test-newline-in-pi-crlf.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test-newline-in-pi-crlf.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ [synth](HTML +[synth](HEAD +[synth])HEAD [synth](BODY [1,1,0;1,11,10]"some text [1,11,10;2,23,63]?instruct beforenl="content"\n afternl="content" Modified: trunk/data/canonical/test-newline-in-pi-lf.html =================================================================== --- trunk/data/canonical/test-newline-in-pi-lf.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test-newline-in-pi-lf.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ [synth](HTML +[synth](HEAD +[synth])HEAD [synth](BODY [1,1,0;1,11,10]"some text [1,11,10;2,23,62]?instruct beforenl="content"\n afternl="content" Copied: trunk/data/canonical/test-only-text.html (from rev 219, trunk/data/canonical/test001.html) =================================================================== --- trunk/data/canonical/test-only-text.html (rev 0) +++ trunk/data/canonical/test-only-text.html 2009-02-09 13:11:19 UTC (rev 222) @@ -0,0 +1,7 @@ +(HTML +(HEAD +)HEAD +(BODY +"Just text. +)BODY +)HTML \ No newline at end of file Modified: trunk/data/canonical/test-xmldec-malformed.html =================================================================== --- trunk/data/canonical/test-xmldec-malformed.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test-xmldec-malformed.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ (HTML +(HEAD +)HEAD (BODY "\n\nhello )BODY Deleted: trunk/data/canonical/test000.html =================================================================== --- trunk/data/canonical/test000.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test000.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,2 +0,0 @@ -(HTML -)HTML \ No newline at end of file Deleted: trunk/data/canonical/test001.html =================================================================== --- trunk/data/canonical/test001.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test001.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,5 +0,0 @@ -(HTML -(BODY -"Just text. -)BODY -)HTML \ No newline at end of file Modified: trunk/data/canonical/test006.html =================================================================== --- trunk/data/canonical/test006.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test006.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ (HTML +(HEAD +)HEAD (BODY "This (I Modified: trunk/data/canonical/test021.html =================================================================== --- trunk/data/canonical/test021.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test021.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ (HTML +(HEAD +)HEAD (BODY "M & M )BODY Modified: trunk/data/canonical/test022.html =================================================================== --- trunk/data/canonical/test022.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test022.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ (HTML +(HEAD +)HEAD (BODY "&foo; )BODY Modified: trunk/data/canonical/test023.html =================================================================== --- trunk/data/canonical/test023.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test023.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ (HTML +(HEAD +)HEAD (BODY "< )BODY Modified: trunk/data/canonical/test025.html =================================================================== --- trunk/data/canonical/test025.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test025.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,2 +1,6 @@ (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test026.html =================================================================== --- trunk/data/canonical/test026.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test026.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,2 +1,6 @@ (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test027.html =================================================================== --- trunk/data/canonical/test027.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test027.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,2 +1,6 @@ (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test028.html =================================================================== --- trunk/data/canonical/test028.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test028.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ (HTML +(HEAD +)HEAD (BODY "< = )BODY Modified: trunk/data/canonical/test029.html =================================================================== --- trunk/data/canonical/test029.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test029.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ (HTML +(HEAD +)HEAD (BODY "&#foo; )BODY Modified: trunk/data/canonical/test044.html =================================================================== --- trunk/data/canonical/test044.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test044.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,3 +1,7 @@ ?target (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test045.html =================================================================== --- trunk/data/canonical/test045.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test045.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,3 +1,7 @@ ?target data (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test046.html =================================================================== --- trunk/data/canonical/test046.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test046.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,3 +1,7 @@ ?target data\t (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test047.html =================================================================== --- trunk/data/canonical/test047.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test047.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,3 +1,7 @@ ?php print "Hello, World.\\n";\n (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test049.html =================================================================== --- trunk/data/canonical/test049.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test049.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,3 +1,7 @@ ! (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test065.html =================================================================== --- trunk/data/canonical/test065.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test065.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,3 +1,7 @@ ! (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test066.html =================================================================== --- trunk/data/canonical/test066.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test066.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,3 +1,7 @@ !HTML (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test067.html =================================================================== --- trunk/data/canonical/test067.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test067.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,8 @@ !HTML ppublic_id (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test068.html =================================================================== --- trunk/data/canonical/test068.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test068.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,5 +1,9 @@ !HTML ppublic_id ssystem_id -(HTML +(HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test069.html =================================================================== --- trunk/data/canonical/test069.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test069.html 2009-02-09 13:11:19 UTC (rev 222) @@ -2,4 +2,8 @@ ppublic_id ssystem_id (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test070.html =================================================================== --- trunk/data/canonical/test070.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test070.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,8 @@ !HTML ssystem_id (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test071.html =================================================================== --- trunk/data/canonical/test071.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test071.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,3 +1,7 @@ !ROOT (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test078.html =================================================================== --- trunk/data/canonical/test078.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test078.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,3 +1,7 @@ #\na\nb\nc\n (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML Modified: trunk/data/canonical/test085.html =================================================================== --- trunk/data/canonical/test085.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test085.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ (HTML +(HEAD +)HEAD (BODY "& )BODY Modified: trunk/data/canonical/test086.html =================================================================== --- trunk/data/canonical/test086.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test086.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ (HTML +(HEAD +)HEAD (BODY "&#x )BODY Modified: trunk/data/canonical/test087.html =================================================================== --- trunk/data/canonical/test087.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test087.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,6 +1,8 @@ !HTML p-//W3C//DTD HTML 4.0 Transitional//EN (HTML +(HEAD +)HEAD (BODY "Hello )BODY Modified: trunk/data/canonical/test088.html =================================================================== --- trunk/data/canonical/test088.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test088.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ (HTML +(HEAD +)HEAD (BODY "Hello (P Modified: trunk/data/canonical/test089.html =================================================================== --- trunk/data/canonical/test089.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test089.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,6 @@ (HTML +(HEAD +)HEAD (BODY "& )BODY Modified: trunk/data/canonical/test090.html =================================================================== --- trunk/data/canonical/test090.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test090.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,3 +1,7 @@ xversion 1.0 (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test098.html =================================================================== --- trunk/data/canonical/test098.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test098.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,2 +1,6 @@ (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Modified: trunk/data/canonical/test101.html =================================================================== --- trunk/data/canonical/test101.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/canonical/test101.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1,4 +1,8 @@ ?base http://foo.bar ?tags :noads: (HTML +(HEAD +)HEAD +(BODY +)BODY )HTML \ No newline at end of file Copied: trunk/data/test-only-text.html (from rev 219, trunk/data/test001.html) =================================================================== --- trunk/data/test-only-text.html (rev 0) +++ trunk/data/test-only-text.html 2009-02-09 13:11:19 UTC (rev 222) @@ -0,0 +1 @@ +Just text. \ No newline at end of file Deleted: trunk/data/test001.html =================================================================== --- trunk/data/test001.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/data/test001.html 2009-02-09 13:11:19 UTC (rev 222) @@ -1 +0,0 @@ -Just text. \ No newline at end of file Modified: trunk/doc/changes.html =================================================================== --- trunk/doc/changes.html 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/doc/changes.html 2009-02-09 13:11:19 UTC (rev 222) @@ -29,7 +29,8 @@ when closing comment --> is missing, comment ends with > (patch provided by Tatsuhiko Miyabe, #2552096), don't treat tags with non-HTML namespace like HTML tags (patch provided by Tatsuhiko Miyabe, #2551958), force creation of BODY rather than of HEAD for unknown tags, - fix incorrect HTMLEventInfo augmentations for script content (patch provided by Louis Ryan, #2236681) + fix incorrect HTMLEventInfo augmentations for script content (patch provided by Louis Ryan, #2236681), + add HEAD and BODY tags when missing (#1898038) <dt>Version 1.9.11 (29 Dec 2008) Modified: trunk/src/org/cyberneko/html/HTMLTagBalancer.java =================================================================== --- trunk/src/org/cyberneko/html/HTMLTagBalancer.java 2009-02-09 12:41:57 UTC (rev 221) +++ trunk/src/org/cyberneko/html/HTMLTagBalancer.java 2009-02-09 13:11:19 UTC (rev 222) @@ -419,11 +419,16 @@ if (fReportErrors) { fErrorReporter.reportError("HTML2000", null); } - String ename = modifyName("html", fNamesElems); - fQName.setValues(null, ename, ename, null); if (fDocumentHandler != null) { - callStartElement(fQName, emptyAttributes(), synthesizedAugs()); + fSeenRootElementEnd = false; + forceStartBody(); // will force <html> and <head></head> + final String body = modifyName("body", fNamesElems); + fQName.setValues(null, body, body, null); callEndElement(fQName, synthesizedAugs()); + + final String ename = modifyName("html", fNamesElems); + fQName.setValues(null, ename, ename, null); + callEndElement(fQName, synthesizedAugs()); } } @@ -678,12 +683,7 @@ } } if (insertBody) { - String ename = modifyName("body", fNamesElems); - fQName.setValues(null, ename, ename, null); - if (fReportErrors) { - fErrorReporter.reportWarning("HTML2006", new Object[]{ename}); - } - startElement(fQName, null, synthesizedAugs()); + forceStartBody(); } } @@ -694,6 +694,26 @@ } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations) + /** + * Generates a missing <body> + */ + private void forceStartBody() { + // create <head></head> if none was present + if (!fSeenHeadElement) { + final String tagName = modifyName("head", fNamesElems); + fQName.setValues(null, tagName, tagName, null); + startElement(fQName, null, synthesizedAugs()); + endElement(fQName, synthesizedAugs()); + } + + final String ename = modifyName("body", fNamesElems); + fQName.setValues(null, ename, ename, null); + if (fReportErrors) { + fErrorReporter.reportWarning("HTML2006", new Object[]{ename}); + } + startElement(fQName, null, synthesizedAugs()); + } + /** Text declaration. */ public void textDecl(String version, String encoding, Augmentations augs) throws XNIException { @@ -780,16 +800,11 @@ if (whitespace) { return; } - String ename = modifyName("body", fNamesElems); - fQName.setValues(null, ename, ename, null); - if (fReportErrors) { - fErrorReporter.reportWarning("HTML2006", new Object[]{ename}); - } - startElement(fQName, null, synthesizedAugs()); + forceStartBody(); } // handle character content in head - // NOTE: This fequently happens when the document looks like: + // NOTE: This frequently happens when the document looks like: // <title>Title</title> // And here's some text. else if (!whitespace) { @@ -801,10 +816,7 @@ if (fReportErrors) { fErrorReporter.reportWarning("HTML2009", new Object[]{hname,bname}); } - fQName.setValues(null, hname, hname, null); - endElement(fQName, synthesizedAugs()); - fQName.setValues(null, bname, bname, null); - startElement(fQName, null, synthesizedAugs()); + forceStartBody(); } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |