[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/scannersTests ScriptScannerTest.java,1.57,1.58
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2005-03-07 02:18:57
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5186/tests/scannersTests Modified Files: ScriptScannerTest.java Log Message: Bug #1104627 Parser Crash reading javascript Bug #1024045 StringBean crashes on an URL Bug #1021925 StyleTag with missing linefeed prevents page from parsing Corrected operation with script and style scanners to recognize the ETAGO when parsing CDATA -- see http://www.w3.org/TR/html4/appendix/notes.html#notes-specifying-data. Original solution to bug #741769 ScriptScanner doesn't handle quoted </script> tags, was erroneous; it should have been recognized as faulty HTML. Several test cases changed to follow this advice: "Authors should therefore escape "</" within the content." Index: ScriptScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/ScriptScannerTest.java,v retrieving revision 1.57 retrieving revision 1.58 diff -C2 -d -r1.57 -r1.58 *** ScriptScannerTest.java 2 Sep 2004 02:28:07 -0000 1.57 --- ScriptScannerTest.java 7 Mar 2005 02:18:47 -0000 1.58 *************** *** 183,186 **** --- 183,195 ---- * string parser was not moving to the ignore state on encountering double * quotes (only single quotes were previously accepted). + * + * <pre> + * Bug #1104627 Parser Crash reading javascript + * Bug #1024045 StringBean crashes on an URL + * Bug #1021925 StyleTag with missing linefeed prevents page from parsing + * </pre> + * Altered test to correctly escape the ETAGO. + * See http://www.w3.org/TR/html4/appendix/notes.html#notes-specifying-data + * * @throws Exception */ *************** *** 203,207 **** "document.write(\"}\"); " + "// parser thinks this is the end tag.\n" + ! "document.write(\"</script>\");" + "</script>" + "<body>" + --- 212,216 ---- "document.write(\"}\"); " + "// parser thinks this is the end tag.\n" + ! "document.write(\"<\\/script>\");" + "</script>" + "<body>" + *************** *** 226,230 **** "document.write(\"}\"); " + "// parser thinks this is the end tag.\n" + ! "document.write(\"</script>\");", scriptTag.getScriptCode() ); --- 235,239 ---- "document.write(\"}\"); " + "// parser thinks this is the end tag.\n" + ! "document.write(\"<\\/script>\");", scriptTag.getScriptCode() ); *************** *** 232,240 **** } public void testScriptCodeExtraction() throws ParserException { createParser( "<SCRIPT language=JavaScript>" + "document.write(\"<a href=\"1.htm\"><img src=\"1.jpg\" " + ! "width=\"80\" height=\"20\" border=\"0\"></a>\");" + "</SCRIPT>" ); --- 241,260 ---- } + /** + * + * <pre> + * Bug #1104627 Parser Crash reading javascript + * Bug #1024045 StringBean crashes on an URL + * Bug #1021925 StyleTag with missing linefeed prevents page from parsing + * </pre> + * Altered test to correctly escape the ETAGO. + * See http://www.w3.org/TR/html4/appendix/notes.html#notes-specifying-data + * + */ public void testScriptCodeExtraction() throws ParserException { createParser( "<SCRIPT language=JavaScript>" + "document.write(\"<a href=\"1.htm\"><img src=\"1.jpg\" " + ! "width=\"80\" height=\"20\" border=\"0\"><\\/a>\");" + "</SCRIPT>" ); *************** *** 245,258 **** "script code", "document.write(\"<a href=\"1.htm\"><img src=\"1.jpg\" " + ! "width=\"80\" height=\"20\" border=\"0\"></a>\");", scriptTag.getScriptCode() ); } public void testScriptCodeExtractionWithMultipleQuotes() throws ParserException { createParser( "<SCRIPT language=JavaScript>" + "document.write(\"<a href=\\\"1.htm\\\"><img src=\\\"1.jpg\\\" " + ! "width=\\\"80\\\" height=\\\"20\\\" border=\\\"0\\\"></a>\");" + "</SCRIPT>" ); --- 265,289 ---- "script code", "document.write(\"<a href=\"1.htm\"><img src=\"1.jpg\" " + ! "width=\"80\" height=\"20\" border=\"0\"><\\/a>\");", scriptTag.getScriptCode() ); } + /** + * + * <pre> + * Bug #1104627 Parser Crash reading javascript + * Bug #1024045 StringBean crashes on an URL + * Bug #1021925 StyleTag with missing linefeed prevents page from parsing + * </pre> + * Altered test to correctly escape the ETAGO. + * See http://www.w3.org/TR/html4/appendix/notes.html#notes-specifying-data + * + */ public void testScriptCodeExtractionWithMultipleQuotes() throws ParserException { createParser( "<SCRIPT language=JavaScript>" + "document.write(\"<a href=\\\"1.htm\\\"><img src=\\\"1.jpg\\\" " + ! "width=\\\"80\\\" height=\\\"20\\\" border=\\\"0\\\"><\\/a>\");" + "</SCRIPT>" ); *************** *** 263,271 **** "script code", "document.write(\"<a href=\\\"1.htm\\\"><img src=\\\"1.jpg\\\" " + ! "width=\\\"80\\\" height=\\\"20\\\" border=\\\"0\\\"></a>\");", scriptTag.getScriptCode() ); } public void testScriptWithinComments() throws Exception { createParser( --- 294,313 ---- "script code", "document.write(\"<a href=\\\"1.htm\\\"><img src=\\\"1.jpg\\\" " + ! "width=\\\"80\\\" height=\\\"20\\\" border=\\\"0\\\"><\\/a>\");", scriptTag.getScriptCode() ); } + /** + * + * <pre> + * Bug #1104627 Parser Crash reading javascript + * Bug #1024045 StringBean crashes on an URL + * Bug #1021925 StyleTag with missing linefeed prevents page from parsing + * </pre> + * Altered test to correctly escape the ETAGO. + * See http://www.w3.org/TR/html4/appendix/notes.html#notes-specifying-data + * + */ public void testScriptWithinComments() throws Exception { createParser( *************** *** 308,312 **** "else{" + "\n" + ! "menuobj.document.write('<layer name=gui bgColor=#E6E6E6 width=165 onmouseover=\"clearhidemenu()\" onmouseout=\"hidemenu()\">'+which+'</layer>')" + "\n" + "menuobj.document.close()" + --- 350,354 ---- "else{" + "\n" + ! "menuobj.document.write('<layer name=gui bgColor=#E6E6E6 width=165 onmouseover=\"clearhidemenu()\" onmouseout=\"hidemenu()\">'+which+'<\\/layer>')" + "\n" + "menuobj.document.close()" + *************** *** 516,523 **** /** * See bug #741769 ScriptScanner doesn't handle quoted </script> tags */ public void testScanQuotedEndTag() throws ParserException { ! String html = "<SCRIPT language=\"JavaScript\">document.write('</SCRIPT>');</SCRIPT>"; createParser(html); parseAndAssertNodeCount(1); --- 558,574 ---- /** * See bug #741769 ScriptScanner doesn't handle quoted </script> tags + * + * <pre> + * Bug #1104627 Parser Crash reading javascript + * Bug #1024045 StringBean crashes on an URL + * Bug #1021925 StyleTag with missing linefeed prevents page from parsing + * </pre> + * Altered test to correctly escape the ETAGO. + * See http://www.w3.org/TR/html4/appendix/notes.html#notes-specifying-data + * */ public void testScanQuotedEndTag() throws ParserException { ! String html = "<SCRIPT language=\"JavaScript\">document.write('<\\/SCRIPT>');</SCRIPT>"; createParser(html); parseAndAssertNodeCount(1); |