[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/parserHelperTests RemarkNodeParserTest.java,1.32,1.33
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-09-01 19:56:06
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests In directory sc8-pr-cvs1:/tmp/cvs-serv19769/tests/parserHelperTests Modified Files: RemarkNodeParserTest.java Log Message: Workaround for bug #788746 parser crashes on comments like <!-- foobar --!>. No real solution because the codebase assumes remarks end with -->, so this just avoids the crash, but the toHtml() output will output --!-->, which isn't really correct. Added the test case as RemarkNodeParserTest.testExclamationComment(). Index: RemarkNodeParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/RemarkNodeParserTest.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** RemarkNodeParserTest.java 24 Aug 2003 21:59:43 -0000 1.32 --- RemarkNodeParserTest.java 1 Sep 2003 19:55:59 -0000 1.33 *************** *** 39,230 **** public class RemarkNodeParserTest extends ParserTestCase { ! public RemarkNodeParserTest(String name) { ! super(name); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <!-- saved from url=(0022)http://internet.e-mail --> ! * <HTML> ! * <HEAD><META name="title" content="Training Introduction"> ! * <META name="subject" content=""> ! * <!-- ! Whats gonna happen now ? ! * --> ! * <TEST> ! * </TEST> ! * ! * The above line is incorrectly parsed - the remark is not correctly identified. ! * This bug was reported by Serge Kruppa (2002-Feb-08). ! */ ! public void testRemarkNodeBug() throws ParserException ! { ! createParser( ! "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ ! "<HTML>\n"+ ! "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ ! "<META name=\"subject\" content=\"\">\n"+ ! "<!--\n"+ ! " Whats gonna happen now ?\n"+ ! "-->\n"+ ! "<TEST>\n"+ ! "</TEST>\n"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); ! // The first node should be a HTMLRemarkNode ! 
assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.getText()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertEquals("Text of the remarkNode #6","\r\n Whats gonna happen now ?\r\n",remarkNode.getText()); ! } ! public void testToPlainTextString() throws ParserException { ! createParser( ! "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ ! "<HTML>\n"+ ! "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ ! "<META name=\"subject\" content=\"\">\n"+ ! "<!--\n"+ ! " Whats gonna happen now ?\n"+ ! "-->\n"+ ! "<TEST>\n"+ ! "</TEST>\n"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); ! // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Plain Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.toPlainTextString()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertEquals("Plain Text of the remarkNode #6","\r\n Whats gonna happen now ?\r\n",remarkNode.getText()); ! ! } ! public void testToRawString() throws ParserException { ! createParser( ! "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ ! "<HTML>\n"+ ! "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ ! "<META name=\"subject\" content=\"\">\n"+ ! "<!--\n"+ ! " Whats gonna happen now ?\n"+ ! "-->\n"+ ! "<TEST>\n"+ ! "</TEST>\n"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); ! // The first node should be a HTMLRemarkNode ! 
assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertStringEquals("Raw String of the remarkNode #1","<!-- saved from url=(0022)http://internet.e-mail -->",remarkNode.toHtml()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertStringEquals("Raw String of the remarkNode #6","<!--\r\n Whats gonna happen now ?\r\n-->",remarkNode.toHtml()); ! } ! ! public void testNonRemarkNode() throws ParserException { ! createParser(" <![endif]>"); ! parseAndAssertNodeCount(2); ! // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a string node",node[0] instanceof StringNode); ! assertTrue("Second node should be a Tag",node[1] instanceof Tag); ! StringNode stringNode = (StringNode)node[0]; ! Tag tag = (Tag)node[1]; ! assertEquals("Text contents"," ",stringNode.getText()); ! assertEquals("Tag Contents","![endif]",tag.getText()); ! ! } ! ! /** ! * This is the simulation of bug report 586756, submitted ! * by John Zook. ! * If all the comment contains is a blank line, it breaks ! * the state ! */ ! public void testRemarkNodeWithBlankLine() throws ParserException { ! createParser("<!--\n"+ ! "\n"+ ! "-->"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Expected contents","\r\n",remarkNode.getText()); ! ! } ! ! /** ! * This is the simulation of a bug report submitted ! * by Claude Duguay. ! * If it is a comment with nothing in it, parser crashes ! */ ! public void testRemarkNodeWithNothing() throws ParserException { ! createParser("<!-->"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! 
assertEquals("Expected contents","",remarkNode.getText()); ! ! } ! ! /** ! * Reproduction of bug reported by John Zook [594301] ! * When we have tags like : ! * <!-- <A> --> ! * it doesent get parsed correctly ! */ ! public void testTagWithinRemarkNode() throws ParserException { ! createParser("<!-- \n"+ ! "<A>\n"+ ! "bcd -->"); ! Parser.setLineSeparator("\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertStringEquals("Expected contents"," \n<A>\nbcd ",remarkNode.getText()); ! ! } ! ! /** ! * Bug reported by John Zook [594301], invalid remark nodes are accepted as remark nodes. ! * <<br> ! * -<br> ! * -<br> ! * ssd --><br> ! * This is not supposed to be a remarknode ! */ ! public void testInvalidTag() throws ParserException { ! createParser("<!\n"+ ! "-\n"+ ! "-\n"+ ! "ssd -->"); ! Parser.setLineSeparator("\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a Tag but was "+node[0],node[0] instanceof Tag); ! Tag tag = (Tag)node[0]; ! assertStringEquals("Expected contents","!\n"+ ! "-\n"+ ! "-\n"+ ! "ssd --",tag.getText()); ! Parser.setLineSeparator("\r\n"); ! } ! ! /** ! * Bug reported by John Zook [594301] ! * If dashes exist in a comment, they dont get added to the comment text ! */ ! public void testDashesInComment() throws ParserException{ ! createParser("<!-- -- -->"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[0],node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Remark Node contents"," -- ",remarkNode.getText()); ! } --- 39,230 ---- public class RemarkNodeParserTest extends ParserTestCase { ! public RemarkNodeParserTest(String name) { ! super(name); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <!-- saved from url=(0022)http://internet.e-mail --> ! * <HTML> ! * <HEAD><META name="title" content="Training Introduction"> ! 
* <META name="subject" content=""> ! * <!-- ! Whats gonna happen now ? ! * --> ! * <TEST> ! * </TEST> ! * ! * The above line is incorrectly parsed - the remark is not correctly identified. ! * This bug was reported by Serge Kruppa (2002-Feb-08). ! */ ! public void testRemarkNodeBug() throws ParserException ! { ! createParser( ! "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ ! "<HTML>\n"+ ! "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ ! "<META name=\"subject\" content=\"\">\n"+ ! "<!--\n"+ ! " Whats gonna happen now ?\n"+ ! "-->\n"+ ! "<TEST>\n"+ ! "</TEST>\n"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); ! // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.getText()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertEquals("Text of the remarkNode #6","\r\n Whats gonna happen now ?\r\n",remarkNode.getText()); ! } ! public void testToPlainTextString() throws ParserException { ! createParser( ! "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ ! "<HTML>\n"+ ! "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ ! "<META name=\"subject\" content=\"\">\n"+ ! "<!--\n"+ ! " Whats gonna happen now ?\n"+ ! "-->\n"+ ! "<TEST>\n"+ ! "</TEST>\n"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); ! // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Plain Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.toPlainTextString()); ! 
// The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertEquals("Plain Text of the remarkNode #6","\r\n Whats gonna happen now ?\r\n",remarkNode.getText()); ! ! } ! public void testToRawString() throws ParserException { ! createParser( ! "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ ! "<HTML>\n"+ ! "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ ! "<META name=\"subject\" content=\"\">\n"+ ! "<!--\n"+ ! " Whats gonna happen now ?\n"+ ! "-->\n"+ ! "<TEST>\n"+ ! "</TEST>\n"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); ! // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertStringEquals("Raw String of the remarkNode #1","<!-- saved from url=(0022)http://internet.e-mail -->",remarkNode.toHtml()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertStringEquals("Raw String of the remarkNode #6","<!--\r\n Whats gonna happen now ?\r\n-->",remarkNode.toHtml()); ! } ! ! public void testNonRemarkNode() throws ParserException { ! createParser(" <![endif]>"); ! parseAndAssertNodeCount(2); ! // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a string node",node[0] instanceof StringNode); ! assertTrue("Second node should be a Tag",node[1] instanceof Tag); ! StringNode stringNode = (StringNode)node[0]; ! Tag tag = (Tag)node[1]; ! assertEquals("Text contents"," ",stringNode.getText()); ! assertEquals("Tag Contents","![endif]",tag.getText()); ! ! } ! ! /** ! * This is the simulation of bug report 586756, submitted ! * by John Zook. ! * If all the comment contains is a blank line, it breaks ! * the state ! */ ! 
public void testRemarkNodeWithBlankLine() throws ParserException { ! createParser("<!--\n"+ ! "\n"+ ! "-->"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Expected contents","\r\n",remarkNode.getText()); ! ! } ! ! /** ! * This is the simulation of a bug report submitted ! * by Claude Duguay. ! * If it is a comment with nothing in it, parser crashes ! */ ! public void testRemarkNodeWithNothing() throws ParserException { ! createParser("<!-->"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Expected contents","",remarkNode.getText()); ! ! } ! ! /** ! * Reproduction of bug reported by John Zook [594301] ! * When we have tags like : ! * <!-- <A> --> ! * it doesent get parsed correctly ! */ ! public void testTagWithinRemarkNode() throws ParserException { ! createParser("<!-- \n"+ ! "<A>\n"+ ! "bcd -->"); ! Parser.setLineSeparator("\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertStringEquals("Expected contents"," \n<A>\nbcd ",remarkNode.getText()); ! ! } ! ! /** ! * Bug reported by John Zook [594301], invalid remark nodes are accepted as remark nodes. ! * <<br> ! * -<br> ! * -<br> ! * ssd --><br> ! * This is not supposed to be a remarknode ! */ ! public void testInvalidTag() throws ParserException { ! createParser("<!\n"+ ! "-\n"+ ! "-\n"+ ! "ssd -->"); ! Parser.setLineSeparator("\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a Tag but was "+node[0],node[0] instanceof Tag); ! Tag tag = (Tag)node[0]; ! assertStringEquals("Expected contents","!\n"+ ! "-\n"+ ! "-\n"+ ! "ssd --",tag.getText()); ! Parser.setLineSeparator("\r\n"); ! } ! ! /** ! 
* Bug reported by John Zook [594301] ! * If dashes exist in a comment, they dont get added to the comment text ! */ ! public void testDashesInComment() throws ParserException{ ! createParser("<!-- -- -->"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[0],node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Remark Node contents"," -- ",remarkNode.getText()); ! } *************** *** 258,262 **** ParserException { ! createParser( "<HTML>\n" + "<HEAD>\n" --- 258,262 ---- ParserException { ! createParser( "<HTML>\n" + "<HEAD>\n" *************** *** 268,275 **** + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; ! assertEquals("Remark Node contents"," Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp ",remarkNode.getText()); } --- 268,275 ---- + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; ! assertEquals("Remark Node contents"," Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp ",remarkNode.getText()); } *************** *** 281,285 **** ParserException { ! createParser( "<HTML>\n" + "<HEAD>\n" --- 281,285 ---- ParserException { ! createParser( "<HTML>\n" + "<HEAD>\n" *************** *** 291,298 **** + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; ! assertEquals("Remark Node contents"," another -- -- comment ",remarkNode.getText()); } --- 291,298 ---- + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; ! 
assertEquals("Remark Node contents"," another -- -- comment ",remarkNode.getText()); } *************** *** 304,308 **** ParserException { ! createParser( "<HTML>\n" + "<HEAD>\n" --- 304,308 ---- ParserException { ! createParser( "<HTML>\n" + "<HEAD>\n" *************** *** 314,321 **** + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; ! assertEquals("Remark Node contents","",remarkNode.getText()); } --- 314,321 ---- + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; ! assertEquals("Remark Node contents","",remarkNode.getText()); } *************** *** 330,334 **** // HTMLParserException // { ! // createParser( // "<HTML>\n" // + "<HEAD>\n" --- 330,334 ---- // HTMLParserException // { ! // createParser( // "<HTML>\n" // + "<HEAD>\n" *************** *** 340,348 **** // + "</HTML>\n" // ); ! // parseAndAssertNodeCount(10); ! // assertTrue("Node should not be a HTMLRemarkNode",!(node[7] instanceof HTMLRemarkNode)); ! // assertTrue("Node should be a HTMLStringNode but was "+node[7],node[7].getType()==HTMLStringNode.TYPE); ! // HTMLStringNode stringNode = (HTMLStringNode)node[7]; ! // assertEquals("String Node contents","<!- not a comment, just regular old data characters ->\n",stringNode.getText()); // } } --- 340,369 ---- // + "</HTML>\n" // ); ! // parseAndAssertNodeCount(10); ! // assertTrue("Node should not be a HTMLRemarkNode",!(node[7] instanceof HTMLRemarkNode)); ! // assertTrue("Node should be a HTMLStringNode but was "+node[7],node[7].getType()==HTMLStringNode.TYPE); ! // HTMLStringNode stringNode = (HTMLStringNode)node[7]; ! 
// assertEquals("String Node contents","<!- not a comment, just regular old data characters ->\n",stringNode.getText()); // } + + /** + * Test a comment ending with !--. + */ + public void testExclamationComment () + throws + ParserException + { + createParser ( + "<html>\n" + + "<head>\n" + + "<title>foobar</title>\n" + + "</head>\n" + + "<body>\n" + + "<!-- foobar --!>\n" + + "</body>\n" + + "</html>\n" + ); + parseAndAssertNodeCount (10); + } + } |