[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/parserHelperTests AllTests.java,1.24,1.25 CompositeTagScannerHelperTest.java,1.17,1.18 StringParserTest.java,1.34,1.35
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-09-03 23:36:24
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests/parserHelperTests Modified Files: AllTests.java CompositeTagScannerHelperTest.java StringParserTest.java Log Message: Change tabs to spaces in all source files. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/AllTests.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** AllTests.java 24 Aug 2003 21:59:43 -0000 1.24 --- AllTests.java 3 Sep 2003 23:36:21 -0000 1.25 *************** *** 39,48 **** public static TestSuite suite() { TestSuite suite = new TestSuite("Parser Helper Tests"); ! // To-do: Test below should be enabled after it passes // suite.addTestSuite(AttributeParserTest.class); suite.addTestSuite(CompositeTagScannerHelperTest.class); suite.addTestSuite(RemarkNodeParserTest.class); suite.addTestSuite(StringParserTest.class); ! // To-do: Test below should be enabled after it passes // suite.addTestSuite(TagParserTest.class); --- 39,48 ---- public static TestSuite suite() { TestSuite suite = new TestSuite("Parser Helper Tests"); ! // To-do: Test below should be enabled after it passes // suite.addTestSuite(AttributeParserTest.class); suite.addTestSuite(CompositeTagScannerHelperTest.class); suite.addTestSuite(RemarkNodeParserTest.class); suite.addTestSuite(StringParserTest.class); ! 
// To-do: Test below should be enabled after it passes // suite.addTestSuite(TagParserTest.class); Index: CompositeTagScannerHelperTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/CompositeTagScannerHelperTest.java,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** CompositeTagScannerHelperTest.java 24 Aug 2003 21:59:43 -0000 1.17 --- CompositeTagScannerHelperTest.java 3 Sep 2003 23:36:21 -0000 1.18 *************** *** 41,70 **** */ public class CompositeTagScannerHelperTest extends ParserTestCase { ! private CompositeTagScannerHelper helper; ! public CompositeTagScannerHelperTest(String name) { ! super(name); ! } ! protected void setUp() { ! helper = ! new CompositeTagScannerHelper(null,null,null,null,null,false); ! } ! ! public void testIsXmlEndTagForRealXml() { ! Tag tag = new Tag( ! new TagData( ! 0,0,"something/","" ! ) ! ); ! assertTrue("should be an xml end tag",helper.isXmlEndTag(tag)); ! } ! public void testIsXmlEndTagForFalseMatches() { ! Tag tag = new Tag( ! new TagData( ! 0,0,"a href=http://someurl.com/","" ! ) ! ); ! assertFalse("should not be an xml end tag",helper.isXmlEndTag(tag)); ! } } --- 41,70 ---- */ public class CompositeTagScannerHelperTest extends ParserTestCase { ! private CompositeTagScannerHelper helper; ! public CompositeTagScannerHelperTest(String name) { ! super(name); ! } ! protected void setUp() { ! helper = ! new CompositeTagScannerHelper(null,null,null,null,null,false); ! } ! ! public void testIsXmlEndTagForRealXml() { ! Tag tag = new Tag( ! new TagData( ! 0,0,"something/","" ! ) ! ); ! assertTrue("should be an xml end tag",helper.isXmlEndTag(tag)); ! } ! public void testIsXmlEndTagForFalseMatches() { ! Tag tag = new Tag( ! new TagData( ! 0,0,"a href=http://someurl.com/","" ! ) ! ); ! assertFalse("should not be an xml end tag",helper.isXmlEndTag(tag)); ! 
} } Index: StringParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/StringParserTest.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** StringParserTest.java 24 Aug 2003 21:59:43 -0000 1.34 --- StringParserTest.java 3 Sep 2003 23:36:21 -0000 1.35 *************** *** 40,209 **** public class StringParserTest extends ParserTestCase { ! public StringParserTest(String name) { ! super(name); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <HTML><HEAD><TITLE>Google</TITLE> <BR> ! * The above line is incorrectly parsed in that, the text Google is missed. ! * The presence of this bug is typically when some tag is identified before the string node is. (usually seen ! * with the end tag). The bug lies in NodeReader.readElement(). ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testStringNodeBug1() throws ParserException { ! createParser("<HTML><HEAD><TITLE>Google</TITLE>"); ! parseAndAssertNodeCount(5); ! // The fourth node should be a HTMLStringNode- with the text - Google ! assertTrue("Fourth node should be a HTMLStringNode",node[3] instanceof StringNode); ! StringNode stringNode = (StringNode)node[3]; ! assertEquals("Text of the StringNode","Google",stringNode.getText()); ! } ! ! /** ! * Bug reported by Kaarle Kaila of Nokia<br> ! * For the following HTML : ! * view these documents, you must have <A href='http://www.adobe.com'>Adobe <br> ! * Acrobat Reader</A> installed on your computer.<br> ! * The first string before the link is not identified, and the space after the link is also not identified ! * Creation date: (8/2/2001 2:07:32 AM) ! */ ! public void testStringNodeBug2() throws ParserException { ! // Register the link scanner ! ! createParser("view these documents, you must have <A href='http://www.adobe.com'>Adobe \n"+ ! "Acrobat Reader</A> installed on your computer."); ! 
Parser.setLineSeparator("\r\n"); ! parser.addScanner(new LinkScanner("-l")); ! parseAndAssertNodeCount(3); ! // The first node should be a HTMLStringNode- with the text - view these documents, you must have ! assertTrue("First node should be a HTMLStringNode",node[0] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertEquals("Text of the StringNode","view these documents, you must have ",stringNode.getText()); ! assertTrue("Second node should be a link node",node[1] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[1]; ! assertEquals("Link is","http://www.adobe.com",linkNode.getLink()); ! assertEquals("Link text is","Adobe \r\nAcrobat Reader",linkNode.getLinkText()); ! ! assertTrue("Third node should be a string node",node[2] instanceof StringNode); ! StringNode stringNode2 = (StringNode)node[2]; ! assertEquals("Contents of third node"," installed on your computer.",stringNode2.getText()); ! } ! ! /** ! * Bug reported by Roger Sollberger<br> ! * For the following HTML : ! * <a href="http://asgard.ch">[< ASGARD ></a><br> ! * The string node is not correctly identified ! */ ! public void testTagCharsInStringNode() throws ParserException { ! createParser("<a href=\"http://asgard.ch\">[> ASGARD <]</a>"); ! parser.addScanner(new LinkScanner("-l")); ! parseAndAssertNodeCount(1); ! assertTrue("Node identified must be a link tag",node[0] instanceof LinkTag); ! LinkTag linkTag = (LinkTag) node[0]; ! assertEquals("[> ASGARD <]",linkTag.getLinkText()); ! assertEquals("http://asgard.ch",linkTag.getLink()); ! } ! ! public void testToPlainTextString() throws ParserException { ! createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); ! parseAndAssertNodeCount(10); ! assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode); ! StringNode stringNode = (StringNode)node[3]; ! 
assertEquals("First String Node","This is the Title",stringNode.toPlainTextString()); ! assertTrue("Eighth Node identified must be a string node",node[7] instanceof StringNode); ! stringNode = (StringNode)node[7]; ! assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toPlainTextString()); ! } ! ! public void testToHTML() throws ParserException { ! createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); ! parseAndAssertNodeCount(10); ! assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode); ! StringNode stringNode = (StringNode)node[3]; ! assertEquals("First String Node","This is the Title",stringNode.toHtml()); ! assertTrue("Eighth Node identified must be a string node",node[7] instanceof StringNode); ! stringNode = (StringNode)node[7]; ! assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toHtml()); ! } ! public void testEmptyLines() throws ParserException { ! createParser( ! "David Nirenberg (Center for Advanced Study in the Behavorial Sciences, Stanford).<br>\n"+ ! " \n"+ ! "<br>" ! ); ! parseAndAssertNodeCount(4); ! assertTrue("Third Node identified must be a string node",node[2] instanceof StringNode); ! } ! /** ! * This is a bug reported by John Zook (586222), where the first few chars ! * before a remark is being missed, if its on the same line. ! */ ! public void testStringBeingMissedBug() throws ParserException { ! createParser( ! "Before Comment <!-- Comment --> After Comment" ! ); ! parseAndAssertNodeCount(3); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! assertTrue("Second node should be HTMLRemarkNode",node[1] instanceof RemarkNode); ! assertTrue("Third node should be HTMLStringNode",node[2] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertEquals("First String node contents","Before Comment ",stringNode.getText()); ! 
StringNode stringNode2 = (StringNode)node[2]; ! assertEquals("Second String node contents"," After Comment",stringNode2.getText()); ! RemarkNode remarkNode = (RemarkNode)node[1]; ! assertEquals("Remark Node contents"," Comment ",remarkNode.getText()); ! ! } ! /** ! * Based on a bug report submitted by Cedric Rosa, if the last line contains a single character, ! * StringNode does not return the string node correctly. ! */ ! public void testLastLineWithOneChar() throws ParserException { ! createParser("a"); ! parseAndAssertNodeCount(1); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertEquals("First String node contents","a",stringNode.getText()); ! } ! ! public void testStringWithEmptyLine() throws ParserException { ! createParser("a\n\nb"); ! parseAndAssertNodeCount(1); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertStringEquals("First String node contents","a\r\n\r\nb",stringNode.getText()); ! } ! ! /** ! * An attempt to reproduce bug 677176, which passes. ! * @throws Exception ! */ ! public void testStringParserBug() throws Exception { ! createParser( ! "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 " + "Transitional//EN\">" + "<html>" + "<head>" + "<title>Untitled Document</title>" + "<meta http-equiv=\"Content-Type\" content=\"text/html; " + "charset=iso-8859-1\">" + "</head>" + "<script language=\"JavaScript\" type=\"text/JavaScript\">" + "// if this fails, output a 'hello' " + "if (true) " + "{ " + "//something good... " + "} " + "</script>" + "<body>" + "</body>" + "</html>" ); ! parser.registerScanners(); ! parseAndAssertNodeCount(10); ! assertType("fourth node",MetaTag.class,node[4]); ! MetaTag metaTag = (MetaTag)node[4]; ! ! assertStringEquals( ! "content", ! "text/html; charset=iso-8859-1", ! metaTag.getAttribute("CONTENT") ! ); ! } ! ! 
public void testStringWithLineBreaks() throws Exception { ! createParser("Testing &\nRefactoring"); ! parseAndAssertNodeCount(1); ! assertType("first node",StringNode.class,node[0]); ! StringNode stringNode = (StringNode)node[0]; ! assertStringEquals("text","Testing &\r\nRefactoring",stringNode.toPlainTextString()); ! } ! } --- 40,227 ---- public class StringParserTest extends ParserTestCase { ! public StringParserTest(String name) { ! super(name); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <HTML><HEAD><TITLE>Google</TITLE> <BR> ! * The above line is incorrectly parsed in that, the text Google is missed. ! * The presence of this bug is typically when some tag is identified before the string node is. (usually seen ! * with the end tag). The bug lies in NodeReader.readElement(). ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testStringNodeBug1() throws ParserException { ! createParser("<HTML><HEAD><TITLE>Google</TITLE>"); ! parseAndAssertNodeCount(5); ! // The fourth node should be a HTMLStringNode- with the text - Google ! assertTrue("Fourth node should be a HTMLStringNode",node[3] instanceof StringNode); ! StringNode stringNode = (StringNode)node[3]; ! assertEquals("Text of the StringNode","Google",stringNode.getText()); ! } ! ! /** ! * Bug reported by Kaarle Kaila of Nokia<br> ! * For the following HTML : ! * view these documents, you must have <A href='http://www.adobe.com'>Adobe <br> ! * Acrobat Reader</A> installed on your computer.<br> ! * The first string before the link is not identified, and the space after the link is also not identified ! * Creation date: (8/2/2001 2:07:32 AM) ! */ ! public void testStringNodeBug2() throws ParserException { ! // Register the link scanner ! ! createParser("view these documents, you must have <A href='http://www.adobe.com'>Adobe \n"+ ! "Acrobat Reader</A> installed on your computer."); ! Parser.setLineSeparator("\r\n"); ! parser.addScanner(new LinkScanner("-l")); ! 
parseAndAssertNodeCount(3); ! // The first node should be a HTMLStringNode- with the text - view these documents, you must have ! assertTrue("First node should be a HTMLStringNode",node[0] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertEquals("Text of the StringNode","view these documents, you must have ",stringNode.getText()); ! assertTrue("Second node should be a link node",node[1] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[1]; ! assertEquals("Link is","http://www.adobe.com",linkNode.getLink()); ! assertEquals("Link text is","Adobe \r\nAcrobat Reader",linkNode.getLinkText()); ! ! assertTrue("Third node should be a string node",node[2] instanceof StringNode); ! StringNode stringNode2 = (StringNode)node[2]; ! assertEquals("Contents of third node"," installed on your computer.",stringNode2.getText()); ! } ! ! /** ! * Bug reported by Roger Sollberger<br> ! * For the following HTML : ! * <a href="http://asgard.ch">[< ASGARD ></a><br> ! * The string node is not correctly identified ! */ ! public void testTagCharsInStringNode() throws ParserException { ! createParser("<a href=\"http://asgard.ch\">[> ASGARD <]</a>"); ! parser.addScanner(new LinkScanner("-l")); ! parseAndAssertNodeCount(1); ! assertTrue("Node identified must be a link tag",node[0] instanceof LinkTag); ! LinkTag linkTag = (LinkTag) node[0]; ! assertEquals("[> ASGARD <]",linkTag.getLinkText()); ! assertEquals("http://asgard.ch",linkTag.getLink()); ! } ! ! public void testToPlainTextString() throws ParserException { ! createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); ! parseAndAssertNodeCount(10); ! assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode); ! StringNode stringNode = (StringNode)node[3]; ! assertEquals("First String Node","This is the Title",stringNode.toPlainTextString()); ! 
assertTrue("Eighth Node identified must be a string node",node[7] instanceof StringNode); ! stringNode = (StringNode)node[7]; ! assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toPlainTextString()); ! } ! ! public void testToHTML() throws ParserException { ! createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); ! parseAndAssertNodeCount(10); ! assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode); ! StringNode stringNode = (StringNode)node[3]; ! assertEquals("First String Node","This is the Title",stringNode.toHtml()); ! assertTrue("Eighth Node identified must be a string node",node[7] instanceof StringNode); ! stringNode = (StringNode)node[7]; ! assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toHtml()); ! } ! public void testEmptyLines() throws ParserException { ! createParser( ! "David Nirenberg (Center for Advanced Study in the Behavorial Sciences, Stanford).<br>\n"+ ! " \n"+ ! "<br>" ! ); ! parseAndAssertNodeCount(4); ! assertTrue("Third Node identified must be a string node",node[2] instanceof StringNode); ! } ! /** ! * This is a bug reported by John Zook (586222), where the first few chars ! * before a remark is being missed, if its on the same line. ! */ ! public void testStringBeingMissedBug() throws ParserException { ! createParser( ! "Before Comment <!-- Comment --> After Comment" ! ); ! parseAndAssertNodeCount(3); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! assertTrue("Second node should be HTMLRemarkNode",node[1] instanceof RemarkNode); ! assertTrue("Third node should be HTMLStringNode",node[2] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertEquals("First String node contents","Before Comment ",stringNode.getText()); ! StringNode stringNode2 = (StringNode)node[2]; ! 
assertEquals("Second String node contents"," After Comment",stringNode2.getText()); ! RemarkNode remarkNode = (RemarkNode)node[1]; ! assertEquals("Remark Node contents"," Comment ",remarkNode.getText()); ! ! } ! /** ! * Based on a bug report submitted by Cedric Rosa, if the last line contains a single character, ! * StringNode does not return the string node correctly. ! */ ! public void testLastLineWithOneChar() throws ParserException { ! createParser("a"); ! parseAndAssertNodeCount(1); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertEquals("First String node contents","a",stringNode.getText()); ! } ! ! public void testStringWithEmptyLine() throws ParserException { ! createParser("a\n\nb"); ! parseAndAssertNodeCount(1); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertStringEquals("First String node contents","a\r\n\r\nb",stringNode.getText()); ! } ! ! /** ! * An attempt to reproduce bug 677176, which passes. ! * @throws Exception ! */ ! public void testStringParserBug() throws Exception { ! createParser( ! "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 " + ! "Transitional//EN\">" + ! "<html>" + ! "<head>" + ! "<title>Untitled Document</title>" + ! "<meta http-equiv=\"Content-Type\" content=\"text/html; " + ! "charset=iso-8859-1\">" + ! "</head>" + ! "<script language=\"JavaScript\" type=\"text/JavaScript\">" + ! "// if this fails, output a 'hello' " + ! "if (true) " + ! "{ " + ! "//something good... " + ! "} " + ! "</script>" + ! "<body>" + ! "</body>" + ! "</html>" ! ); ! parser.registerScanners(); ! parseAndAssertNodeCount(10); ! assertType("fourth node",MetaTag.class,node[4]); ! MetaTag metaTag = (MetaTag)node[4]; ! ! assertStringEquals( ! "content", ! "text/html; charset=iso-8859-1", ! metaTag.getAttribute("CONTENT") ! ); ! } ! ! 
public void testStringWithLineBreaks() throws Exception { ! createParser("Testing &\nRefactoring"); ! parseAndAssertNodeCount(1); ! assertType("first node",StringNode.class,node[0]); ! StringNode stringNode = (StringNode)node[0]; ! assertStringEquals("text","Testing &\r\nRefactoring",stringNode.toPlainTextString()); ! } ! } |