[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/lexerTests AttributeTests.java,1.8,1.9 LexerTests.java,1.13,1.14
Brought to you by:
derrickoswald
From: <der...@us...> - 2004-01-02 05:01:32
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests
In directory sc8-pr-cvs1:/tmp/cvs-serv27740/lexerTests

Modified Files:
    AttributeTests.java LexerTests.java

Log Message:
Added testcases but was unable to reproduce the following bugs in the version 1.4 codebase:
    839264 toHtml() parse error in Javascripts with "form" keyword
    833592 DOCTYPE element is not parsed correctly
    826764 ParserException occurs only when using setInputHTML() instead [title truncated in the original message]
    825820 Words conjoined
    825645 <input> not getting parsed inside table
    813838 links not parsed correctly
and
    #851882 zero length alt tag causes bug in ImageScanner
    #832530 empty attribute causes parser to fail
    #805598 attribute src in tag img sometimes not correctly parsed
(these 3 are all the same bug, duplicates of the following):
    #753012 IMG SRC not parsed v1.3 & v1.4
    #755929 Empty string attr. value causes attr parsing to be stopped
    #778781 SRC-attribute suppression in IMG-tags
Also reviewed these test cases, again, with none reproducible in 1.4:
    #788746 parser crashes on comments like <!-- foobar --!>
    #772700 Jsp Tags are not parsed correctly when in quoted attributes.
Index: AttributeTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/AttributeTests.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** AttributeTests.java 8 Dec 2003 01:31:53 -0000 1.8 --- AttributeTests.java 2 Jan 2004 05:01:28 -0000 1.9 *************** *** 38,41 **** --- 38,42 ---- import org.htmlparser.lexer.nodes.Attribute; import org.htmlparser.lexer.nodes.PageAttribute; + import org.htmlparser.tags.ImageTag; import org.htmlparser.tags.Tag; import org.htmlparser.tests.ParserTestCase; *************** *** 519,522 **** --- 520,604 ---- assertTrue ("Attribute missing", table.containsKey ("OTHER")); assertEquals ("Attribute has wrong value", "fred", (String)table.get ("OTHER")); + } + + /** + * see bug #778781 SRC-attribute suppression in IMG-tags + * & #753012 IMG SRC not parsed v1.3 & v1.4 + * & #755929 Empty string attr. value causes attr parsing to be stopped + * & #778781 SRC-attribute suppression in IMG-tags + * & #832530 empty attribute causes parser to fail + * & #851882 zero length alt tag causes bug in ImageScanner + * + * HTML before parse: + * <img src="images/first" alt="first">" + * <img src="images/second" alt=""> + * <img alt="third" src="images/third"> + * <img alt="" src="images/fourth"> + * + * HTML after parse: + * <IMG ALT="first" SRC="images/first"> + * <IMG ALT="" SRC="images/second"> + * <IMG ALT="third" SRC="images/third"> + * <IMG ALT=""> + */ + public void testSrcAndAlt () throws ParserException + { + String html = "<img src=\"images/first\" alt=\"first\">"; + + createParser (html); + parseAndAssertNodeCount (1); + assertTrue ("Node should be an ImageTag", node[0] instanceof ImageTag); + ImageTag img = (ImageTag)node[0]; + assertTrue ("bad source", "images/first".equals (img.getImageURL ())); + assertTrue ("bad alt", "first".equals (img.getAttribute ("alt"))); + assertStringEquals ("toHtml()", html, 
img.toHtml ()); + } + + /** + * see bug #778781 SRC-attribute suppression in IMG-tags + */ + public void testSrcAndEmptyAlt () throws ParserException + { + String html = "<img src=\"images/second\" alt=\"\">"; + + createParser (html); + parseAndAssertNodeCount (1); + assertTrue ("Node should be an ImageTag", node[0] instanceof ImageTag); + ImageTag img = (ImageTag)node[0]; + assertTrue ("bad source", "images/second".equals (img.getImageURL ())); + assertTrue ("bad alt", "".equals (img.getAttribute ("alt"))); + assertStringEquals ("toHtml()", html, img.toHtml ()); + } + + /** + * see bug #778781 SRC-attribute suppression in IMG-tags + */ + public void testAltAndSrc () throws ParserException + { + String html = "<img alt=\"third\" src=\"images/third\">"; + + createParser (html); + parseAndAssertNodeCount (1); + assertTrue ("Node should be an ImageTag", node[0] instanceof ImageTag); + ImageTag img = (ImageTag)node[0]; + assertTrue ("bad source", "images/third".equals (img.getImageURL ())); + assertTrue ("bad alt", "third".equals (img.getAttribute ("alt"))); + assertStringEquals ("toHtml()", html, img.toHtml ()); + } + + /** + * see bug #778781 SRC-attribute suppression in IMG-tags + */ + public void testEmptyAltAndSrc () throws ParserException + { + String html = "<img alt=\"\" src=\"images/third\">"; + + createParser (html); + parseAndAssertNodeCount (1); + assertTrue ("Node should be an ImageTag", node[0] instanceof ImageTag); + ImageTag img = (ImageTag)node[0]; + assertTrue ("bad source", "images/third".equals (img.getImageURL ())); + assertTrue ("bad alt", "".equals (img.getAttribute ("alt"))); + assertStringEquals ("toHtml()", html, img.toHtml ()); } } Index: LexerTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/LexerTests.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** LexerTests.java 31 Dec 2003 14:40:50 -0000 1.13 
--- LexerTests.java 2 Jan 2004 05:01:28 -0000 1.14 *************** *** 697,700 **** --- 697,750 ---- } + /** + * See bug #825820 Words conjoined + */ + public void testConjoined () + throws + ParserException + { + StringBuffer buffer; + NodeIterator iterator; + Node node; + String expected; + + expected = "The Title\nThis is the body."; + String html1 = "<html><title>The Title\n</title>" + + "<body>This is <a href=\"foo.html\">the body</a>.</body></html>"; + createParser (html1); + buffer = new StringBuffer (); + for (iterator = parser.elements (); iterator.hasMoreNodes (); ) + { + node = iterator.nextNode (); + String text = node.toPlainTextString (); + buffer.append (text); + } + assertStringEquals ("conjoined text", expected, buffer.toString ()); + + String html2 = "<html><title>The Title</title>\n" + + "<body>This is <a href=\"foo.html\">the body</a>.</body></html>"; + createParser (html2); + buffer = new StringBuffer (); + for (iterator = parser.elements (); iterator.hasMoreNodes (); ) + { + node = iterator.nextNode (); + String text = node.toPlainTextString (); + buffer.append (text); + } + assertStringEquals ("conjoined text", expected, buffer.toString ()); + + String html3 = "<html><title>The Title</title>" + + "<body>\nThis is <a href=\"foo.html\">the body</a>.</body></html>"; + createParser (html3); + buffer = new StringBuffer (); + for (iterator = parser.elements (); iterator.hasMoreNodes (); ) + { + node = iterator.nextNode (); + String text = node.toPlainTextString (); + buffer.append (text); + } + assertStringEquals ("conjoined text", expected, buffer.toString ()); + } + } |