[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/scannersTests CompositeTagScannerTest.java,1.56
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-12-20 23:47:58
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv12747/org/htmlparser/tests/scannersTests Modified Files: CompositeTagScannerTest.java Log Message: Reduce recursion on the JVM stack in CompositeTagScanner. Pass a stack of open tags to the scanner. Add smarter tag closing by walking up the stack on encountering an unopened end tag. Avoids a problem with bad HTML such as that found at http://scores.nba.com/games/20031029/scoreboard.html by Shaun Roach. Added testInvalidNesting to CompositeTagScannerTest based on the above. Index: CompositeTagScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java,v retrieving revision 1.56 retrieving revision 1.57 diff -C2 -d -r1.56 -r1.57 *** CompositeTagScannerTest.java 8 Dec 2003 13:13:59 -0000 1.56 --- CompositeTagScannerTest.java 20 Dec 2003 23:47:55 -0000 1.57 *************** *** 37,40 **** --- 37,45 ---- import org.htmlparser.scanners.CompositeTagScanner; import org.htmlparser.tags.CompositeTag; + import org.htmlparser.tags.Div; + import org.htmlparser.tags.LinkTag; + import org.htmlparser.tags.TableColumn; + import org.htmlparser.tags.TableRow; + import org.htmlparser.tags.TableTag; import org.htmlparser.tags.Tag; import org.htmlparser.tests.ParserTestCase; *************** *** 756,759 **** --- 761,802 ---- return (mEndTagEnders); } + } + + /** + * Extracted from "http://scores.nba.com/games/20031029/scoreboard.html" + * which has a lot of table columns with unclosed DIV tags because the + * closing DIV doesn't have a slash. + * This caused java.lang.StackOverflowError on Windows. + * Tests the new non-recursive CompositeTagScanner with the walk back + * through the parse stack. + * See also Bug #750117 StackOverFlow while Node-Iteration and + * others. 
+ */ + public void testInvalidNesting () throws ParserException + { + String html = "<table cellspacing=\"2\" cellpadding=\"0\" border=\"0\" width=\"600\">\n" + + "<tr>\n" + + "<td><div class=\"ScoreBoardSec\"> <a target=\"_parent\" class=\"ScoreBoardSec\" href=\"http://www.nba.com/heat/\">Heat</a><div></td>\n" + + "</tr>\n" + + "</table>"; + createParser (html); + parseAndAssertNodeCount (1); + assertType ("table", TableTag.class, node[0]); + TableTag table = (TableTag)node[0]; + assertTrue ("table should have 3 nodes", 3 == table.getChildCount ()); + assertType ("row", TableRow.class, table.childAt (1)); + TableRow row = (TableRow)table.childAt (1); + assertTrue ("row should have 3 nodes", 3 == row.getChildCount ()); + assertType ("column", TableColumn.class, row.childAt (1)); + TableColumn column = (TableColumn)row.childAt (1); + assertTrue ("column should have 1 node", 1 == column.getChildCount ()); + assertType ("element", Div.class, column.childAt (0)); + Div div = (Div)column.childAt (0); + assertTrue ("div should have 3 nodes", 3 == div.getChildCount ()); + assertType ("link", LinkTag.class, div.childAt (1)); + LinkTag link = (LinkTag)div.childAt (1); + assertTrue ("link contents", link.getLink ().equals ("http://www.nba.com/heat/")); + assertType ("bogus div", Div.class, div.childAt (2)); + assertTrue ("bogus div should have no children", 0 == ((Div)div.childAt (2)).getChildCount ()); } } |