[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/parserHelperTests RemarkNodeParserTest.java,1.4
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-12-07 23:41:45
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests In directory sc8-pr-cvs1:/tmp/cvs-serv16537/tests/parserHelperTests Modified Files: RemarkNodeParserTest.java StringParserTest.java Log Message: Remove most of the scanners. The only scanners left are ones that really do something different (script and jsp). Instead of registering a scanner to enable returning a specific tag you now add a tag to a PrototypicalNodeFactory. All known tags are 'registered' by default in a new Parser which is similar to having called the old 'registerDOMScanners()', so tags are fully nested. This is different behaviour, and specifically, you will need to recurse into returned nodes to get at what you want. I've tried to adjust the applications accordingly, but worked examples are still scarce. If you want to return only some of the derived tags while keeping most as generic tags, there are various constructors and manipulators on the factory. See the javadocs and examples in the tests package. Nearly all the old scanner tests are folded into the tag tests. toString() has been revamped. This means that the default Parser mainline now returns an indented listing of tags, making it easy to see the structure of a page. The downside is that the text of the page had to have newlines, tabs, etc. turned into escape sequences. But if you were really interested in content you would be using toHtml() or toPlainTextString(). 
Index: RemarkNodeParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/RemarkNodeParserTest.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** RemarkNodeParserTest.java 9 Nov 2003 17:07:15 -0000 1.40 --- RemarkNodeParserTest.java 7 Dec 2003 23:41:41 -0000 1.41 *************** *** 31,34 **** --- 31,35 ---- import org.htmlparser.Parser; + import org.htmlparser.PrototypicalNodeFactory; import org.htmlparser.RemarkNode; import org.htmlparser.StringNode; *************** *** 75,78 **** --- 76,80 ---- "<TEST>\n"+ "</TEST>\n"); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); Parser.setLineSeparator("\r\n"); parseAndAssertNodeCount(15); *************** *** 98,101 **** --- 100,104 ---- "<TEST>\n"+ "</TEST>\n"); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); Parser.setLineSeparator("\r\n"); parseAndAssertNodeCount(15); *************** *** 122,125 **** --- 125,129 ---- "<TEST>\n"+ "</TEST>\n"); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); Parser.setLineSeparator("\r\n"); parseAndAssertNodeCount(15); *************** *** 157,160 **** --- 161,165 ---- "\n"+ "-->"); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); Parser.setLineSeparator("\r\n"); parseAndAssertNodeCount(1); *************** *** 172,175 **** --- 177,181 ---- public void testRemarkNodeWithNothing() throws ParserException { createParser("<!-->"); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertTrue("Node should be a RemarkNode",node[0] instanceof RemarkNode); *************** *** 189,192 **** --- 195,199 ---- "<A>\n"+ "bcd -->"); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); Parser.setLineSeparator("\n"); parseAndAssertNodeCount(1); *************** *** 210,213 **** --- 217,221 ---- "-\n"+ "ssd -->"); + parser.setNodeFactory (new 
PrototypicalNodeFactory (true)); Parser.setLineSeparator("\n"); parseAndAssertNodeCount(1); *************** *** 227,230 **** --- 235,239 ---- public void testDashesInComment() throws ParserException{ createParser("<!-- -- -->"); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertTrue("Node should be a HTMLRemarkNode but was "+node[0],node[0] instanceof RemarkNode); *************** *** 273,276 **** --- 282,286 ---- + "</HTML>\n" ); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(18); assertTrue("Node should be a RemarkNode but was "+node[12],node[12] instanceof RemarkNode); *************** *** 296,299 **** --- 306,310 ---- + "</HTML>\n" ); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(18); assertTrue("Node should be a RemarkNode but was "+node[12],node[12] instanceof RemarkNode); *************** *** 319,322 **** --- 330,334 ---- + "</HTML>\n" ); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(18); assertTrue("Node should be a RemarkNode but was "+node[12],node[12] instanceof RemarkNode); *************** *** 369,372 **** --- 381,385 ---- + "</html>\n" ); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount (18); } Index: StringParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/StringParserTest.java,v retrieving revision 1.43 retrieving revision 1.44 diff -C2 -d -r1.43 -r1.44 *** StringParserTest.java 9 Nov 2003 17:07:15 -0000 1.43 --- StringParserTest.java 7 Dec 2003 23:41:42 -0000 1.44 *************** *** 29,36 **** package org.htmlparser.tests.parserHelperTests; import org.htmlparser.Parser; import org.htmlparser.RemarkNode; import org.htmlparser.StringNode; ! 
import org.htmlparser.scanners.LinkScanner; import org.htmlparser.tags.LinkTag; import org.htmlparser.tags.MetaTag; --- 29,39 ---- package org.htmlparser.tests.parserHelperTests; + import org.htmlparser.Parser; + import org.htmlparser.PrototypicalNodeFactory; import org.htmlparser.RemarkNode; import org.htmlparser.StringNode; ! import org.htmlparser.tags.HeadTag; ! import org.htmlparser.tags.Html; import org.htmlparser.tags.LinkTag; import org.htmlparser.tags.MetaTag; *************** *** 59,62 **** --- 62,66 ---- public void testStringNodeBug1() throws ParserException { createParser("<HTML><HEAD><TITLE>Google</TITLE>"); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(5); // The fourth node should be a StringNode- with the text - Google *************** *** 80,84 **** "Acrobat Reader</A> installed on your computer."); Parser.setLineSeparator("\r\n"); - parser.addScanner(new LinkScanner("-l")); parseAndAssertNodeCount(3); // The first node should be a StringNode- with the text - view these documents, you must have --- 84,87 ---- *************** *** 104,108 **** public void testTagCharsInStringNode() throws ParserException { createParser("<a href=\"http://asgard.ch\">[> ASGARD <]</a>"); - parser.addScanner(new LinkScanner("-l")); parseAndAssertNodeCount(1); assertTrue("Node identified must be a link tag",node[0] instanceof LinkTag); --- 107,110 ---- *************** *** 114,117 **** --- 116,120 ---- public void testToPlainTextString() throws ParserException { createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(10); assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode); *************** *** 125,128 **** --- 128,132 ---- public void testToHTML() throws ParserException { createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello 
World, this is the HTML Parser</BODY></HTML>"); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(10); assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode); *************** *** 140,143 **** --- 144,148 ---- "<br>" ); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(4); assertTrue("Third Node identified must be a string node",node[2] instanceof StringNode); *************** *** 152,155 **** --- 157,161 ---- "Before Comment <!-- Comment --> After Comment" ); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(3); assertTrue("First node should be StringNode",node[0] instanceof StringNode); *************** *** 171,174 **** --- 177,181 ---- public void testLastLineWithOneChar() throws ParserException { createParser("a"); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertTrue("First node should be StringNode",node[0] instanceof StringNode); *************** *** 180,183 **** --- 187,191 ---- String text = "a\n\nb"; createParser(text); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertTrue("First node should be StringNode",node[0] instanceof StringNode); *************** *** 211,218 **** "</html>" ); ! parser.registerScanners(); ! parseAndAssertNodeCount(10); ! assertType("fourth node",MetaTag.class,node[4]); ! MetaTag metaTag = (MetaTag)node[4]; assertStringEquals( --- 219,231 ---- "</html>" ); ! parseAndAssertNodeCount(2); ! assertTrue(node[1] instanceof Html); ! Html htmlTag = (Html)node[1]; ! assertTrue("The HTML tag should have 3 nodes", 3 == htmlTag.getChildCount ()); ! assertTrue("The first child should be a HEAD tag",htmlTag.getChild(0) instanceof HeadTag); ! HeadTag headTag = (HeadTag)htmlTag.getChild(0); ! assertTrue("The HEAD tag should have 2 nodes", 2 == headTag.getChildCount ()); ! 
assertTrue("The second child should be a META tag",headTag.getChild(1) instanceof MetaTag); ! MetaTag metaTag = (MetaTag)headTag.getChild(1); assertStringEquals( *************** *** 226,229 **** --- 239,243 ---- String text = "Testing &\nRefactoring"; createParser(text); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertType("first node",StringNode.class,node[0]); |