[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/scannersTests AllTests.java,1.52,1.53 Composite
Brought to you by:
derrickoswald
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv16537/tests/scannersTests Modified Files: AllTests.java CompositeTagScannerTest.java JspScannerTest.java ScriptScannerTest.java XmlEndTagScanningTest.java Removed Files: AppletScannerTest.java BaseHREFScannerTest.java BodyScannerTest.java BulletListScannerTest.java BulletScannerTest.java DivScannerTest.java FormScannerTest.java FrameScannerTest.java FrameSetScannerTest.java HeadScannerTest.java HtmlTest.java ImageScannerTest.java InputTagScannerTest.java LabelScannerTest.java LinkScannerTest.java MetaTagScannerTest.java OptionTagScannerTest.java SelectTagScannerTest.java SpanScannerTest.java StyleScannerTest.java TableScannerTest.java TextareaTagScannerTest.java TitleScannerTest.java Log Message: Remove most of the scanners. The only scanners left are ones that really do something different (script and jsp). Instead of registering a scanner to enable returning a specific tag, you now add a tag to a PrototypicalNodeFactory. All known tags are 'registered' by default in a new Parser, which is similar to having called the old 'registerDOMScanners()', so tags are fully nested. This is different behaviour, and specifically, you will need to recurse into returned nodes to get at what you want. I've tried to adjust the applications accordingly, but worked examples are still scarce. If you want to return only some of the derived tags while keeping most as generic tags, there are various constructors and manipulators on the factory. See the javadocs and examples in the tests package. Nearly all the old scanner tests are folded into the tag tests. toString() has been revamped. This means that the default Parser mainline now returns an indented listing of tags, making it easy to see the structure of a page. The downside is the text of the page had to have newlines, tabs etc. turned into escape sequences. 
But if you were really interested in content you would be using toHtml() or toPlainTextString(). Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/AllTests.java,v retrieving revision 1.52 retrieving revision 1.53 diff -C2 -d -r1.52 -r1.53 *** AllTests.java 9 Nov 2003 17:07:15 -0000 1.52 --- AllTests.java 7 Dec 2003 23:41:42 -0000 1.53 *************** *** 67,96 **** TestSuite suite = new TestSuite("Scanner Tests"); suite.addTestSuite(TagScannerTest.class); - suite.addTestSuite(AppletScannerTest.class); suite.addTestSuite(ScriptScannerTest.class); - suite.addTestSuite(ImageScannerTest.class); - suite.addTestSuite(LinkScannerTest.class); - suite.addTestSuite(StyleScannerTest.class); - suite.addTestSuite(MetaTagScannerTest.class); - suite.addTestSuite(TitleScannerTest.class); - suite.addTestSuite(FormScannerTest.class); - suite.addTestSuite(FrameScannerTest.class); - suite.addTestSuite(FrameSetScannerTest.class); - suite.addTestSuite(InputTagScannerTest.class); - suite.addTestSuite(OptionTagScannerTest.class); - suite.addTestSuite(SelectTagScannerTest.class); - suite.addTestSuite(TextareaTagScannerTest.class); - suite.addTestSuite(BaseHREFScannerTest.class); suite.addTestSuite(JspScannerTest.class); - suite.addTestSuite(TableScannerTest.class); - suite.addTestSuite(SpanScannerTest.class); - suite.addTestSuite(DivScannerTest.class); - suite.addTestSuite(LabelScannerTest.class); - suite.addTestSuite(BodyScannerTest.class); suite.addTestSuite(CompositeTagScannerTest.class); - suite.addTestSuite(HeadScannerTest.class); - suite.addTestSuite(BulletListScannerTest.class); - suite.addTestSuite(BulletScannerTest.class); - suite.addTestSuite(HtmlTest.class); suite.addTestSuite(XmlEndTagScanningTest.class); return suite; --- 67,73 ---- Index: CompositeTagScannerTest.java =================================================================== RCS file: 
/cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java,v retrieving revision 1.53 retrieving revision 1.54 diff -C2 -d -r1.53 -r1.54 *** CompositeTagScannerTest.java 9 Nov 2003 17:07:15 -0000 1.53 --- CompositeTagScannerTest.java 7 Dec 2003 23:41:42 -0000 1.54 *************** *** 32,35 **** --- 32,36 ---- import org.htmlparser.AbstractNode; import org.htmlparser.Node; + import org.htmlparser.PrototypicalNodeFactory; import org.htmlparser.StringNode; import org.htmlparser.lexer.Page; *************** *** 72,76 **** private CustomTag parseCustomTag(int expectedNodeCount) throws ParserException { ! parser.addScanner(new CustomScanner()); parseAndAssertNodeCount(expectedNodeCount); assertType("node",CustomTag.class,node[0]); --- 73,77 ---- private CustomTag parseCustomTag(int expectedNodeCount) throws ParserException { ! parser.setNodeFactory (new PrototypicalNodeFactory (new CustomTag ())); parseAndAssertNodeCount(expectedNodeCount); assertType("node",CustomTag.class,node[0]); *************** *** 150,155 **** "</Custom>" ); ! parser.addScanner(new AnotherScanner()); ! CustomTag customTag = parseCustomTag(1); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 151,164 ---- "</Custom>" ); ! parser.setNodeFactory ( ! new PrototypicalNodeFactory ( ! new Tag[] ! { ! new CustomTag (), ! new AnotherTag (true), ! })); ! parseAndAssertNodeCount(1); ! assertType("node",CustomTag.class,node[0]); ! CustomTag customTag = (CustomTag)node[0]; assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); *************** *** 175,179 **** "<Custom/>" ); ! parser.addScanner(new CustomScanner()); parseAndAssertNodeCount(2); assertType("tag 1",CustomTag.class,node[0]); --- 184,188 ---- "<Custom/>" ); ! 
parser.setNodeFactory (new PrototypicalNodeFactory (new CustomTag ())); parseAndAssertNodeCount(2); assertType("tag 1",CustomTag.class,node[0]); *************** *** 189,194 **** "<Custom/>" ); ! parser.addScanner(new CustomScanner()); ! parser.addScanner(new AnotherScanner()); parseAndAssertNodeCount(2); assertType("first node",CustomTag.class,node[0]); --- 198,207 ---- "<Custom/>" ); ! parser.setNodeFactory ( ! new PrototypicalNodeFactory ( ! new Tag[] { ! new CustomTag (), ! new AnotherTag (false), ! })); parseAndAssertNodeCount(2); assertType("first node",CustomTag.class,node[0]); *************** *** 211,216 **** "<Custom/>" ); ! parser.addScanner(new CustomScanner()); ! parser.addScanner(new AnotherScanner()); parseAndAssertNodeCount(2); assertType("first node",CustomTag.class,node[0]); --- 224,233 ---- "<Custom/>" ); ! parser.setNodeFactory ( ! new PrototypicalNodeFactory ( ! new Tag[] { ! new CustomTag (), ! new AnotherTag (false), ! })); parseAndAssertNodeCount(2); assertType("first node",CustomTag.class,node[0]); *************** *** 240,245 **** "<Custom/>" ); ! parser.addScanner(new CustomScanner()); ! parser.addScanner(new AnotherScanner()); parseAndAssertNodeCount(2); assertType("first node",CustomTag.class,node[0]); --- 257,266 ---- "<Custom/>" ); ! parser.setNodeFactory ( ! new PrototypicalNodeFactory ( ! new Tag[] { ! new CustomTag (), ! new AnotherTag (false), ! })); parseAndAssertNodeCount(2); assertType("first node",CustomTag.class,node[0]); *************** *** 301,305 **** String tag2 = "<custom></endtag>"; createParser(tag1 + tag2); ! parser.addScanner(new CustomScanner(false)); parseAndAssertNodeCount(2); CustomTag customTag = (CustomTag)node[0]; --- 322,326 ---- String tag2 = "<custom></endtag>"; createParser(tag1 + tag2); ! parser.setNodeFactory (new PrototypicalNodeFactory (new CustomTag (false))); parseAndAssertNodeCount(2); CustomTag customTag = (CustomTag)node[0]; *************** *** 323,328 **** custom ); ! 
parser.addScanner(new AnotherScanner()); ! parser.addScanner(new CustomScanner()); parseAndAssertNodeCount(2); AnotherTag anotherTag = (AnotherTag)node[0]; --- 344,353 ---- custom ); ! parser.setNodeFactory ( ! new PrototypicalNodeFactory ( ! new Tag[] { ! new CustomTag (), ! new AnotherTag (false), ! })); parseAndAssertNodeCount(2); AnotherTag anotherTag = (AnotherTag)node[0]; *************** *** 346,351 **** "</custom>" ); ! parser.addScanner(new AnotherScanner(true)); ! CustomTag customTag = parseCustomTag(1); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should be xml end tag",customTag.isEmptyXmlTag()); --- 371,384 ---- "</custom>" ); ! parser.setNodeFactory ( ! new PrototypicalNodeFactory ( ! new Tag[] ! { ! new CustomTag (), ! new AnotherTag (true), ! })); ! parseAndAssertNodeCount(1); ! assertType("node",CustomTag.class,node[0]); ! CustomTag customTag = (CustomTag)node[0]; assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should be xml end tag",customTag.isEmptyXmlTag()); *************** *** 368,373 **** "</custom>" ); ! parser.addScanner(new AnotherScanner(true)); ! CustomTag customTag = parseCustomTag(2); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 401,415 ---- "</custom>" ); ! parser.setNodeFactory ( ! new PrototypicalNodeFactory ( ! new Tag[] ! { ! new CustomTag (), ! new AnotherTag (true), ! })); ! parseAndAssertNodeCount(2); ! assertType("node",CustomTag.class,node[0]); ! CustomTag customTag = (CustomTag)node[0]; ! assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); *************** *** 399,404 **** "</custom>" ); ! parser.addScanner(new AnotherScanner(true)); ! 
CustomTag customTag = parseCustomTag(1); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 441,454 ---- "</custom>" ); ! parser.setNodeFactory ( ! new PrototypicalNodeFactory ( ! new Tag[] ! { ! new CustomTag (), ! new AnotherTag (true), ! })); ! parseAndAssertNodeCount(1); ! assertType("node",CustomTag.class,node[0]); ! CustomTag customTag = (CustomTag)node[0]; assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); *************** *** 418,423 **** String tag3 = "</custom>"; createParser(tag1 + tag2 + tag3); ! parser.addScanner(new CustomScanner(false)); ! parser.addScanner(new AnotherScanner()); parseAndAssertNodeCount(3); --- 468,477 ---- String tag3 = "</custom>"; createParser(tag1 + tag2 + tag3); ! parser.setNodeFactory ( ! new PrototypicalNodeFactory ( ! new Tag[] { ! new CustomTag (false), ! new AnotherTag (false), ! })); parseAndAssertNodeCount(3); *************** *** 450,455 **** String tag3 = "</custom>"; createParser(tag1 + tag2 + tag3); ! parser.addScanner(new CustomScanner(false)); ! parser.addScanner(new AnotherScanner()); parseAndAssertNodeCount(3); --- 504,513 ---- String tag3 = "</custom>"; createParser(tag1 + tag2 + tag3); ! parser.setNodeFactory ( ! new PrototypicalNodeFactory ( ! new Tag[] { ! new CustomTag (false), ! new AnotherTag (false), ! })); parseAndAssertNodeCount(3); *************** *** 499,514 **** createParser("<Custom/>","http://www.yahoo.com"); ! parser.addScanner(new CustomScanner() ! // { ! // public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! // { ! // if (null != page) ! // url = page.getUrl (); ! // else ! // url = null; ! // return (super.createTag (page, start, end, attributes, startTag, endTag, children)); ! // } ! // } ! 
); parseAndAssertNodeCount(1); assertStringEquals("url","http://www.yahoo.com",((AbstractNode)node[0]).getPage ().getUrl ()); --- 557,561 ---- createParser("<Custom/>","http://www.yahoo.com"); ! parser.setNodeFactory (new PrototypicalNodeFactory (new CustomTag ())); parseAndAssertNodeCount(1); assertStringEquals("url","http://www.yahoo.com",((AbstractNode)node[0]).getPage ().getUrl ()); *************** *** 526,531 **** "</custom>" ); ! parser.addScanner(new CustomScanner()); ! parser.addScanner(new AnotherScanner(false)); parseAndAssertNodeCount(1); assertType("root node",CustomTag.class, node[0]); --- 573,582 ---- "</custom>" ); ! parser.setNodeFactory ( ! new PrototypicalNodeFactory ( ! new Tag[] { ! new CustomTag (), ! new AnotherTag (false), ! })); parseAndAssertNodeCount(1); assertType("root node",CustomTag.class, node[0]); *************** *** 550,554 **** "</custom>" ); ! parser.addScanner(new CustomScanner(false)); parseAndAssertNodeCount(3); for (int i=0;i<nodeCount;i++) { --- 601,605 ---- "</custom>" ); ! parser.setNodeFactory (new PrototypicalNodeFactory (new CustomTag (false))); parseAndAssertNodeCount(3); for (int i=0;i<nodeCount;i++) { *************** *** 637,640 **** --- 688,696 ---- protected String[] mEnders; + /** + * The default scanner for custom tags. + */ + protected final static CustomScanner mDefaultScanner = new CustomScanner (); + public CustomTag () { *************** *** 648,651 **** --- 704,708 ---- else mEnders = mIds; + setThisScanner (mDefaultScanner); } *************** *** 667,670 **** --- 724,729 ---- return (mEnders); } + + } *************** *** 686,689 **** --- 745,753 ---- private final String[] mEndTagEnders; + /** + * The default scanner for custom tags. 
+ */ + protected final static AnotherScanner mDefaultScanner = new AnotherScanner (); + public AnotherTag (boolean acceptCustomTagsButDontAcceptCustomEndTags) { *************** *** 698,701 **** --- 762,766 ---- mEndTagEnders = new String[] {"CUSTOM"}; } + setThisScanner (mDefaultScanner); } Index: JspScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/JspScannerTest.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** JspScannerTest.java 9 Nov 2003 17:07:15 -0000 1.33 --- JspScannerTest.java 7 Dec 2003 23:41:42 -0000 1.34 *************** *** 30,33 **** --- 30,34 ---- import org.htmlparser.Parser; + import org.htmlparser.PrototypicalNodeFactory; import org.htmlparser.scanners.JspScanner; import org.htmlparser.tags.JspTag; *************** *** 58,63 **** "</h1>"); ! // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); parseAndAssertNodeCount(5); // The first node should be an JspTag --- 59,63 ---- "</h1>"); ! parser.setNodeFactory (new PrototypicalNodeFactory (new JspTag ())); parseAndAssertNodeCount(5); // The first node should be an JspTag *************** *** 89,94 **** "%>"); Parser.setLineSeparator("\r\n"); ! // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); parseAndAssertNodeCount(1); } --- 89,93 ---- "%>"); Parser.setLineSeparator("\r\n"); ! 
parser.setNodeFactory (new PrototypicalNodeFactory (new JspTag ())); parseAndAssertNodeCount(1); } Index: ScriptScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/ScriptScannerTest.java,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** ScriptScannerTest.java 9 Nov 2003 17:07:15 -0000 1.47 --- ScriptScannerTest.java 7 Dec 2003 23:41:42 -0000 1.48 *************** *** 34,37 **** --- 34,38 ---- import org.htmlparser.Parser; import org.htmlparser.scanners.ScriptScanner; + import org.htmlparser.tags.BodyTag; import org.htmlparser.tags.ScriptTag; import org.htmlparser.tests.ParserTestCase; *************** *** 53,58 **** String testHtml = "<SCRIPT>document.write(d+\".com\")</SCRIPT>"; createParser(testHtml,"http://www.google.com/test/index.html"); - // Register the script scanner - parser.addScanner(new ScriptScanner("-s")); parseAndAssertNodeCount(1); assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); --- 54,57 ---- *************** *** 74,79 **** String src = "../js/DetermineBrowser.js"; createParser("<SCRIPT LANGUAGE=\"JavaScript\" SRC=\"" + src + "\"></SCRIPT>","http://www.google.com/test/index.html"); - // Register the image scanner - parser.addScanner(new ScriptScanner("-s")); parseAndAssertNodeCount(1); assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); --- 73,76 ---- *************** *** 113,125 **** createParser(testHTML1,"http://www.google.com/test/index.html"); Parser.setLineSeparator("\r\n"); ! // Register the image scanner ! parser.addScanner(new ScriptScanner("-s")); ! ! parseAndAssertNodeCount(2); ! ! assertTrue("Node should be a script tag",node[1] ! instanceof ScriptTag); ! // Check the data in the applet tag ! 
ScriptTag scriptTag = (ScriptTag)node[1]; String s = scriptTag.getScriptCode(); assertStringEquals("Expected Script Code",testHTML2,s); --- 110,120 ---- createParser(testHTML1,"http://www.google.com/test/index.html"); Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a body tag", node[0] instanceof BodyTag); ! BodyTag body = (BodyTag)node[0]; ! assertTrue("Node should have one child", 1 == body.getChildCount ()); ! assertTrue("Child should be a script tag", body.getChild (0) instanceof ScriptTag); ! // Check the data in the script tag ! ScriptTag scriptTag = (ScriptTag)body.getChild (0); String s = scriptTag.getScriptCode(); assertStringEquals("Expected Script Code",testHTML2,s); *************** *** 135,142 **** createParser(testHTML1,"http://www.hardwareextreme.com/"); - // Register the image scanner - parser.registerScanners(); - //parser.addScanner(new HTMLScriptScanner("-s")); - parseAndAssertNodeCount(2); assertTrue("Node should be a script tag",node[0] --- 130,133 ---- *************** *** 156,161 **** createParser("<SCRIPT Language=\"JavaScript\">"+expectedCode+ "</SCRIPT>","http://www.hardwareextreme.com/"); - // Register the image scanner - parser.registerScanners(); parseAndAssertNodeCount(1); assertTrue("Node should be a script tag",node[0] --- 147,150 ---- *************** *** 180,185 **** "</SCRIPT>"; createParser(testHtml); - - parser.addScanner(new ScriptScanner("-s")); parseAndAssertNodeCount(1); ScriptTag scriptTag = (ScriptTag)node[0]; --- 169,172 ---- *************** *** 219,223 **** "</html>" ); - parser.registerScanners(); Node scriptNodes [] = parser.extractAllNodesThatAre(ScriptTag.class); --- 206,209 ---- *************** *** 250,254 **** "</SCRIPT>" ); - parser.registerScanners(); parseAndAssertNodeCount(1); assertType("script",ScriptTag.class,node[0]); --- 236,239 ---- *************** *** 269,273 **** "</SCRIPT>" ); - parser.registerScanners(); parseAndAssertNodeCount(1); 
assertType("script",ScriptTag.class,node[0]); --- 254,257 ---- *************** *** 485,489 **** "</script>" ); - parser.registerScanners(); parseAndAssertNodeCount(1); --- 469,472 ---- *************** *** 509,513 **** String scriptContents = "alert()\r\nalert()"; createParser("<script>" + scriptContents + "</script>"); - parser.registerScanners(); parseAndAssertNodeCount(1); assertType("script",ScriptTag.class,node[0]); --- 492,495 ---- *************** *** 526,530 **** public void testScanNoEndTag() throws ParserException { createParser("<script>"); - parser.addScanner(new ScriptScanner("-s")); parseAndAssertNodeCount(1); } --- 508,511 ---- *************** *** 537,541 **** String html = "<SCRIPT language=\"JavaScript\">document.write('</SCRIPT>');</SCRIPT>"; createParser(html); - parser.addScanner(new ScriptScanner("-s")); parseAndAssertNodeCount(1); assertStringEquals ("Parse error", html, node[0].toHtml ()); --- 518,521 ---- *************** *** 547,551 **** String javascript = "\n// This is javascript with <li> tag in the comment\n"; createParser("<script>"+ javascript + "</script>"); - parser.registerScanners(); parseAndAssertNodeCount(1); assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); --- 527,530 ---- *************** *** 561,565 **** "that spans multiple lines;\"\n"; createParser("<script>"+ javascript + "</script>"); - parser.registerScanners(); parseAndAssertNodeCount(1); assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); --- 540,543 ---- *************** *** 573,577 **** String javascript = "\nAnything inside the script tag should be unchanged, even <li> and other html tags\n"; createParser("<script>"+ javascript + "</script>"); - parser.registerScanners(); parseAndAssertNodeCount(1); assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); --- 551,554 ---- Index: XmlEndTagScanningTest.java =================================================================== RCS file: 
/cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/XmlEndTagScanningTest.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** XmlEndTagScanningTest.java 9 Nov 2003 17:07:15 -0000 1.33 --- XmlEndTagScanningTest.java 7 Dec 2003 23:41:42 -0000 1.34 *************** *** 46,50 **** public void testSingleTagParsing() throws ParserException { createParser("<div style=\"page-break-before: always; \" />"); - parser.registerScanners(); parseAndAssertNodeCount(1); assertType("div tag",Div.class,node[0]); --- 46,49 ---- --- AppletScannerTest.java DELETED --- --- BaseHREFScannerTest.java DELETED --- --- BodyScannerTest.java DELETED --- --- BulletListScannerTest.java DELETED --- --- BulletScannerTest.java DELETED --- --- DivScannerTest.java DELETED --- --- FormScannerTest.java DELETED --- --- FrameScannerTest.java DELETED --- --- FrameSetScannerTest.java DELETED --- --- HeadScannerTest.java DELETED --- --- HtmlTest.java DELETED --- --- ImageScannerTest.java DELETED --- --- InputTagScannerTest.java DELETED --- --- LabelScannerTest.java DELETED --- --- LinkScannerTest.java DELETED --- --- MetaTagScannerTest.java DELETED --- --- OptionTagScannerTest.java DELETED --- --- SelectTagScannerTest.java DELETED --- --- SpanScannerTest.java DELETED --- --- StyleScannerTest.java DELETED --- --- TableScannerTest.java DELETED --- --- TextareaTagScannerTest.java DELETED --- --- TitleScannerTest.java DELETED --- |