[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests FunctionalTests.java,1.50,1.51 InstanceofPerfor
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-12-07 23:42:15
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests In directory sc8-pr-cvs1:/tmp/cvs-serv16537/tests Modified Files: FunctionalTests.java InstanceofPerformanceTest.java LineNumberAssignedByNodeReaderTest.java ParserTest.java ParserTestCase.java PerformanceTest.java Log Message: Remove most of the scanners. The only scanners left are ones that really do something different (script and jsp). Instead of registering a scanner to enable returning a specific tag you now add a tag to a PrototypicalNodeFactory. All known tags are 'registered' by default in a new Parser which is similar to having called the old 'registerDOMScanners()', so tags are fully nested. This is different behaviour, and specifically, you will need to recurse into returned nodes to get at what you want. I've tried to adjust the applications accordingly, but worked examples are still scarce. If you want to return only some of the derived tags while keeping most as generic tags, there are various constructors and manipulators on the factory. See the javadocs and examples in the tests package. Nearly all the old scanner tests are folded into the tag tests. toString() has been revamped. This means that the default Parser mainline now returns an indented listing of tags, making it easy to see the structure of a page. The downside is the text of the page had to have newlines, tabs etc. turned into escape sequences. But if you were really interested in content you would be using toHtml() or toPlainTextString(). Index: FunctionalTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/FunctionalTests.java,v retrieving revision 1.50 retrieving revision 1.51 diff -C2 -d -r1.50 -r1.51 *** FunctionalTests.java 9 Nov 2003 17:07:13 -0000 1.50 --- FunctionalTests.java 7 Dec 2003 23:41:41 -0000 1.51 *************** *** 42,46 **** import org.htmlparser.Node; import org.htmlparser.Parser; ! 
import org.htmlparser.scanners.ImageScanner; import org.htmlparser.tags.ImageTag; import org.htmlparser.util.DefaultParserFeedback; --- 42,46 ---- import org.htmlparser.Node; import org.htmlparser.Parser; ! import org.htmlparser.PrototypicalNodeFactory; import org.htmlparser.tags.ImageTag; import org.htmlparser.util.DefaultParserFeedback; *************** *** 89,93 **** public int countImageTagsWithHTMLParser() throws ParserException { Parser parser = new Parser("http://education.yahoo.com/",new DefaultParserFeedback()); ! parser.addScanner(new ImageScanner("-i")); setParser (parser); int parserImgTagCount = 0; --- 89,93 ---- public int countImageTagsWithHTMLParser() throws ParserException { Parser parser = new Parser("http://education.yahoo.com/",new DefaultParserFeedback()); ! parser.setNodeFactory (new PrototypicalNodeFactory (new ImageTag ())); setParser (parser); int parserImgTagCount = 0; Index: InstanceofPerformanceTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/InstanceofPerformanceTest.java,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** InstanceofPerformanceTest.java 9 Nov 2003 17:07:13 -0000 1.17 --- InstanceofPerformanceTest.java 7 Dec 2003 23:41:41 -0000 1.18 *************** *** 35,43 **** import org.htmlparser.Parser; import org.htmlparser.tags.FormTag; - import org.htmlparser.tests.scannersTests.FormScannerTest; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.SimpleNodeIterator; public class InstanceofPerformanceTest { FormTag formTag; Vector formChildren; --- 35,59 ---- import org.htmlparser.Parser; import org.htmlparser.tags.FormTag; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.SimpleNodeIterator; public class InstanceofPerformanceTest { + + public static final String FORM_HTML = + "<FORM METHOD=\""+FormTag.POST+"\" ACTION=\"do_login.php\" NAME=\"login_form\" onSubmit=\"return 
CheckData()\">\n"+ + "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ + "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>User Name</b></font></TD></TR>\n"+ + "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"text\" NAME=\"name\" SIZE=\"20\"></TD></TR>\n"+ + "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>Password</b></font></TD></TR>\n"+ + "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"password\" NAME=\"passwd\" SIZE=\"20\"></TD></TR>\n"+ + "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ + "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"submit\" NAME=\"submit\" VALUE=\"Login\"></TD></TR>\n"+ + "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ + "<TEXTAREA name=\"Description\" rows=\"15\" cols=\"55\" wrap=\"virtual\" class=\"composef\" tabindex=\"5\">Contents of TextArea</TEXTAREA>\n"+ + // "<TEXTAREA name=\"AnotherDescription\" rows=\"15\" cols=\"55\" wrap=\"virtual\" class=\"composef\" tabindex=\"5\">\n"+ + "<INPUT TYPE=\"hidden\" NAME=\"password\" SIZE=\"20\">\n"+ + "<INPUT TYPE=\"submit\">\n"+ + "</FORM>"; + FormTag formTag; Vector formChildren; *************** *** 45,51 **** Parser parser = Parser.createParser( ! FormScannerTest.FORM_HTML ); - parser.registerScanners(); NodeIterator e = parser.elements(); Node node = e.nextNode(); --- 61,66 ---- Parser parser = Parser.createParser( ! FORM_HTML ); NodeIterator e = parser.elements(); Node node = e.nextNode(); Index: LineNumberAssignedByNodeReaderTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/LineNumberAssignedByNodeReaderTest.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** LineNumberAssignedByNodeReaderTest.java 9 Nov 2003 17:07:13 -0000 1.28 --- LineNumberAssignedByNodeReaderTest.java 7 Dec 2003 23:41:41 -0000 1.29 *************** *** 35,41 **** import junit.framework.TestSuite; ! 
import org.htmlparser.tests.scannersTests.CompositeTagScannerTest.CustomScanner; import org.htmlparser.tests.scannersTests.CompositeTagScannerTest.CustomTag; import org.htmlparser.util.ParserException; /** * @author Somik Raha --- 35,42 ---- import junit.framework.TestSuite; ! import org.htmlparser.PrototypicalNodeFactory; import org.htmlparser.tests.scannersTests.CompositeTagScannerTest.CustomTag; import org.htmlparser.util.ParserException; + /** * @author Somik Raha *************** *** 145,149 **** private void testLineNumber(String xml, int numNodes, int useNode, int expectedStartLine, int expectedEndLine) throws ParserException { createParser(xml); ! parser.addScanner(new CustomScanner()); parseAndAssertNodeCount(numNodes); assertType("custom node",CustomTag.class,node[useNode]); --- 146,150 ---- private void testLineNumber(String xml, int numNodes, int useNode, int expectedStartLine, int expectedEndLine) throws ParserException { createParser(xml); ! parser.setNodeFactory (new PrototypicalNodeFactory (new CustomTag ())); parseAndAssertNodeCount(numNodes); assertType("custom node",CustomTag.class,node[useNode]); Index: ParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTest.java,v retrieving revision 1.49 retrieving revision 1.50 diff -C2 -d -r1.49 -r1.50 *** ParserTest.java 9 Nov 2003 17:07:14 -0000 1.49 --- ParserTest.java 7 Dec 2003 23:41:41 -0000 1.50 *************** *** 40,43 **** --- 40,44 ---- import org.htmlparser.Node; import org.htmlparser.Parser; + import org.htmlparser.PrototypicalNodeFactory; import org.htmlparser.StringNode; import org.htmlparser.filters.NodeClassFilter; *************** *** 45,53 **** import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; - import org.htmlparser.scanners.FormScanner; import org.htmlparser.scanners.TagScanner; import org.htmlparser.tags.BodyTag; import org.htmlparser.tags.ImageTag; import 
org.htmlparser.tags.LinkTag; import org.htmlparser.tags.Tag; import org.htmlparser.util.DefaultParserFeedback; --- 46,54 ---- import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; import org.htmlparser.scanners.TagScanner; import org.htmlparser.tags.BodyTag; import org.htmlparser.tags.ImageTag; import org.htmlparser.tags.LinkTag; + import org.htmlparser.tags.MetaTag; import org.htmlparser.tags.Tag; import org.htmlparser.util.DefaultParserFeedback; *************** *** 300,303 **** --- 301,305 ---- out.close (); parser = new Parser (connection); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); } catch (Exception e) *************** *** 352,355 **** --- 354,358 ---- out.close (); parser = new Parser (file.getAbsolutePath (), new DefaultParserFeedback(DefaultParserFeedback.QUIET)); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); nodes = new AbstractNode[30]; i = 0; *************** *** 404,408 **** { parser = new Parser("http://www.sony.co.jp", Parser.noFeedback); - parser.registerScanners (); assertEquals("Character set by default is ISO-8859-1", "ISO-8859-1", parser.getEncoding ()); enumeration = parser.elements(); --- 407,410 ---- *************** *** 432,435 **** --- 434,438 ---- parser = new Parser(url); + parser.setNodeFactory (new PrototypicalNodeFactory (new MetaTag ())); i = 0; nodes = new AbstractNode[30]; *************** *** 454,458 **** parser = new Parser(url); - parser.registerScanners (); for (NodeIterator e = parser.elements();e.hasMoreNodes();) e.nextNode(); --- 457,460 ---- *************** *** 475,479 **** parser = new Parser(url); - parser.registerScanners (); for (NodeIterator e = parser.elements();e.hasMoreNodes();) e.nextNode(); --- 477,480 ---- *************** *** 544,548 **** page.setConnection (connection); parser = new Parser (new Lexer (page)); - parser.registerScanners (); // must be the default assertTrue ("Wrong encoding", parser.getEncoding ().equals ("ISO-8859-1")); --- 545,548 ---- *************** 
*** 575,578 **** --- 575,579 ---- parser = new Parser(url); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); Node node [] = new AbstractNode[30]; int i = 0; *************** *** 636,640 **** "<p><font size=-2>©2002 Google</font><font size=-2> - Searching 3,083,324,652 web pages</font></center></body></html>\n" ); - parser.registerScanners(); NodeList collectionList = new NodeList(); NodeClassFilter filter = new NodeClassFilter (LinkTag.class); --- 637,640 ---- *************** *** 690,694 **** "</body>\n"+ "</html>"); - parser.registerScanners(); NodeList collectionList = new NodeList(); TagNameFilter filter = new TagNameFilter ("IMG"); --- 690,693 ---- *************** *** 703,717 **** } - public void testRemoveScanner() throws Exception { - createParser( - "" - ); - parser.registerScanners(); - parser.removeScanner(new FormScanner("",parser)); - Map scanners = parser.getScanners(); - TagScanner scanner = (TagScanner)scanners.get("FORM"); - assertNull("shouldnt have found scanner",scanner); - } - /** * See bug #728241 OutOfMemory error/ Infinite loop --- 702,705 ---- *************** *** 748,751 **** --- 736,740 ---- + "</table>\n" + "</body></html>"); + parser.setNodeFactory (new PrototypicalNodeFactory (true)); int i = 0; for (NodeIterator e = parser.elements();e.hasMoreNodes();) Index: ParserTestCase.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTestCase.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** ParserTestCase.java 9 Nov 2003 17:07:14 -0000 1.40 --- ParserTestCase.java 7 Dec 2003 23:41:41 -0000 1.41 *************** *** 67,71 **** protected void parse(String response) throws ParserException { createParser(response,10000); - parser.registerScanners(); parseNodes(); } --- 67,70 ---- Index: PerformanceTest.java =================================================================== RCS file: 
/cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/PerformanceTest.java,v retrieving revision 1.44 retrieving revision 1.45 diff -C2 -d -r1.44 -r1.45 *** PerformanceTest.java 9 Nov 2003 17:07:14 -0000 1.44 --- PerformanceTest.java 7 Dec 2003 23:41:41 -0000 1.45 *************** *** 89,93 **** // Create the parser object parser = new Parser(file,new DefaultParserFeedback()); - parser.registerScanners(); Node node; long start=System.currentTimeMillis(); --- 89,92 ---- |