[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests AllTests.java,1.47,1.48 AssertXmlEqualsTest.java,1.9,1.10 ...
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-09-03 23:36:55
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests Modified Files: AllTests.java AssertXmlEqualsTest.java BadTagIdentifier.java FunctionalTests.java InstanceofPerformanceTest.java LineNumberAssignedByNodeReaderTest.java ParserTest.java ParserTestCase.java PerformanceTest.java Log Message: Change tabs to spaces in all source files. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/AllTests.java,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** AllTests.java 24 Aug 2003 21:59:43 -0000 1.47 --- AllTests.java 3 Sep 2003 23:36:20 -0000 1.48 *************** *** 34,40 **** { ! public AllTests(String name) { ! super(name); ! } /** --- 34,40 ---- { ! public AllTests(String name) { ! super(name); ! } /** *************** *** 49,53 **** * All other options are passed on to the junit framework. */ ! public static void main(String[] args) { String runner; --- 49,53 ---- * All other options are passed on to the junit framework. */ ! public static void main(String[] args) { String runner; *************** *** 99,119 **** + ")"); } ! } ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("HTMLParser Tests"); ! TestSuite basic = new TestSuite("Basic Tests"); ! basic.addTestSuite(ParserTest.class); suite.addTest(basic); ! suite.addTest(org.htmlparser.tests.scannersTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.utilTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.tagTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.visitorsTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.parserHelperTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.nodeDecoratorTests.AllTests.suite()); ! suite.addTest(AssertXmlEqualsTest.suite()); ! suite.addTest(LineNumberAssignedByNodeReaderTest.suite()); ! return suite; ! 
} } --- 99,119 ---- + ")"); } ! } ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("HTMLParser Tests"); ! TestSuite basic = new TestSuite("Basic Tests"); ! basic.addTestSuite(ParserTest.class); suite.addTest(basic); ! suite.addTest(org.htmlparser.tests.scannersTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.utilTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.tagTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.visitorsTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.parserHelperTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.nodeDecoratorTests.AllTests.suite()); ! suite.addTest(AssertXmlEqualsTest.suite()); ! suite.addTest(LineNumberAssignedByNodeReaderTest.suite()); ! return suite; ! } } Index: AssertXmlEqualsTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/AssertXmlEqualsTest.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** AssertXmlEqualsTest.java 24 Aug 2003 21:59:43 -0000 1.9 --- AssertXmlEqualsTest.java 3 Sep 2003 23:36:20 -0000 1.10 *************** *** 34,81 **** public class AssertXmlEqualsTest extends ParserTestCase { ! public AssertXmlEqualsTest(String name) { ! super(name); ! } ! ! public void testNestedTagWithText() throws Exception { ! assertXmlEquals("nested with text","<hello> <hi>My name is Nothing</hi></hello>","<hello><hi>My name is Nothing</hi> </hello>"); ! } ! ! public void testThreeTagsDifferent() throws Exception { ! assertXmlEquals("two tags different","<someTag></someTag><someOtherTag>","<someTag/><someOtherTag>"); ! } ! ! public void testOneTag() throws Exception { ! assertXmlEquals("one tag","<someTag>","<someTag>"); ! } ! public void testTwoTags() throws Exception { ! assertXmlEquals("two tags","<someTag></someTag>","<someTag></someTag>"); ! } ! public void testTwoTagsDifferent() throws Exception { ! 
assertXmlEquals("two tags different","<someTag></someTag>","<someTag/>"); ! } ! ! public void testTwoTagsDifferent2() throws Exception { ! assertXmlEquals("two tags different","<someTag/>","<someTag></someTag>"); ! } ! ! public void testTwoTagsWithSameAttributes() throws Exception { ! assertXmlEquals("attributes","<tag name=\"John\" age=\"22\" sex=\"M\"/>","<tag sex=\"M\" name=\"John\" age=\"22\"/>"); ! } ! ! public void testTagWithText() throws Exception { ! assertXmlEquals("text","<hello> My name is Nothing</hello>","<hello>My name is Nothing </hello>"); ! } ! ! public void testStringWithLineBreaks() throws Exception { ! assertXmlEquals("string with line breaks","testing & refactoring","testing &\nrefactoring"); ! } ! ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("XML Tests"); suite.addTestSuite(AssertXmlEqualsTest.class); return (suite); ! } } --- 34,81 ---- public class AssertXmlEqualsTest extends ParserTestCase { ! public AssertXmlEqualsTest(String name) { ! super(name); ! } ! ! public void testNestedTagWithText() throws Exception { ! assertXmlEquals("nested with text","<hello> <hi>My name is Nothing</hi></hello>","<hello><hi>My name is Nothing</hi> </hello>"); ! } ! ! public void testThreeTagsDifferent() throws Exception { ! assertXmlEquals("two tags different","<someTag></someTag><someOtherTag>","<someTag/><someOtherTag>"); ! } ! ! public void testOneTag() throws Exception { ! assertXmlEquals("one tag","<someTag>","<someTag>"); ! } ! public void testTwoTags() throws Exception { ! assertXmlEquals("two tags","<someTag></someTag>","<someTag></someTag>"); ! } ! public void testTwoTagsDifferent() throws Exception { ! assertXmlEquals("two tags different","<someTag></someTag>","<someTag/>"); ! } ! ! public void testTwoTagsDifferent2() throws Exception { ! assertXmlEquals("two tags different","<someTag/>","<someTag></someTag>"); ! } ! ! public void testTwoTagsWithSameAttributes() throws Exception { ! 
assertXmlEquals("attributes","<tag name=\"John\" age=\"22\" sex=\"M\"/>","<tag sex=\"M\" name=\"John\" age=\"22\"/>"); ! } ! ! public void testTagWithText() throws Exception { ! assertXmlEquals("text","<hello> My name is Nothing</hello>","<hello>My name is Nothing </hello>"); ! } ! ! public void testStringWithLineBreaks() throws Exception { ! assertXmlEquals("string with line breaks","testing & refactoring","testing &\nrefactoring"); ! } ! ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("XML Tests"); suite.addTestSuite(AssertXmlEqualsTest.class); return (suite); ! } } Index: BadTagIdentifier.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/BadTagIdentifier.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** BadTagIdentifier.java 24 Aug 2003 21:59:43 -0000 1.9 --- BadTagIdentifier.java 3 Sep 2003 23:36:20 -0000 1.10 *************** *** 34,67 **** public class BadTagIdentifier { ! public BadTagIdentifier() { ! super(); ! } ! public static void main(String[] args) ! throws Exception { ! BadTagIdentifier badTags = ! new BadTagIdentifier(); ! badTags.identify("http://www.amazon.com"); ! } ! ! private void identify(String url) ! throws Exception{ ! String [] tagsBeingChecked = ! {"TABLE","DIV","SPAN"}; ! ! Parser parser = ! new Parser(url); ! TagFindingVisitor tagFinder = ! new TagFindingVisitor(tagsBeingChecked, true); ! parser.visitAllNodesWith(tagFinder); ! for (int i=0;i<tagsBeingChecked.length;i++) { ! System.out.println( ! "Number of "+tagsBeingChecked[i]+" begin tags = "+ ! tagFinder.getTagCount(i)); ! System.out.println( ! "Number of "+tagsBeingChecked[i]+" end tags = "+ ! tagFinder.getEndTagCount(i)); ! } ! ! } } --- 34,67 ---- public class BadTagIdentifier { ! public BadTagIdentifier() { ! super(); ! } ! public static void main(String[] args) ! throws Exception { ! BadTagIdentifier badTags = ! new BadTagIdentifier(); ! 
badTags.identify("http://www.amazon.com"); ! } ! ! private void identify(String url) ! throws Exception{ ! String [] tagsBeingChecked = ! {"TABLE","DIV","SPAN"}; ! ! Parser parser = ! new Parser(url); ! TagFindingVisitor tagFinder = ! new TagFindingVisitor(tagsBeingChecked, true); ! parser.visitAllNodesWith(tagFinder); ! for (int i=0;i<tagsBeingChecked.length;i++) { ! System.out.println( ! "Number of "+tagsBeingChecked[i]+" begin tags = "+ ! tagFinder.getTagCount(i)); ! System.out.println( ! "Number of "+tagsBeingChecked[i]+" end tags = "+ ! tagFinder.getEndTagCount(i)); ! } ! ! } } Index: FunctionalTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/FunctionalTests.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** FunctionalTests.java 24 Aug 2003 21:59:43 -0000 1.40 --- FunctionalTests.java 3 Sep 2003 23:36:20 -0000 1.41 *************** *** 50,133 **** public class FunctionalTests extends TestCase { ! public FunctionalTests(String arg0) { ! super(arg0); ! } ! /** ! * Based on a suspected bug report by Annette Doyle, ! * to check if the no of image tags are correctly ! * identified by the parser ! */ ! public void testNumImageTagsInYahooWithoutRegisteringScanners() throws ParserException { ! // First count the image tags as is ! int imgTagCount; ! imgTagCount = findImageTagCount(); ! try { ! int parserImgTagCount = countImageTagsWithHTMLParser(); ! assertEquals("Image Tag Count",imgTagCount,parserImgTagCount); ! } ! catch (ParserException e) { ! throw new ParserException("Error thrown in call to countImageTagsWithHTMLParser()",e); ! } ! ! } ! public int findImageTagCount() { ! int imgTagCount = 0; ! try { ! URL url = new URL("http://www.yahoo.com"); ! InputStream is = url.openStream(); ! BufferedReader reader; ! reader = new BufferedReader(new InputStreamReader(is)); ! imgTagCount = countImageTagsWithoutHTMLParser(reader); ! 
is.close(); ! } ! catch (MalformedURLException e) { ! System.err.println("URL was malformed!"); ! } ! catch (IOException e) { ! System.err.println("IO Exception occurred while trying to open stream"); ! } ! return imgTagCount; ! } ! public int countImageTagsWithHTMLParser() throws ParserException { ! Parser parser = new Parser("http://www.yahoo.com",new DefaultParserFeedback()); ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! int parserImgTagCount = 0; ! Node node; ! for (NodeIterator e= parser.elements();e.hasMoreNodes();) { ! node = (Node)e.nextNode(); ! if (node instanceof ImageTag) { ! parserImgTagCount++; ! } ! } ! return parserImgTagCount; ! } ! public int countImageTagsWithoutHTMLParser(BufferedReader reader) throws IOException { ! String line; ! int imgTagCount = 0; ! do { ! line = reader.readLine(); ! if (line!=null) { ! // Check the line for image tags ! String newline = line.toUpperCase(); ! int fromIndex = -1; ! do { ! fromIndex = newline.indexOf("<IMG",fromIndex+1); ! if (fromIndex!=-1) { ! imgTagCount++; ! } ! } ! while (fromIndex!=-1); ! } ! } ! while (line!=null); ! return imgTagCount; ! } ! public static TestSuite suite() { ! return new TestSuite(FunctionalTests.class); ! } } --- 50,133 ---- public class FunctionalTests extends TestCase { ! public FunctionalTests(String arg0) { ! super(arg0); ! } ! /** ! * Based on a suspected bug report by Annette Doyle, ! * to check if the no of image tags are correctly ! * identified by the parser ! */ ! public void testNumImageTagsInYahooWithoutRegisteringScanners() throws ParserException { ! // First count the image tags as is ! int imgTagCount; ! imgTagCount = findImageTagCount(); ! try { ! int parserImgTagCount = countImageTagsWithHTMLParser(); ! assertEquals("Image Tag Count",imgTagCount,parserImgTagCount); ! } ! catch (ParserException e) { ! throw new ParserException("Error thrown in call to countImageTagsWithHTMLParser()",e); ! } ! ! } ! public int findImageTagCount() { ! 
int imgTagCount = 0; ! try { ! URL url = new URL("http://www.yahoo.com"); ! InputStream is = url.openStream(); ! BufferedReader reader; ! reader = new BufferedReader(new InputStreamReader(is)); ! imgTagCount = countImageTagsWithoutHTMLParser(reader); ! is.close(); ! } ! catch (MalformedURLException e) { ! System.err.println("URL was malformed!"); ! } ! catch (IOException e) { ! System.err.println("IO Exception occurred while trying to open stream"); ! } ! return imgTagCount; ! } ! public int countImageTagsWithHTMLParser() throws ParserException { ! Parser parser = new Parser("http://www.yahoo.com",new DefaultParserFeedback()); ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! int parserImgTagCount = 0; ! Node node; ! for (NodeIterator e= parser.elements();e.hasMoreNodes();) { ! node = (Node)e.nextNode(); ! if (node instanceof ImageTag) { ! parserImgTagCount++; ! } ! } ! return parserImgTagCount; ! } ! public int countImageTagsWithoutHTMLParser(BufferedReader reader) throws IOException { ! String line; ! int imgTagCount = 0; ! do { ! line = reader.readLine(); ! if (line!=null) { ! // Check the line for image tags ! String newline = line.toUpperCase(); ! int fromIndex = -1; ! do { ! fromIndex = newline.indexOf("<IMG",fromIndex+1); ! if (fromIndex!=-1) { ! imgTagCount++; ! } ! } ! while (fromIndex!=-1); ! } ! } ! while (line!=null); ! return imgTagCount; ! } ! public static TestSuite suite() { ! return new TestSuite(FunctionalTests.class); ! } } Index: InstanceofPerformanceTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/InstanceofPerformanceTest.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** InstanceofPerformanceTest.java 24 Aug 2003 21:59:43 -0000 1.11 --- InstanceofPerformanceTest.java 3 Sep 2003 23:36:20 -0000 1.12 *************** *** 40,108 **** public class InstanceofPerformanceTest { ! FormTag formTag; ! 
Vector formChildren; ! public void setUp() throws Exception { ! Parser parser = ! Parser.createParser( ! FormScannerTest.FORM_HTML ! ); ! parser.registerScanners(); ! NodeIterator e = parser.elements(); ! Node node = e.nextNode(); ! formTag = (FormTag)node; ! formChildren = new Vector(); ! for (SimpleNodeIterator se = formTag.children();se.hasMoreNodes();) { ! formChildren.addElement(se.nextNode()); ! } ! } ! ! public void doInstanceofTest(long [] time,int index, long numTimes) { ! System.out.println("doInstanceofTest("+index+")"); ! long start = System.currentTimeMillis(); ! for (long i=0;i<numTimes;i++) { ! for (Enumeration e = formChildren.elements();e.hasMoreElements();) { ! Node node = (Node)e.nextElement(); ! } ! } ! long end = System.currentTimeMillis(); ! time[index] = end-start; ! } ! ! public void doGetTypeTest(long [] time,int index, long numTimes) { ! System.out.println("doGetTypeTest("+index+")"); ! long start = System.currentTimeMillis(); ! for (long i=0;i<numTimes;i++) { ! for (SimpleNodeIterator e = formTag.children();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! } ! } ! long end = System.currentTimeMillis(); ! time[index] = end-start; ! } ! public void perform() { ! int numTimes = 30; ! long time1[] = new long[numTimes], ! time2[] = new long[numTimes]; ! ! for (int i=0;i<numTimes;i++) ! doInstanceofTest(time1,i,i*10000); ! ! for (int i=0;i<numTimes;i++) ! doGetTypeTest(time2,i,i*10000); ! ! print(time1,time2); ! } ! public void print(long [] time1, long [] time2) { ! for (int i=0;i<time1.length;i++) { ! System.out.println(i*1000000+":"+","+time1[i]+" "+time2[i]); ! } ! } ! public static void main(String [] args) throws Exception { ! InstanceofPerformanceTest test = ! new InstanceofPerformanceTest(); ! test.setUp(); ! test.perform(); ! } } --- 40,108 ---- public class InstanceofPerformanceTest { ! FormTag formTag; ! Vector formChildren; ! public void setUp() throws Exception { ! Parser parser = ! Parser.createParser( ! 
FormScannerTest.FORM_HTML ! ); ! parser.registerScanners(); ! NodeIterator e = parser.elements(); ! Node node = e.nextNode(); ! formTag = (FormTag)node; ! formChildren = new Vector(); ! for (SimpleNodeIterator se = formTag.children();se.hasMoreNodes();) { ! formChildren.addElement(se.nextNode()); ! } ! } ! ! public void doInstanceofTest(long [] time,int index, long numTimes) { ! System.out.println("doInstanceofTest("+index+")"); ! long start = System.currentTimeMillis(); ! for (long i=0;i<numTimes;i++) { ! for (Enumeration e = formChildren.elements();e.hasMoreElements();) { ! Node node = (Node)e.nextElement(); ! } ! } ! long end = System.currentTimeMillis(); ! time[index] = end-start; ! } ! ! public void doGetTypeTest(long [] time,int index, long numTimes) { ! System.out.println("doGetTypeTest("+index+")"); ! long start = System.currentTimeMillis(); ! for (long i=0;i<numTimes;i++) { ! for (SimpleNodeIterator e = formTag.children();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! } ! } ! long end = System.currentTimeMillis(); ! time[index] = end-start; ! } ! public void perform() { ! int numTimes = 30; ! long time1[] = new long[numTimes], ! time2[] = new long[numTimes]; ! ! for (int i=0;i<numTimes;i++) ! doInstanceofTest(time1,i,i*10000); ! ! for (int i=0;i<numTimes;i++) ! doGetTypeTest(time2,i,i*10000); ! ! print(time1,time2); ! } ! public void print(long [] time1, long [] time2) { ! for (int i=0;i<time1.length;i++) { ! System.out.println(i*1000000+":"+","+time1[i]+" "+time2[i]); ! } ! } ! public static void main(String [] args) throws Exception { ! InstanceofPerformanceTest test = ! new InstanceofPerformanceTest(); ! test.setUp(); ! test.perform(); ! 
} } Index: LineNumberAssignedByNodeReaderTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/LineNumberAssignedByNodeReaderTest.java,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** LineNumberAssignedByNodeReaderTest.java 24 Aug 2003 21:59:43 -0000 1.18 --- LineNumberAssignedByNodeReaderTest.java 3 Sep 2003 23:36:20 -0000 1.19 *************** *** 46,123 **** public class LineNumberAssignedByNodeReaderTest extends ParserTestCase { ! public LineNumberAssignedByNodeReaderTest(String name) { ! super(name); ! } ! ! /** ! * Test to ensure that the <code>Tag</code> being created by the ! * <code>CompositeTagScanner</code> has the correct startLine and endLine ! * information in the <code>TagData</code> it is constructed with. ! * @throws ParserException if there is a problem parsing the test data ! */ ! public void testLineNumbers() throws ParserException { ! testLineNumber("<Custom/>", 1, 0, 1, 1); ! testLineNumber("<Custom />", 1, 0, 1, 1); ! testLineNumber("<Custom></Custom>", 1, 0, 1, 1); ! testLineNumber("<Custom>Content</Custom>", 1, 0, 1, 1); ! testLineNumber("<Custom>Content<Custom></Custom>", 1, 0, 1, 1); ! testLineNumber( ! "<Custom>\n" + ! " Content\n" + ! "</Custom>", ! 1, 0, 1, 3 ! ); ! testLineNumber( ! "Foo\n" + ! "<Custom>\n" + ! " Content\n" + ! "</Custom>", ! 2, 1, 2, 4 ! ); ! testLineNumber( ! "Foo\n" + ! "<Custom>\n" + ! " <Custom>SubContent</Custom>\n" + ! "</Custom>", ! 2, 1, 2, 4 ! ); ! char[] oneHundredNewLines = new char[100]; ! Arrays.fill(oneHundredNewLines, '\n'); ! testLineNumber( ! "Foo\n" + ! new String(oneHundredNewLines) + ! "<Custom>\n" + ! " <Custom>SubContent</Custom>\n" + ! "</Custom>", ! 2, 1, 102, 104 ! ); ! } ! ! /** ! * Helper method to ensure that the <code>Tag</code> being created by the ! * <code>CompositeTagScanner</code> has the correct startLine and endLine ! 
* information in the <code>TagData</code> it is constructed with. ! * @param xml String containing HTML or XML to parse, containing a Custom tag ! * @param numNodes int number of expected nodes returned by parser ! * @param useNode int index of the node to test (should be of type CustomTag) ! * @param startLine int the expected start line number of the tag ! * @param endLine int the expected end line number of the tag ! * @throws ParserException if there is an exception during parsing ! */ ! private void testLineNumber(String xml, int numNodes, int useNode, int expectedStartLine, int expectedEndLine) throws ParserException { ! createParser(xml); ! parser.addScanner(new CustomScanner()); ! parseAndAssertNodeCount(numNodes); ! assertType("custom node",CustomTag.class,node[useNode]); ! CustomTag tag = (CustomTag)node[useNode]; ! assertEquals("start line", expectedStartLine, tag.tagData.getStartLine()); ! assertEquals("end line", expectedEndLine, tag.tagData.getEndLine()); ! ! } ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("Line Number Tests"); suite.addTestSuite(LineNumberAssignedByNodeReaderTest.class); return (suite); ! } } --- 46,123 ---- public class LineNumberAssignedByNodeReaderTest extends ParserTestCase { ! public LineNumberAssignedByNodeReaderTest(String name) { ! super(name); ! } ! ! /** ! * Test to ensure that the <code>Tag</code> being created by the ! * <code>CompositeTagScanner</code> has the correct startLine and endLine ! * information in the <code>TagData</code> it is constructed with. ! * @throws ParserException if there is a problem parsing the test data ! */ ! public void testLineNumbers() throws ParserException { ! testLineNumber("<Custom/>", 1, 0, 1, 1); ! testLineNumber("<Custom />", 1, 0, 1, 1); ! testLineNumber("<Custom></Custom>", 1, 0, 1, 1); ! testLineNumber("<Custom>Content</Custom>", 1, 0, 1, 1); ! testLineNumber("<Custom>Content<Custom></Custom>", 1, 0, 1, 1); ! testLineNumber( ! "<Custom>\n" + ! " Content\n" + ! 
"</Custom>", ! 1, 0, 1, 3 ! ); ! testLineNumber( ! "Foo\n" + ! "<Custom>\n" + ! " Content\n" + ! "</Custom>", ! 2, 1, 2, 4 ! ); ! testLineNumber( ! "Foo\n" + ! "<Custom>\n" + ! " <Custom>SubContent</Custom>\n" + ! "</Custom>", ! 2, 1, 2, 4 ! ); ! char[] oneHundredNewLines = new char[100]; ! Arrays.fill(oneHundredNewLines, '\n'); ! testLineNumber( ! "Foo\n" + ! new String(oneHundredNewLines) + ! "<Custom>\n" + ! " <Custom>SubContent</Custom>\n" + ! "</Custom>", ! 2, 1, 102, 104 ! ); ! } ! ! /** ! * Helper method to ensure that the <code>Tag</code> being created by the ! * <code>CompositeTagScanner</code> has the correct startLine and endLine ! * information in the <code>TagData</code> it is constructed with. ! * @param xml String containing HTML or XML to parse, containing a Custom tag ! * @param numNodes int number of expected nodes returned by parser ! * @param useNode int index of the node to test (should be of type CustomTag) ! * @param startLine int the expected start line number of the tag ! * @param endLine int the expected end line number of the tag ! * @throws ParserException if there is an exception during parsing ! */ ! private void testLineNumber(String xml, int numNodes, int useNode, int expectedStartLine, int expectedEndLine) throws ParserException { ! createParser(xml); ! parser.addScanner(new CustomScanner()); ! parseAndAssertNodeCount(numNodes); ! assertType("custom node",CustomTag.class,node[useNode]); ! CustomTag tag = (CustomTag)node[useNode]; ! assertEquals("start line", expectedStartLine, tag.tagData.getStartLine()); ! assertEquals("end line", expectedEndLine, tag.tagData.getEndLine()); ! ! } ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("Line Number Tests"); suite.addTestSuite(LineNumberAssignedByNodeReaderTest.class); return (suite); ! 
} } Index: ParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTest.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** ParserTest.java 24 Aug 2003 21:59:43 -0000 1.38 --- ParserTest.java 3 Sep 2003 23:36:20 -0000 1.39 *************** *** 53,111 **** public class ParserTest extends ParserTestCase { ! public ParserTest(String name) { ! super(name); ! } ! public void testElements() throws Exception { ! StringBuffer hugeData = new StringBuffer(); ! for (int i=0;i<5001;i++) hugeData.append('a'); ! createParser(hugeData.toString()); ! int i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node[i++] = e.nextNode(); ! } ! assertEquals("There should be 1 node identified",1,i); ! // Now try getting the elements again ! // i = 0; ! // reader.reset(); ! // reader.setLineCount(1); ! // reader.setPosInLine(-1); ! // for (HTMLEnumeration e = parser.elements();e.hasMoreNodes();) ! // { ! // node[i++] = e.nextHTMLNode(); ! // } ! // assertEquals("There should be 1 node identified (second call to parser.elements())",1,i); ! } ! /** ! * This testcase needs you to be online. ! */ ! public void testElementsFromWeb() throws Exception { ! Parser parser; ! try { ! parser = new Parser("http://www.google.com"); ! } ! catch (Exception e ){ ! throw new ParserException("You must be offline! This test needs you to be connected to the internet.",e); ! } ! parser.getReader().mark(5000); ! Node [] node = new AbstractNode[500]; ! int i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node[i++] = e.nextNode(); ! } ! int cnt = i; ! parser.getReader().reset(); ! // Now try getting the elements again ! i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node[i++] = e.nextNode(); ! } ! assertEquals("There should be "+cnt+" nodes identified (second call to parser.elements())",cnt,i); ! } ! 
/** * Test the Parser(URLConnection) constructor. --- 53,111 ---- public class ParserTest extends ParserTestCase { ! public ParserTest(String name) { ! super(name); ! } ! public void testElements() throws Exception { ! StringBuffer hugeData = new StringBuffer(); ! for (int i=0;i<5001;i++) hugeData.append('a'); ! createParser(hugeData.toString()); ! int i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node[i++] = e.nextNode(); ! } ! assertEquals("There should be 1 node identified",1,i); ! // Now try getting the elements again ! // i = 0; ! // reader.reset(); ! // reader.setLineCount(1); ! // reader.setPosInLine(-1); ! // for (HTMLEnumeration e = parser.elements();e.hasMoreNodes();) ! // { ! // node[i++] = e.nextHTMLNode(); ! // } ! // assertEquals("There should be 1 node identified (second call to parser.elements())",1,i); ! } ! /** ! * This testcase needs you to be online. ! */ ! public void testElementsFromWeb() throws Exception { ! Parser parser; ! try { ! parser = new Parser("http://www.google.com"); ! } ! catch (Exception e ){ ! throw new ParserException("You must be offline! This test needs you to be connected to the internet.",e); ! } ! parser.getReader().mark(5000); ! Node [] node = new AbstractNode[500]; ! int i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node[i++] = e.nextNode(); ! } ! int cnt = i; ! parser.getReader().reset(); ! // Now try getting the elements again ! i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node[i++] = e.nextNode(); ! } ! assertEquals("There should be "+cnt+" nodes identified (second call to parser.elements())",cnt,i); ! } ! /** * Test the Parser(URLConnection) constructor. *************** *** 114,118 **** * <pre> * <form NAME="SearchQuick" method="POST" action="cp_search_response-e.asp" ! 
* onSubmit="return runSubmit();"> * * <!-- begin test hidden field code --> --- 114,118 ---- * <pre> * <form NAME="SearchQuick" method="POST" action="cp_search_response-e.asp" ! * onSubmit="return runSubmit();"> * * <!-- begin test hidden field code --> *************** *** 190,196 **** * <tr> * <td colspan="2" align="right" nowrap> ! * <input type="image" src="images/bb_submit-e.gif" name="Search" border="0" WIDTH="88" HEIGHT="23"> * &nbsp; <a href="#" onclick="javascript:fClearAllFields();"><img src="images/bb_clear_form-e.gif" name="Clear" border="0" WIDTH="88" HEIGHT="23"></a> ! * </td> * </tr> * </table> --- 190,196 ---- * <tr> * <td colspan="2" align="right" nowrap> ! * <input type="image" src="images/bb_submit-e.gif" name="Search" border="0" WIDTH="88" HEIGHT="23"> * &nbsp; <a href="#" onclick="javascript:fClearAllFields();"><img src="images/bb_clear_form-e.gif" name="Clear" border="0" WIDTH="88" HEIGHT="23"></a> ! * </td> * </tr> * </table> *************** *** 210,214 **** final String postal_code = "K2B 7V4"; ! Parser parser; URL url; HttpURLConnection connection; --- 210,214 ---- final String postal_code = "K2B 7V4"; ! Parser parser; URL url; HttpURLConnection connection; *************** *** 289,302 **** out.print (buffer); out.close (); ! parser = new Parser (connection); ! } ! catch (Exception e) { ! throw new ParserException ("You must be offline! This test needs you to be connected to the internet.", e); ! } pass = false; ! for (enumeration = parser.elements (); enumeration.hasMoreNodes ();) ! { node = enumeration.nextNode (); if (node instanceof StringNode) --- 289,302 ---- out.print (buffer); out.close (); ! parser = new Parser (connection); ! } ! catch (Exception e) { ! throw new ParserException ("You must be offline! This test needs you to be connected to the internet.", e); ! } pass = false; ! for (enumeration = parser.elements (); enumeration.hasMoreNodes ();) ! 
{ node = enumeration.nextNode (); if (node instanceof StringNode) *************** *** 306,312 **** pass = true; } ! } ! assertTrue("POST operation failed.", pass); ! } /** --- 306,312 ---- pass = true; } ! } ! assertTrue("POST operation failed.", pass); ! } /** *************** *** 368,381 **** public void testHTTPCharset () { ! Parser parser; ! try { ! parser = new Parser("http://www.ibm.com/jp/", Parser.noFeedback); ! assertTrue("Character set should be Shift_JIS", parser.getEncoding ().equalsIgnoreCase ("Shift_JIS")); ! } ! catch (ParserException e) { fail ("could not open http://www.ibm.com/jp/"); ! } } --- 368,381 ---- public void testHTTPCharset () { ! Parser parser; ! try { ! parser = new Parser("http://www.ibm.com/jp/", Parser.noFeedback); ! assertTrue("Character set should be Shift_JIS", parser.getEncoding ().equalsIgnoreCase ("Shift_JIS")); ! } ! catch (ParserException e) { fail ("could not open http://www.ibm.com/jp/"); ! } } *************** *** 388,405 **** public void testHTMLCharset () { ! Parser parser; NodeIterator enumeration; ! try { ! parser = new Parser("http://www.sony.co.jp", Parser.noFeedback); ! assertEquals("Character set by default is ISO-8859-1", "ISO-8859-1", parser.getEncoding ()); enumeration = parser.elements(); ! assertTrue("Character set should be Shift_JIS", parser.getEncoding ().equalsIgnoreCase ("Shift_JIS")); ! } ! catch (ParserException e) { fail ("could not open http://www.sony.co.jp"); ! } } --- 388,405 ---- public void testHTMLCharset () { ! Parser parser; NodeIterator enumeration; ! try { ! parser = new Parser("http://www.sony.co.jp", Parser.noFeedback); ! assertEquals("Character set by default is ISO-8859-1", "ISO-8859-1", parser.getEncoding ()); enumeration = parser.elements(); ! assertTrue("Character set should be Shift_JIS", parser.getEncoding ().equalsIgnoreCase ("Shift_JIS")); ! } ! catch (ParserException e) { fail ("could not open http://www.sony.co.jp"); ! 
} } *************** *** 409,424 **** * and bug #699886 can't parse website other than iso-8859-1 */ ! public void testSwitchCharset () throws ParserException { ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/gb2312Charset.html"; int i; Node[] nodes; ! ! parser = new Parser(url); i = 0; nodes = new AbstractNode[30]; ! for (NodeIterator e = parser.elements(); e.hasMoreNodes();) ! nodes[i++] = e.nextNode(); assertEquals ("Expected nodes", 14, i); } --- 409,424 ---- * and bug #699886 can't parse website other than iso-8859-1 */ ! public void testSwitchCharset () throws ParserException { ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/gb2312Charset.html"; int i; Node[] nodes; ! ! parser = new Parser(url); i = 0; nodes = new AbstractNode[30]; ! for (NodeIterator e = parser.elements(); e.hasMoreNodes();) ! nodes[i++] = e.nextNode(); assertEquals ("Expected nodes", 14, i); } *************** *** 433,444 **** * Nonetheless, it would be nice to handle this case. */ ! public void testDoubleQuotedCharset () throws ParserException { ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/DoublequotedCharset.html"; ! ! parser = new Parser(url); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! e.nextNode(); assertTrue ("Wrong encoding", parser.getEncoding ().equals ("UTF-8")); } --- 433,444 ---- * Nonetheless, it would be nice to handle this case. */ ! public void testDoubleQuotedCharset () throws ParserException { ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/DoublequotedCharset.html"; ! ! parser = new Parser(url); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! e.nextNode(); assertTrue ("Wrong encoding", parser.getEncoding ().equals ("UTF-8")); } *************** *** 453,464 **** * Nonetheless, it would be nice to handle this case. */ ! public void testSingleQuotedCharset () throws ParserException { ! Parser parser; ! 
String url = "http://htmlparser.sourceforge.net/test/SinglequotedCharset.html"; ! ! parser = new Parser(url); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! e.nextNode(); assertTrue ("Wrong encoding", parser.getEncoding ().equals ("UTF-8")); } --- 453,464 ---- * Nonetheless, it would be nice to handle this case. */ ! public void testSingleQuotedCharset () throws ParserException { ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/SinglequotedCharset.html"; ! ! parser = new Parser(url); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! e.nextNode(); assertTrue ("Wrong encoding", parser.getEncoding ().equals ("UTF-8")); } *************** *** 472,481 **** * AOL it would be nice to handle this case. */ ! public void testCommaListCharset () throws ParserException { URL url; URLConnection connection; ! Parser parser; ! String idiots = "http://users.aol.com/geinster/rej.htm"; try --- 472,481 ---- * AOL it would be nice to handle this case. */ ! public void testCommaListCharset () throws ParserException { URL url; URLConnection connection; ! Parser parser; ! String idiots = "http://users.aol.com/geinster/rej.htm"; try *************** *** 537,696 **** public void testNullUrl() { ! Parser parser; ! try { ! parser = new Parser("http://someoneexisting.com", Parser.noFeedback); ! assertTrue("Should have thrown an exception!",false); ! } ! catch (ParserException e) { ! ! } ! } ! ! public void testURLWithSpaces() throws ParserException{ ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/This is a Test Page.html"; ! ! parser = new Parser(url); ! Node node [] = new AbstractNode[30]; ! int i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! node[i] = e.nextNode(); ! i++; ! ! } ! assertEquals("Expected nodes",12,i); ! } ! public void testLinkCollection() throws ParserException { ! createParser( ! 
"<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"><title>Google</title><style><!--\n"+ ! "body,td,a,p,.h{font-family:arial,sans-serif;} .h{font-size: 20px;} .h{color:} .q{text-decoration:none; color:#0000cc;}\n"+ ! "//--></style>\n"+ ! "<script>\n"+ ! "<!--\n"+ ! "function sf(){document.f.q.focus();}\n"+ ! "function c(p){var f=document.f;if (f.action) {f.action = 'http://'+p;f.submit();return false;}return true;}\n"+ ! "// -->\n"+ ! "</script>\n"+ ! "</head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf()><center><table border=0 cellspacing=0 cellpadding=0><tr><td><img src=\"images/logo.gif\" width=276 height=110 alt=\"Google\"></td></tr></table><br>\n"+ ! "<table border=0 cellspacing=0 cellpadding=0>" + ! "<tr>" + ! "<td width=15> </td>" + ! "<td id=0 bgcolor=#3366cc align=center width=95 nowrap>" + ! "<font color=#ffffff size=-1><b>Web</b></font>" + ! "</td>" + ! "<td width=15> </td>" + ! "<td id=1 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/imghp');\" style=cursor:pointer;cursor:hand;><a id=1a class=q href=\"/imghp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/imghp');\"><font size=-1>Images</font></a></td><td width=15> </td><td id=2 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/grphp');\" style=cursor:pointer;cursor:hand;><a id=2a class=q href=\"/grphp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/grphp');\"><font size=-1>Groups</font></a></td><td width=15> </td><td id=3 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/dirhp');\" style=cursor:pointer;cursor:hand;><a id=3a class=q href=\"/dirhp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/dirhp');\"><font size=-1>Directory</font></a></td><td width=15> </td><td id=4 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/nwshp');\" style=cursor:pointer;cursor:hand;><a id=4a 
class=q href=\"/nwshp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/nwshp');\"><font size=-1><nobr>News-<font color=red>New!</font></nobr></font></a></td><td width=15> </td></tr><tr><td colspan=12 bgcolor=#3366cc><img width=1 height=1 alt=\"\">" + ! "</td>" + ! "</tr>" + ! "</table>" + ! "<br>" + ! "<form action=\"/search\" name=f>" + ! "<table cellspacing=0 cellpadding=0>" + ! "<tr>" + ! "<td width=75> </td>" + ! "<td align=center>" + ! "<input type=hidden name=hl value=en>" + ! "<input type=hidden name=ie value=\"UTF-8\">" + ! "<input type=hidden name=oe value=\"UTF-8\">" + ! "<input maxLength=256 size=55 name=q value=\"\"><br>" + ! "<input type=submit value=\"Google Search\" name=btnG>" + ! "<input type=submit value=\"I'm Feeling Lucky\" name=btnI>" + ! "</td>" + ! "<td valign=top nowrap>" + ! "<font size=-2> • <a href=/advanced_search?hl=en>Advanced Search</a>" + ! "<br> • <a href=/preferences?hl=en>Preferences</a>" + ! "<br> • <a href=/language_tools?hl=en>Language Tools</a>" + ! "</font>" + ! "</td>" + ! "</tr>" + ! "</table>" + ! "</form><br>\n"+ ! "<br><font size=-1><a href=\"/ads/\">Advertise with Us</a> - <a href=\"/services/\">Search Solutions</a> - <a href=\"/options/\">Services & Tools</a> - <a href=/about.html>Jobs, Press, & Help</a><span id=hp style=\"behavior:url(#default#homepage)\"></span>\n"+ ! "<script>\n"+ ! "if (!hp.isHomePage('http://www.google.com/')) {document.write(\"<p><a href=\"/mgyhp.html\" onClick=\"style.behavior='url(#default#homepage)';setHomePage('http://www.google.com/');\">Make Google Your Homepage!</a>\");}\n"+ ! "</script></font>\n"+ ! "<p><font size=-2>©2002 Google</font><font size=-2> - Searching 3,083,324,652 web pages</font></center></body></html>\n" ! ); ! parser.registerScanners(); ! NodeList collectionList = new NodeList(); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! node.collectInto(collectionList,LinkTag.class); ! } ! 
assertEquals("Size of collection vector should be 11",11,collectionList.size()); ! // All items in collection vector should be links ! for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! assertTrue("Only links should have been parsed",node instanceof LinkTag); ! } ! } ! public void testImageCollection() throws ParserException { ! createParser( ! "<html>\n"+ ! "<head>\n"+ ! "<meta name=\"generator\" content=\"Created Using Yahoo! PageBuilder 2.60.24\">\n"+ ! "</head>\n"+ ! "<body bgcolor=\"#FFFFFF\" link=\"#0000FF\" vlink=\"#FF0000\" text=\"#000000\"\n"+ ! " onLoad=\"window.onresize=new Function('if (navigator.appVersion==\'Netscape\') history.go(0);');\">\n"+ ! "<div id=\"layer0\" style=\"position:absolute;left:218;top:40;width:240;height:26;\">\n"+ ! "<table width=240 height=26 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><b><font size=\"+2\"><span style=\"font-size:24\">NISHI-HONGWAN-JI</span></font></b></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer1\" style=\"position:absolute;left:75;top:88;width:542;height:83;\">\n"+ ! "<table width=542 height=83 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><span style=\"font-size:14\">The Nihi Hongwanj-ji temple is very traditional, very old, and very beautiful. This is the place that we stayed on our first night in Kyoto. We then attended the morning prayer ceremony, at 6:30 am. Staying here costed us 7,500 yen, which was inclusive of dinner and breakfast, and usage of the o-furo (public bath). Felt more like a luxury hotel than a temple.</span></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer2\" style=\"position:absolute;left:144;top:287;width:128;height:96;\">\n"+ ! "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji1.html\"><img height=96 width=128 src=\"nishi-hongwanji1-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! 
"<div id=\"layer3\" style=\"position:absolute;left:415;top:285;width:128;height:96;\">\n"+ ! "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji3.html\"><img height=96 width=128 src=\"nishi-hongwanji2-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer4\" style=\"position:absolute;left:414;top:182;width:128;height:96;\">\n"+ ! "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"higashi-hongwanji.html\"><img height=96 width=128 src=\"higashi-hongwanji-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer5\" style=\"position:absolute;left:78;top:396;width:530;height:49;\">\n"+ ! "<table width=530 height=49 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><span style=\"font-size:14\">Click on the pictures to see the full-sized versions. The picture at the top right corner is taken in Higashi-Hongwanji. Nishi means west, and Higashi means east. These two temples are adjacent to each other and represent two different Buddhist sects.</span></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer6\" style=\"position:absolute;left:143;top:180;width:128;height:102;\">\n"+ ! "<table width=128 height=102 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji4.html\"><img height=102 width=128 src=\"nishi-hongwanji4-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer7\" style=\"position:absolute;left:280;top:235;width:124;height:99;\">\n"+ ! "<table width=124 height=99 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji-lodging.html\"><img height=99 width=124 src=\"nishi-hongwanji-lodging-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "</body>\n"+ ! "</html>"); ! parser.registerScanners(); ! NodeList collectionList = new NodeList(); ! 
for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! node.collectInto(collectionList,ImageTag.IMAGE_TAG_FILTER); ! } ! assertEquals("Size of collection vector should be 5",5,collectionList.size()); ! // All items in collection vector should be links ! for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! assertTrue("Only images should have been parsed",node instanceof ImageTag); ! } ! } ! public void testRemoveScanner() throws Exception { ! createParser( ! "" ! ); ! parser.registerScanners(); ! parser.removeScanner(new FormScanner("",parser)); ! Map scanners = parser.getScanners(); ! TagScanner scanner = (TagScanner)scanners.get("FORM"); ! assertNull("shouldnt have found scanner",scanner); ! } /** --- 537,696 ---- public void testNullUrl() { ! Parser parser; ! try { ! parser = new Parser("http://someoneexisting.com", Parser.noFeedback); ! assertTrue("Should have thrown an exception!",false); ! } ! catch (ParserException e) { ! ! } ! } ! ! public void testURLWithSpaces() throws ParserException{ ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/This is a Test Page.html"; ! ! parser = new Parser(url); ! Node node [] = new AbstractNode[30]; ! int i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! node[i] = e.nextNode(); ! i++; ! ! } ! assertEquals("Expected nodes",12,i); ! } ! public void testLinkCollection() throws ParserException { ! createParser( ! "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"><title>Google</title><style><!--\n"+ ! "body,td,a,p,.h{font-family:arial,sans-serif;} .h{font-size: 20px;} .h{color:} .q{text-decoration:none; color:#0000cc;}\n"+ ! "//--></style>\n"+ ! "<script>\n"+ ! "<!--\n"+ ! "function sf(){document.f.q.focus();}\n"+ ! "function c(p){var f=document.f;if (f.action) {f.action = 'http://'+p;f.submit();return false;}return true;}\n"+ ! "// -->\n"+ ! "</script>\n"+ ! 
"</head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf()><center><table border=0 cellspacing=0 cellpadding=0><tr><td><img src=\"images/logo.gif\" width=276 height=110 alt=\"Google\"></td></tr></table><br>\n"+ ! "<table border=0 cellspacing=0 cellpadding=0>" + ! "<tr>" + ! "<td width=15> </td>" + ! "<td id=0 bgcolor=#3366cc align=center width=95 nowrap>" + ! "<font color=#ffffff size=-1><b>Web</b></font>" + ! "</td>" + ! "<td width=15> </td>" + ! "<td id=1 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/imghp');\" style=cursor:pointer;cursor:hand;><a id=1a class=q href=\"/imghp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/imghp');\"><font size=-1>Images</font></a></td><td width=15> </td><td id=2 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/grphp');\" style=cursor:pointer;cursor:hand;><a id=2a class=q href=\"/grphp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/grphp');\"><font size=-1>Groups</font></a></td><td width=15> </td><td id=3 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/dirhp');\" style=cursor:pointer;cursor:hand;><a id=3a class=q href=\"/dirhp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/dirhp');\"><font size=-1>Directory</font></a></td><td width=15> </td><td id=4 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/nwshp');\" style=cursor:pointer;cursor:hand;><a id=4a class=q href=\"/nwshp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/nwshp');\"><font size=-1><nobr>News-<font color=red>New!</font></nobr></font></a></td><td width=15> </td></tr><tr><td colspan=12 bgcolor=#3366cc><img width=1 height=1 alt=\"\">" + ! "</td>" + ! "</tr>" + ! "</table>" + ! "<br>" + ! "<form action=\"/search\" name=f>" + ! "<table cellspacing=0 cellpadding=0>" + ! "<tr>" + ! "<td width=75> </td>" + ! "<td align=center>" + ! 
"<input type=hidden name=hl value=en>" + ! "<input type=hidden name=ie value=\"UTF-8\">" + ! "<input type=hidden name=oe value=\"UTF-8\">" + ! "<input maxLength=256 size=55 name=q value=\"\"><br>" + ! "<input type=submit value=\"Google Search\" name=btnG>" + ! "<input type=submit value=\"I'm Feeling Lucky\" name=btnI>" + ! "</td>" + ! "<td valign=top nowrap>" + ! "<font size=-2> • <a href=/advanced_search?hl=en>Advanced Search</a>" + ! "<br> • <a href=/preferences?hl=en>Preferences</a>" + ! "<br> • <a href=/language_tools?hl=en>Language Tools</a>" + ! "</font>" + ! "</td>" + ! "</tr>" + ! "</table>" + ! "</form><br>\n"+ ! "<br><font size=-1><a href=\"/ads/\">Advertise with Us</a> - <a href=\"/services/\">Search Solutions</a> - <a href=\"/options/\">Services & Tools</a> - <a href=/about.html>Jobs, Press, & Help</a><span id=hp style=\"behavior:url(#default#homepage)\"></span>\n"+ ! "<script>\n"+ ! "if (!hp.isHomePage('http://www.google.com/')) {document.write(\"<p><a href=\"/mgyhp.html\" onClick=\"style.behavior='url(#default#homepage)';setHomePage('http://www.google.com/');\">Make Google Your Homepage!</a>\");}\n"+ ! "</script></font>\n"+ ! "<p><font size=-2>©2002 Google</font><font size=-2> - Searching 3,083,324,652 web pages</font></center></body></html>\n" ! ); ! parser.registerScanners(); ! NodeList collectionList = new NodeList(); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! node.collectInto(collectionList,LinkTag.class); ! } ! assertEquals("Size of collection vector should be 11",11,collectionList.size()); ! // All items in collection vector should be links ! for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! assertTrue("Only links should have been parsed",node instanceof LinkTag); ! } ! } ! public void testImageCollection() throws ParserException { ! createParser( ! "<html>\n"+ ! "<head>\n"+ ! "<meta name=\"generator\" content=\"Created Using Yahoo! 
PageBuilder 2.60.24\">\n"+ ! "</head>\n"+ ! "<body bgcolor=\"#FFFFFF\" link=\"#0000FF\" vlink=\"#FF0000\" text=\"#000000\"\n"+ ! " onLoad=\"window.onresize=new Function('if (navigator.appVersion==\'Netscape\') history.go(0);');\">\n"+ ! "<div id=\"layer0\" style=\"position:absolute;left:218;top:40;width:240;height:26;\">\n"+ ! "<table width=240 height=26 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><b><font size=\"+2\"><span style=\"font-size:24\">NISHI-HONGWAN-JI</span></font></b></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer1\" style=\"position:absolute;left:75;top:88;width:542;height:83;\">\n"+ ! "<table width=542 height=83 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><span style=\"font-size:14\">The Nihi Hongwanj-ji temple is very traditional, very old, and very beautiful. This is the place that we stayed on our first night in Kyoto. We then attended the morning prayer ceremony, at 6:30 am. Staying here costed us 7,500 yen, which was inclusive of dinner and breakfast, and usage of the o-furo (public bath). Felt more like a luxury hotel than a temple.</span></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer2\" style=\"position:absolute;left:144;top:287;width:128;height:96;\">\n"+ ! "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji1.html\"><img height=96 width=128 src=\"nishi-hongwanji1-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer3\" style=\"position:absolute;left:415;top:285;width:128;height:96;\">\n"+ ! "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji3.html\"><img height=96 width=128 src=\"nishi-hongwanji2-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer4\" style=\"position:absolute;left:414;top:182;width:128;height:96;\">\n"+ ! 
"<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"higashi-hongwanji.html\"><img height=96 width=128 src=\"higashi-hongwanji-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer5\" style=\"position:absolute;left:78;top:396;width:530;height:49;\">\n"+ ! "<table width=530 height=49 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><span style=\"font-size:14\">Click on the pictures to see the full-sized versions. The picture at the top right corner is taken in Higashi-Hongwanji. Nishi means west, and Higashi means east. These two temples are adjacent to each other and represent two different Buddhist sects.</span></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer6\" style=\"position:absolute;left:143;top:180;width:128;height:102;\">\n"+ ! "<table width=128 height=102 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji4.html\"><img height=102 width=128 src=\"nishi-hongwanji4-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer7\" style=\"position:absolute;left:280;top:235;width:124;height:99;\">\n"+ ! "<table width=124 height=99 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji-lodging.html\"><img height=99 width=124 src=\"nishi-hongwanji-lodging-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "</body>\n"+ ! "</html>"); ! parser.registerScanners(); ! NodeList collectionList = new NodeList(); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! node.collectInto(collectionList,ImageTag.IMAGE_TAG_FILTER); ! } ! assertEquals("Size of collection vector should be 5",5,collectionList.size()); ! // All items in collection vector should be links ! for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! assertTrue("Only images sh... 
[truncated message content] |