[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/parserHelperTests AllTests.java,1.24,1.25 CompositeTagScannerHelperTest.java,1.17,1.18 StringParserTest.java,1.34,1.35

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests
In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests/parserHelperTests

Modified Files:
	AllTests.java CompositeTagScannerHelperTest.java 
	StringParserTest.java 
Log Message:
Change tabs to spaces in all source files.

Index: AllTests.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/AllTests.java,v
retrieving revision 1.24
retrieving revision 1.25
diff -C2 -d -r1.24 -r1.25
*** AllTests.java	24 Aug 2003 21:59:43 -0000	1.24
--- AllTests.java	3 Sep 2003 23:36:21 -0000	1.25
***************
*** 39,48 ****
        public static TestSuite suite() {
          TestSuite suite = new TestSuite("Parser Helper Tests");
! 		// To-do: Test below should be enabled after it passes
  //      suite.addTestSuite(AttributeParserTest.class);
          suite.addTestSuite(CompositeTagScannerHelperTest.class);
          suite.addTestSuite(RemarkNodeParserTest.class);
          suite.addTestSuite(StringParserTest.class);
! 		// To-do: Test below should be enabled after it passes
  //      suite.addTestSuite(TagParserTest.class);

--- 39,48 ----
        public static TestSuite suite() {
          TestSuite suite = new TestSuite("Parser Helper Tests");
!         // To-do: Test below should be enabled after it passes
  //      suite.addTestSuite(AttributeParserTest.class);
          suite.addTestSuite(CompositeTagScannerHelperTest.class);
          suite.addTestSuite(RemarkNodeParserTest.class);
          suite.addTestSuite(StringParserTest.class);
!         // To-do: Test below should be enabled after it passes
  //      suite.addTestSuite(TagParserTest.class);

Index: CompositeTagScannerHelperTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/CompositeTagScannerHelperTest.java,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** CompositeTagScannerHelperTest.java	24 Aug 2003 21:59:43 -0000	1.17
--- CompositeTagScannerHelperTest.java	3 Sep 2003 23:36:21 -0000	1.18
***************
*** 41,70 ****
   */
  public class CompositeTagScannerHelperTest extends ParserTestCase {
! 	private CompositeTagScannerHelper helper;
! 	public CompositeTagScannerHelperTest(String name) {
! 		super(name);
! 	}

! 	protected void setUp() {
! 		helper = 
! 			new CompositeTagScannerHelper(null,null,null,null,null,false);
! 	}
! 	
! 	public void testIsXmlEndTagForRealXml() { 
! 		Tag tag = new Tag(
! 			new TagData(
! 				0,0,"something/",""
! 			)
! 		);
! 		assertTrue("should be an xml end tag",helper.isXmlEndTag(tag));
! 	}

! 	public void testIsXmlEndTagForFalseMatches() { 
! 		Tag tag = new Tag(
! 			new TagData(
! 				0,0,"a href=http://someurl.com/",""
! 			)
! 		); 
! 		assertFalse("should not be an xml end tag",helper.isXmlEndTag(tag)); 
! 	}
  }
--- 41,70 ----
   */
  public class CompositeTagScannerHelperTest extends ParserTestCase {
!     private CompositeTagScannerHelper helper;
!     public CompositeTagScannerHelperTest(String name) {
!         super(name);
!     }

!     protected void setUp() {
!         helper = 
!             new CompositeTagScannerHelper(null,null,null,null,null,false);
!     }
!     
!     public void testIsXmlEndTagForRealXml() { 
!         Tag tag = new Tag(
!             new TagData(
!                 0,0,"something/",""
!             )
!         );
!         assertTrue("should be an xml end tag",helper.isXmlEndTag(tag));
!     }

!     public void testIsXmlEndTagForFalseMatches() { 
!         Tag tag = new Tag(
!             new TagData(
!                 0,0,"a href=http://someurl.com/",""
!             )
!         ); 
!         assertFalse("should not be an xml end tag",helper.isXmlEndTag(tag)); 
!     }
  }

Index: StringParserTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/StringParserTest.java,v
retrieving revision 1.34
retrieving revision 1.35
diff -C2 -d -r1.34 -r1.35
*** StringParserTest.java	24 Aug 2003 21:59:43 -0000	1.34
--- StringParserTest.java	3 Sep 2003 23:36:21 -0000	1.35
***************
*** 40,209 ****
  public class StringParserTest extends ParserTestCase {

! 	public StringParserTest(String name) {
! 		super(name);
! 	}
! 	
! 	/**
! 	 * The bug being reproduced is this : 
! 	 * &lt;HTML&gt;&lt;HEAD&gt;&lt;TITLE&gt;Google&lt;/TITLE&gt; 
! 	 * The above line is incorrectly parsed in that, the text Google is missed.
! 	 * The presence of this bug is typically when some tag is identified before the string node is. (usually seen
! 	 * with the end tag). The bug lies in NodeReader.readElement().
! 	 * Creation date: (6/17/2001 4:01:06 PM)
! 	 */
! 	public void testStringNodeBug1() throws ParserException {
! 		createParser("<HTML><HEAD><TITLE>Google</TITLE>");
! 		parseAndAssertNodeCount(5);
! 		// The fourth node should be a HTMLStringNode- with the text - Google
! 		assertTrue("Fourth node should be a HTMLStringNode",node[3] instanceof StringNode);
! 		StringNode stringNode = (StringNode)node[3];
! 		assertEquals("Text of the StringNode","Google",stringNode.getText());
! 	}
! 	
! 	/**
! 	 * Bug reported by Kaarle Kaila of Nokia 
! 	 * For the following HTML :
! 	 * view these documents, you must have &lt;A href='http://www.adobe.com'&gt;Adobe 
! 	 * Acrobat Reader&lt;/A&gt; installed on your computer. 
! 	 * The first string before the link is not identified, and the space after the link is also not identified
! 	 * Creation date: (8/2/2001 2:07:32 AM)
! 	 */
! 	public void testStringNodeBug2() throws ParserException {
! 		// Register the link scanner
! 		
! 		createParser("view these documents, you must have <A href='http://www.adobe.com'>Adobe \n"+
! 			"Acrobat Reader</A> installed on your computer.");
! 		Parser.setLineSeparator("\r\n");
! 		parser.addScanner(new LinkScanner("-l"));
! 		parseAndAssertNodeCount(3);
! 		// The first node should be a HTMLStringNode- with the text - view these documents, you must have 
! 		assertTrue("First node should be a HTMLStringNode",node[0] instanceof StringNode);
! 		StringNode stringNode = (StringNode)node[0];
! 		assertEquals("Text of the StringNode","view these documents, you must have ",stringNode.getText());
! 		assertTrue("Second node should be a link node",node[1] instanceof LinkTag);
! 		LinkTag linkNode = (LinkTag)node[1];
! 		assertEquals("Link is","http://www.adobe.com",linkNode.getLink());
! 		assertEquals("Link text is","Adobe \r\nAcrobat Reader",linkNode.getLinkText());
! 	
! 		assertTrue("Third node should be a string node",node[2] instanceof StringNode);
! 		StringNode stringNode2 = (StringNode)node[2];
! 		assertEquals("Contents of third node"," installed on your computer.",stringNode2.getText());
! 	}
! 	
! 	/**
! 	 * Bug reported by Roger Sollberger 
! 	 * For the following HTML :
! 	 * &lt;a href="http://asgard.ch"&gt;[&lt; ASGARD &gt;&lt;/a&gt;&lt;br&gt;
! 	 * The string node is not correctly identified
! 	 */
! 	public void testTagCharsInStringNode() throws ParserException {
! 		createParser("<a href=\"http://asgard.ch\">[> ASGARD <]</a>");
! 		parser.addScanner(new LinkScanner("-l"));
! 		parseAndAssertNodeCount(1);
! 		assertTrue("Node identified must be a link tag",node[0] instanceof LinkTag);
! 		LinkTag linkTag = (LinkTag) node[0];
! 		assertEquals("[> ASGARD <]",linkTag.getLinkText());
! 		assertEquals("http://asgard.ch",linkTag.getLink());
! 	}
! 	
! 	public void testToPlainTextString() throws ParserException {
! 		createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>");
! 		parseAndAssertNodeCount(10);
! 		assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode);
! 		StringNode stringNode = (StringNode)node[3];
! 		assertEquals("First String Node","This is the Title",stringNode.toPlainTextString());
! 		assertTrue("Eighth Node identified must be a string node",node[7] instanceof StringNode);
! 		stringNode = (StringNode)node[7];
! 		assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toPlainTextString());
! 	}
! 	
! 	public void testToHTML() throws ParserException {
! 		createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>");
! 		parseAndAssertNodeCount(10);
! 		assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode);
! 		StringNode stringNode = (StringNode)node[3];
! 		assertEquals("First String Node","This is the Title",stringNode.toHtml());
! 		assertTrue("Eighth Node identified must be a string node",node[7] instanceof StringNode);
! 		stringNode = (StringNode)node[7];
! 		assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toHtml());
! 	}

! 	public void testEmptyLines() throws ParserException {
! 		createParser(
! 		"David Nirenberg (Center for Advanced Study in the Behavorial Sciences, Stanford).<br>\n"+
! 		"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           \n"+
! 		"<br>"
! 		);
! 		parseAndAssertNodeCount(4);
! 		assertTrue("Third Node identified must be a string node",node[2] instanceof StringNode);
! 	}

! 	/**
! 	 * This is a bug reported by John Zook (586222), where the first few chars
! 	 * before a remark is being missed, if its on the same line.
! 	 */
! 	public void testStringBeingMissedBug() throws ParserException {
! 		createParser(
! 		"Before Comment <!-- Comment --> After Comment"
! 		);
! 		parseAndAssertNodeCount(3);
! 		assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode);
! 		assertTrue("Second node should be HTMLRemarkNode",node[1] instanceof RemarkNode);
! 		assertTrue("Third node should be HTMLStringNode",node[2] instanceof StringNode);
! 		StringNode stringNode = (StringNode)node[0];
! 		assertEquals("First String node contents","Before Comment ",stringNode.getText());
! 		StringNode stringNode2 = (StringNode)node[2];
! 		assertEquals("Second String node contents"," After Comment",stringNode2.getText());
! 		RemarkNode remarkNode = (RemarkNode)node[1];
! 		assertEquals("Remark Node contents"," Comment ",remarkNode.getText());
! 			
! 	}

! 	/**
! 	 * Based on a bug report submitted by Cedric Rosa, if the last line contains a single character,
! 	 * StringNode does not return the string node correctly.
! 	 */
! 	public void testLastLineWithOneChar() throws ParserException {
! 		createParser("a");
! 		parseAndAssertNodeCount(1);
! 		assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode);
! 		StringNode stringNode = (StringNode)node[0];
! 		assertEquals("First String node contents","a",stringNode.getText());
! 	}
! 	
! 	public void testStringWithEmptyLine() throws ParserException {
! 		createParser("a\n\nb");
! 		parseAndAssertNodeCount(1);
! 		assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode);
! 		StringNode stringNode = (StringNode)node[0];
! 		assertStringEquals("First String node contents","a\r\n\r\nb",stringNode.getText());
! 	}	
! 	
! 	/**
! 	 * An attempt to reproduce bug 677176, which passes.
! 	 * @throws Exception
! 	 */
! 	public void testStringParserBug() throws Exception {
! 		createParser(
! 			"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 " +
			"Transitional//EN\">" +
			"<html>" +
			"<head>" +
			"<title>Untitled Document</title>" +
			"<meta http-equiv=\"Content-Type\" content=\"text/html; " +
			"charset=iso-8859-1\">" +
			"</head>" +
			"<script language=\"JavaScript\" type=\"text/JavaScript\">" +
			"// if this fails, output a 'hello' " +
			"if (true) " +
			"{ " +
			"//something good... " +
			"} " +
			"</script>" +
			"<body>" +
			"</body>" +
			"</html>" 
		);	
! 		parser.registerScanners();
! 		parseAndAssertNodeCount(10);
! 		assertType("fourth node",MetaTag.class,node[4]);
! 		MetaTag metaTag = (MetaTag)node[4];
! 		
! 		assertStringEquals(
! 			"content",
! 			"text/html; charset=iso-8859-1",
! 			metaTag.getAttribute("CONTENT")
! 		);
! 	}
! 	
! 	public void testStringWithLineBreaks() throws Exception {
! 		createParser("Testing &\nRefactoring");
! 		parseAndAssertNodeCount(1);
! 		assertType("first node",StringNode.class,node[0]);
! 		StringNode stringNode = (StringNode)node[0];
! 		assertStringEquals("text","Testing &\r\nRefactoring",stringNode.toPlainTextString());
! 	}
! 	
  }
--- 40,227 ----
  public class StringParserTest extends ParserTestCase {

! public StringParserTest(String name) {
! super(name);
! }
! 
! /**
! * The bug being reproduced is this : 
! * &lt;HTML&gt;&lt;HEAD&gt;&lt;TITLE&gt;Google&lt;/TITLE&gt; 
! * The above line is incorrectly parsed in that, the text Google is missed.
! * The presence of this bug is typically when some tag is identified before the string node is. (usually seen
! * with the end tag). The bug lies in NodeReader.readElement().
! * Creation date: (6/17/2001 4:01:06 PM)
! */
! public void testStringNodeBug1() throws ParserException {
! createParser("<HTML><HEAD><TITLE>Google</TITLE>");
! parseAndAssertNodeCount(5);
! // The fourth node should be a HTMLStringNode- with the text - Google
! assertTrue("Fourth node should be a HTMLStringNode",node[3] instanceof StringNode);
! StringNode stringNode = (StringNode)node[3];
! assertEquals("Text of the StringNode","Google",stringNode.getText());
! }
! 
! /**
! * Bug reported by Kaarle Kaila of Nokia 
! * For the following HTML :
! * view these documents, you must have &lt;A href='http://www.adobe.com'&gt;Adobe 
! * Acrobat Reader&lt;/A&gt; installed on your computer. 
! * The first string before the link is not identified, and the space after the link is also not identified
! * Creation date: (8/2/2001 2:07:32 AM)
! */
! public void testStringNodeBug2() throws ParserException {
! // Register the link scanner
! 
! createParser("view these documents, you must have <A href='http://www.adobe.com'>Adobe \n"+
! "Acrobat Reader</A> installed on your computer.");
! Parser.setLineSeparator("\r\n");
! parser.addScanner(new LinkScanner("-l"));
! parseAndAssertNodeCount(3);
! // The first node should be a HTMLStringNode- with the text - view these documents, you must have 
! assertTrue("First node should be a HTMLStringNode",node[0] instanceof StringNode);
! StringNode stringNode = (StringNode)node[0];
! assertEquals("Text of the StringNode","view these documents, you must have ",stringNode.getText());
! assertTrue("Second node should be a link node",node[1] instanceof LinkTag);
! LinkTag linkNode = (LinkTag)node[1];
! assertEquals("Link is","http://www.adobe.com",linkNode.getLink());
! assertEquals("Link text is","Adobe \r\nAcrobat Reader",linkNode.getLinkText());
! 
! assertTrue("Third node should be a string node",node[2] instanceof StringNode);
! StringNode stringNode2 = (StringNode)node[2];
! assertEquals("Contents of third node"," installed on your computer.",stringNode2.getText());
! }
! 
! /**
! * Bug reported by Roger Sollberger 
! * For the following HTML :
! * &lt;a href="http://asgard.ch"&gt;[&lt; ASGARD &gt;&lt;/a&gt;&lt;br&gt;
! * The string node is not correctly identified
! */
! public void testTagCharsInStringNode() throws ParserException {
! createParser("<a href=\"http://asgard.ch\">[> ASGARD <]</a>");
! parser.addScanner(new LinkScanner("-l"));
! parseAndAssertNodeCount(1);
! assertTrue("Node identified must be a link tag",node[0] instanceof LinkTag);
! LinkTag linkTag = (LinkTag) node[0];
! assertEquals("[> ASGARD <]",linkTag.getLinkText());
! assertEquals("http://asgard.ch",linkTag.getLink());
! }
! 
! public void testToPlainTextString() throws ParserException {
! createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>");
! parseAndAssertNodeCount(10);
! assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode);
! StringNode stringNode = (StringNode)node[3];
! assertEquals("First String Node","This is the Title",stringNode.toPlainTextString());
! assertTrue("Eighth Node identified must be a string node",node[7] instanceof StringNode);
! stringNode = (StringNode)node[7];
! assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toPlainTextString());
! }
! 
! public void testToHTML() throws ParserException {
! createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>");
! parseAndAssertNodeCount(10);
! assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode);
! StringNode stringNode = (StringNode)node[3];
! assertEquals("First String Node","This is the Title",stringNode.toHtml());
! assertTrue("Eighth Node identified must be a string node",node[7] instanceof StringNode);
! stringNode = (StringNode)node[7];
! assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toHtml());
! }

!     public void testEmptyLines() throws ParserException {
!         createParser(
!         "David Nirenberg (Center for Advanced Study in the Behavorial Sciences, Stanford).<br>\n"+
!         "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 \n"+
!         "<br>"
!         );
!         parseAndAssertNodeCount(4);
!         assertTrue("Third Node identified must be a string node",node[2] instanceof StringNode);
!     }

! /**
! * This is a bug reported by John Zook (586222), where the first few chars
! * before a remark is being missed, if its on the same line.
! */
! public void testStringBeingMissedBug() throws ParserException {
! createParser(
! "Before Comment <!-- Comment --> After Comment"
! );
! parseAndAssertNodeCount(3);
! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode);
! assertTrue("Second node should be HTMLRemarkNode",node[1] instanceof RemarkNode);
! assertTrue("Third node should be HTMLStringNode",node[2] instanceof StringNode);
! StringNode stringNode = (StringNode)node[0];
! assertEquals("First String node contents","Before Comment ",stringNode.getText());
! StringNode stringNode2 = (StringNode)node[2];
! assertEquals("Second String node contents"," After Comment",stringNode2.getText());
! RemarkNode remarkNode = (RemarkNode)node[1];
! assertEquals("Remark Node contents"," Comment ",remarkNode.getText());
! 
! }

!     /**
!      * Based on a bug report submitted by Cedric Rosa, if the last line contains a single character,
!      * StringNode does not return the string node correctly.
!      */
!     public void testLastLineWithOneChar() throws ParserException {
!         createParser("a");
!         parseAndAssertNodeCount(1);
!         assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode);
!         StringNode stringNode = (StringNode)node[0];
!         assertEquals("First String node contents","a",stringNode.getText());
!     }
!     
!     public void testStringWithEmptyLine() throws ParserException {
!         createParser("a\n\nb");
!         parseAndAssertNodeCount(1);
!         assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode);
!         StringNode stringNode = (StringNode)node[0];
!         assertStringEquals("First String node contents","a\r\n\r\nb",stringNode.getText());
!     }   
!     
!     /**
!      * An attempt to reproduce bug 677176, which passes.
!      * @throws Exception
!      */
!     public void testStringParserBug() throws Exception {
!         createParser(
!             "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 " +
!             "Transitional//EN\">" +
!             "<html>" +
!             "<head>" +
!             "<title>Untitled Document</title>" +
!             "<meta http-equiv=\"Content-Type\" content=\"text/html; " +
!             "charset=iso-8859-1\">" +
!             "</head>" +
!             "<script language=\"JavaScript\" type=\"text/JavaScript\">" +
!             "// if this fails, output a 'hello' " +
!             "if (true) " +
!             "{ " +
!             "//something good... " +
!             "} " +
!             "</script>" +
!             "<body>" +
!             "</body>" +
!             "</html>" 
!         );  
!         parser.registerScanners();
!         parseAndAssertNodeCount(10);
!         assertType("fourth node",MetaTag.class,node[4]);
!         MetaTag metaTag = (MetaTag)node[4];
!         
!         assertStringEquals(
!             "content",
!             "text/html; charset=iso-8859-1",
!             metaTag.getAttribute("CONTENT")
!         );
!     }
!     
!     public void testStringWithLineBreaks() throws Exception {
!         createParser("Testing &\nRefactoring");
!         parseAndAssertNodeCount(1);
!         assertType("first node",StringNode.class,node[0]);
!         StringNode stringNode = (StringNode)node[0];
!         assertStringEquals("text","Testing &\r\nRefactoring",stringNode.toPlainTextString());
!     }
!     
  }

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/parserHelperTests AllTests.java,1.24,1.25 CompositeTagScannerHelperTest.java,1.17,1.18 StringParserTest.java,1.34,1.35

[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/parserHelperTests AllTests.java,1.24,1.25 CompositeTagScannerHelperTest.java,1.17,1.18 StringParserTest.java,1.34,1.35