htmlparser-cvs Mailing List for HTML Parser (Page 26)

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests
In directory sc8-pr-cvs1:/tmp/cvs-serv27740/lexerTests

Modified Files:
	AttributeTests.java LexerTests.java 
Log Message:
Added testcases but was unable to reproduce the following bugs in the version 1.4 codebase:
839264 toHtml() parse error in Javascripts with "form" keyword
833592 DOCTYPE element is not parsed correctly
826764 ParserException occurs only when using setInputHTML() instead...
825820 Words conjoined
825645 <input> not getting parsed inside table
813838 links not parsed correctly
and
#851882 zero length alt tag causes bug in ImageScanner
#832530 empty attribute causes parser to fail
#805598 attribute src in tag img sometimes not correctly parsed
(these 3 are all the same bug, duplicates of the following):
#753012 IMG SRC not parsed v1.3 & v1.4
#755929 Empty string attr. value causes attr parsing to be stopped
#778781 SRC-attribute suppression in IMG-tags
Also reviewed these test cases, again, with none reproducible in 1.4:
#788746 parser crashes on comments like <!-- foobar --!>
#772700 Jsp Tags are not parsed correctly when in quoted attributes.


Index: AttributeTests.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/AttributeTests.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** AttributeTests.java	8 Dec 2003 01:31:53 -0000	1.8
--- AttributeTests.java	2 Jan 2004 05:01:28 -0000	1.9
***************
*** 38,41 ****
--- 38,42 ----
  import org.htmlparser.lexer.nodes.Attribute;
  import org.htmlparser.lexer.nodes.PageAttribute;
+ import org.htmlparser.tags.ImageTag;
  import org.htmlparser.tags.Tag;
  import org.htmlparser.tests.ParserTestCase;
***************
*** 519,522 ****
--- 520,604 ----
          assertTrue ("Attribute missing", table.containsKey ("OTHER"));
          assertEquals ("Attribute has wrong value", "fred", (String)table.get ("OTHER"));
+     }
+     
+     /**
+      * see bug #778781 SRC-attribute suppression in IMG-tags
+      * & #753012 IMG SRC not parsed v1.3 & v1.4
+      * & #755929 Empty string attr. value causes attr parsing to be stopped
+      * & #805598 attribute src in tag img sometimes not correctly parsed
+      * & #832530 empty attribute causes parser to fail
+      * & #851882 zero length alt tag causes bug in ImageScanner
+      *
+      *    HTML before parse:
+      *    <img src="images/first" alt="first">
+      *    <img src="images/second" alt="">
+      *    <img alt="third" src="images/third">
+      *    <img alt="" src="images/fourth">
+      *
+      *    HTML after parse:
+      *    <IMG ALT="first" SRC="images/first">
+      *    <IMG ALT="" SRC="images/second">
+      *    <IMG ALT="third" SRC="images/third">
+      *    <IMG ALT="">
+      */
+     public void testSrcAndAlt () throws ParserException
+     {
+         String html = "<img src=\"images/first\" alt=\"first\">";
+ 
+         createParser (html);
+         parseAndAssertNodeCount (1);
+         assertTrue ("Node should be an ImageTag", node[0] instanceof ImageTag);
+         ImageTag img = (ImageTag)node[0];
+         assertTrue ("bad source", "images/first".equals (img.getImageURL ()));
+         assertTrue ("bad alt", "first".equals (img.getAttribute ("alt")));
+         assertStringEquals ("toHtml()", html, img.toHtml ());
+     }
+ 
+     /**
+      * see bug #778781 SRC-attribute suppression in IMG-tags
+      */
+     public void testSrcAndEmptyAlt () throws ParserException
+     {
+         String html = "<img src=\"images/second\" alt=\"\">";
+ 
+         createParser (html);
+         parseAndAssertNodeCount (1);
+         assertTrue ("Node should be an ImageTag", node[0] instanceof ImageTag);
+         ImageTag img = (ImageTag)node[0];
+         assertTrue ("bad source", "images/second".equals (img.getImageURL ()));
+         assertTrue ("bad alt", "".equals (img.getAttribute ("alt")));
+         assertStringEquals ("toHtml()", html, img.toHtml ());
+     }
+ 
+     /**
+      * see bug #778781 SRC-attribute suppression in IMG-tags
+      */
+     public void testAltAndSrc () throws ParserException
+     {
+         String html = "<img alt=\"third\" src=\"images/third\">";
+ 
+         createParser (html);
+         parseAndAssertNodeCount (1);
+         assertTrue ("Node should be an ImageTag", node[0] instanceof ImageTag);
+         ImageTag img = (ImageTag)node[0];
+         assertTrue ("bad source", "images/third".equals (img.getImageURL ()));
+         assertTrue ("bad alt", "third".equals (img.getAttribute ("alt")));
+         assertStringEquals ("toHtml()", html, img.toHtml ());
+     }
+ 
+     /**
+      * see bug #778781 SRC-attribute suppression in IMG-tags
+      */
+     public void testEmptyAltAndSrc () throws ParserException
+     {
+         String html = "<img alt=\"\" src=\"images/third\">";
+ 
+         createParser (html);
+         parseAndAssertNodeCount (1);
+         assertTrue ("Node should be an ImageTag", node[0] instanceof ImageTag);
+         ImageTag img = (ImageTag)node[0];
+         assertTrue ("bad source", "images/third".equals (img.getImageURL ()));
+         assertTrue ("bad alt", "".equals (img.getAttribute ("alt")));
+         assertStringEquals ("toHtml()", html, img.toHtml ());
      }
  }

Index: LexerTests.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/LexerTests.java,v
retrieving revision 1.13
retrieving revision 1.14
diff -C2 -d -r1.13 -r1.14
*** LexerTests.java	31 Dec 2003 14:40:50 -0000	1.13
--- LexerTests.java	2 Jan 2004 05:01:28 -0000	1.14
***************
*** 697,700 ****
--- 697,750 ----
      }
  
+     /**
+      * See bug #825820 Words conjoined
+      */
+     public void testConjoined ()
+         throws
+             ParserException
+     {
+         StringBuffer buffer;
+         NodeIterator iterator;
+         Node node;
+         String expected;
+ 
+         expected = "The Title\nThis is the body.";
+         String html1 = "<html><title>The Title\n</title>" +
+             "<body>This is <a href=\"foo.html\">the body</a>.</body></html>";
+         createParser (html1);
+         buffer = new StringBuffer ();
+         for (iterator = parser.elements (); iterator.hasMoreNodes (); )
+         {
+             node = iterator.nextNode ();
+             String text = node.toPlainTextString ();
+             buffer.append (text);
+         }
+         assertStringEquals ("conjoined text", expected, buffer.toString ());
+ 
+         String html2 = "<html><title>The Title</title>\n" +
+             "<body>This is <a href=\"foo.html\">the body</a>.</body></html>";
+         createParser (html2);
+         buffer = new StringBuffer ();
+         for (iterator = parser.elements (); iterator.hasMoreNodes (); )
+         {
+             node = iterator.nextNode ();
+             String text = node.toPlainTextString ();
+             buffer.append (text);
+         }
+         assertStringEquals ("conjoined text", expected, buffer.toString ());
+         
+         String html3 = "<html><title>The Title</title>" +
+             "<body>\nThis is <a href=\"foo.html\">the body</a>.</body></html>";
+         createParser (html3);
+         buffer = new StringBuffer ();
+         for (iterator = parser.elements (); iterator.hasMoreNodes (); )
+         {
+             node = iterator.nextNode ();
+             String text = node.toPlainTextString ();
+             buffer.append (text);
+         }
+         assertStringEquals ("conjoined text", expected, buffer.toString ());
+     }
+ 
  }

2003	Jan	Feb	Mar	Apr	May (141)	Jun (108)	Jul (66)	Aug (127)	Sep (155)	Oct (149)	Nov (72)	Dec (72)
2004	Jan (100)	Feb (36)	Mar (21)	Apr (3)	May (87)	Jun (28)	Jul (84)	Aug (5)	Sep (14)	Oct	Nov	Dec
2005	Jan (1)	Feb (39)	Mar (26)	Apr (38)	May (14)	Jun (10)	Jul	Aug	Sep (13)	Oct (8)	Nov (10)	Dec
2006	Jan	Feb (1)	Mar (17)	Apr (20)	May (28)	Jun (24)	Jul	Aug	Sep	Oct	Nov	Dec
2015	Jan	Feb	Mar (1)	Apr	May	Jun	Jul	Aug	Sep	Oct	Nov	Dec

htmlparser-cvs Mailing List for HTML Parser (Page 26)

htmlparser-cvs — syncmail email notification of CVS commits