[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests ParserTest.java,1.51,1.52

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests
In directory sc8-pr-cvs1:/tmp/cvs-serv27740

Modified Files:
	ParserTest.java 
Log Message:
Added testcases but was unable to reproduce the following bugs in the version 1.4 codebase:
839264 toHtml() parse error in Javascripts with "form" keyword
833592 DOCTYPE element is not parsed correctly
826764 ParserException occurs only when using setInputHTML() instea
825820 Words conjoined
825645 <input> not getting parsed inside table
813838 links not parsed correctly
and
#851882 zero length alt tag causes bug in ImageScanner
#832530 empty attribute causes parser to fail
#805598 attribute src in tag img sometimes not correctly parsed
(these 3 are all the same bug, duplicates of the following):
#753012 IMG SRC not parsed v1.3 & v1.4
#755929 Empty string attr. value causes attr parsing to be stopped
#778781 SRC-attribute suppression in IMG-tags
Also reviewed these test cases, again, with none reproducible in 1.4:
#788746 parser crashes on comments like <!-- foobar --!>
#772700 Jsp Tags are not parsed correctly when in quoted attributes.


Index: ParserTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTest.java,v
retrieving revision 1.51
retrieving revision 1.52
diff -C2 -d -r1.51 -r1.52
*** ParserTest.java	8 Dec 2003 01:31:53 -0000	1.51
--- ParserTest.java	2 Jan 2004 05:01:28 -0000	1.52
***************
*** 29,33 ****
--- 29,36 ----
  package org.htmlparser.tests;
  
+ import java.io.BufferedInputStream;
+ import java.io.DataInputStream;
  import java.io.File;
+ import java.io.FileInputStream;
  import java.io.FileWriter;
  import java.io.PrintWriter;
***************
*** 749,752 ****
--- 752,851 ----
          }
          assertEquals("Expected nodes",21,i);
+     }
+ 
+     /**
+      * Regression test for bug #826764: feed a page read back from disk to setInputHTML() and expect its three links.
+      */
+     public void testSetInputHTML () throws Exception
+     {
+         String html;
+         String path;
+         File file;
+         PrintWriter out;
+         Node[] nodes;
+ 
+         html = "<html></html>"; // placeholder document; replaced with the file contents before parsing
+         createParser (html); // defined outside this hunk; presumably sets up the `parser` used below - confirm
+         path = System.getProperty ("user.dir");
+         if (!path.endsWith (File.separator))
+             path += File.separator;
+         file = new File (path + "delete_me.html"); // scratch file in the working directory, deleted in the finally block
+         try
+         {
+             out = new PrintWriter (new FileWriter (file)); // write the fixture page: three list items, each holding one <a href> wrapped in marker comments
+             out.print ("<html>\r\n");
+             out.print ("<head>\r\n");
+             out.print ("<!-- BEGIN TYPE -->\r\n");
+             out.print ("<!-- NAVIGATION -->\r\n");
+             out.print ("<!-- END TYPE -->\r\n");
+             out.print ("<!-- BEGIN TITLE -->\r\n");
+             out.print ("<title>Einstiegsseite</title>\r\n");
+             out.print ("<!-- END TITLE -->\r\n");
+             out.print ("</head>\r\n");
+             out.print ("<body>\r\n");
+             out.print ("<ul>\r\n");
+             out.print ("<li>\r\n");
+             out.print ("<!-- BEGIN ITEM -->\r\n");
+             out.print ("<!-- BEGIN REF -->\r\n");
+             out.print ("<a href=\"kapitel1/index.html\">\r\n");
+             out.print ("<!-- END REF -->\r\n");
+             out.print ("<!-- BEGIN REFTITLE -->\r\n");
+             out.print ("Kapitel 1\r\n");
+             out.print ("<!-- END REFTITLE -->\r\n");
+             out.print ("</a>\r\n");
+             out.print ("<!-- END ITEM -->\r\n");
+             out.print ("</li>\r\n");
+             out.print ("<li>\r\n");
+             out.print ("<!-- BEGIN ITEM -->\r\n");
+             out.print ("<!-- BEGIN REF -->\r\n");
+             out.print ("<a href=\"kapitel2/index.html\">\r\n");
+             out.print ("<!-- END REF -->\r\n");
+             out.print ("<!-- BEGIN REFTITLE -->\r\n");
+             out.print ("Kapitel 2\r\n");
+             out.print ("<!-- END REFTITLE -->\r\n");
+             out.print ("</a>\r\n");
+             out.print ("<!-- END ITEM -->\r\n");
+             out.print ("</li>\r\n");
+             out.print ("<li>\r\n");
+             out.print ("<!-- BEGIN ITEM -->\r\n");
+             out.print ("<!-- BEGIN REF -->\r\n");
+             out.print ("<a href=\"kapitel3/index.html\">\r\n");
+             out.print ("<!-- END REF -->\r\n");
+             out.print ("<!-- BEGIN REFTITLE -->\r\n");
+             out.print ("Kapitel 3\r\n");
+             out.print ("<!-- END REFTITLE -->\r\n");
+             out.print ("</a>\r\n");
+             out.print ("<!-- END ITEM -->\r\n");
+             out.print ("</li>\r\n");
+             out.print ("</ul>\r\n");
+             out.print ("</body>\r\n");
+             out.print ("</html>");
+             out.close (); // close (and thereby flush) the writer before the file is read back
+             DataInputStream stream = new DataInputStream (
+                 new BufferedInputStream (new FileInputStream (file))); // NOTE(review): this stream is never closed in this method
+             byte[] buffer = new byte[(int)file.length ()]; // sized to the whole file so one readFully call loads it all
+             stream.readFully (buffer);
+             html = new String (buffer); // decodes using the platform default charset
+             try
+             {
+                 parser.setInputHTML (html); // the call this test exercises
+                 nodes = parser.extractAllNodesThatAre (LinkTag.class);
+             }
+             catch (ParserException e)
+             {
+                 e.printStackTrace ();
+                 nodes = new Node[0]; // an empty result makes the count assertion below fail
+             }
+             int count = nodes.length; // NOTE(review): count is never read
+             assertTrue ("node count", 3 == nodes.length); // one LinkTag expected per <a href> written above (three)
+         }
+         catch (Exception e)
+         {
+             fail (e.toString ()); // any other exception (e.g. from the file I/O above) is reported as a test failure
+         }
+         finally
+         {
+             file.delete (); // remove the scratch file whether or not the test passed
+         }
      }
  }