htmlparser-cvs Mailing List for HTML Parser (Page 36)
Brought to you by:
derrickoswald
You can subscribe to this list here.
2003 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(141) |
Jun
(108) |
Jul
(66) |
Aug
(127) |
Sep
(155) |
Oct
(149) |
Nov
(72) |
Dec
(72) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2004 |
Jan
(100) |
Feb
(36) |
Mar
(21) |
Apr
(3) |
May
(87) |
Jun
(28) |
Jul
(84) |
Aug
(5) |
Sep
(14) |
Oct
|
Nov
|
Dec
|
2005 |
Jan
(1) |
Feb
(39) |
Mar
(26) |
Apr
(38) |
May
(14) |
Jun
(10) |
Jul
|
Aug
|
Sep
(13) |
Oct
(8) |
Nov
(10) |
Dec
|
2006 |
Jan
|
Feb
(1) |
Mar
(17) |
Apr
(20) |
May
(28) |
Jun
(24) |
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
2015 |
Jan
|
Feb
|
Mar
(1) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: <der...@us...> - 2003-10-19 10:57:05
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1:/tmp/cvs-serv10542/tags Modified Files: AppletTag.java Log Message: Partition Attribute into a base class and PageAttribute class for the Lexer. Fixed the AppletTag.setAppletParams in a cheesy manner. Clear out the released NodeList entry on remove(). Dropped the HTMLTagParserTest tests, because they really weren't relevant any more. Index: AppletTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/AppletTag.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** AppletTag.java 28 Sep 2003 15:33:58 -0000 1.30 --- AppletTag.java 18 Oct 2003 20:50:37 -0000 1.31 *************** *** 31,36 **** --- 31,39 ---- import java.util.Enumeration; import java.util.Hashtable; + import java.util.Vector; import org.htmlparser.Node; + import org.htmlparser.StringNode; + import org.htmlparser.lexer.nodes.Attribute; import org.htmlparser.tags.data.CompositeTagData; import org.htmlparser.tags.data.TagData; *************** *** 167,172 **** String paramName; String paramValue; ! String s; ! TagData tagData; kids = getChildren (); --- 170,176 ---- String paramName; String paramValue; ! Vector attributes; ! Attribute attribute; ! StringNode string; kids = getChildren (); *************** *** 177,181 **** --- 181,198 ---- if (node instanceof Tag) if (((Tag)node).getTagName ().equals ("PARAM")) + { kids.remove (i); + // remove whitespace too + if (i < kids.size ()) + { + node = kids.elementAt (i); + if (node instanceof StringNode) + { + string = (StringNode)node; + if (0 == string.getText ().trim ().length ()) + kids.remove (i); + } + } + } else i++; *************** *** 187,196 **** for (Enumeration e = newAppletParams.keys (); e.hasMoreElements (); ) { paramName = (String)e.nextElement (); paramValue = (String)newAppletParams.get (paramName); ! s = "PARAM VALUE=\"" + paramValue + "\" NAME=\"" + paramName + "\""; ! throw new IllegalStateException ("not implemented"); ! // tagData = new TagData (0, 0, 0, 0, s, s, "", false); // what, no URL? ! // kids.add (new Tag (tagData)); } --- 204,224 ---- for (Enumeration e = newAppletParams.keys (); e.hasMoreElements (); ) { + attributes = new Vector (); // should the tag copy the attributes? paramName = (String)e.nextElement (); paramValue = (String)newAppletParams.get (paramName); ! attribute = new Attribute ("PARAM", null); ! System.out.println (attribute); ! attributes.addElement (attribute); ! attributes.addElement (new Attribute (" ")); ! attribute = new Attribute ("VALUE", paramValue, '"'); ! System.out.println (attribute); ! attributes.addElement (attribute); ! attributes.addElement (new Attribute (" ")); ! attribute = new Attribute ("NAME", paramName, '"'); ! System.out.println (attribute); ! attributes.addElement (attribute); ! tag = new Tag (null, 0, 0, attributes); ! System.out.println (tag.toHtml ()); ! kids.add (tag); } |
From: <der...@us...> - 2003-10-19 08:18:52
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1:/tmp/cvs-serv10542/lexer Modified Files: Lexer.java Log Message: Partition Attribute into a base class and PageAttribute class for the Lexer. Fixed the AppletTag.setAppletParams in a cheesy manner. Clear out the released NodeList entry on remove(). Dropped the HTMLTagParserTest tests, because they really weren't relevant any more. Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** Lexer.java 13 Oct 2003 21:48:12 -0000 1.13 --- Lexer.java 18 Oct 2003 20:50:36 -0000 1.14 *************** *** 42,46 **** import org.htmlparser.Node; import org.htmlparser.lexer.nodes.AbstractNode; ! import org.htmlparser.lexer.nodes.Attribute; import org.htmlparser.lexer.nodes.NodeFactory; import org.htmlparser.lexer.nodes.RemarkNode; --- 42,46 ---- import org.htmlparser.Node; import org.htmlparser.lexer.nodes.AbstractNode; ! import org.htmlparser.lexer.nodes.PageAttribute; import org.htmlparser.lexer.nodes.NodeFactory; import org.htmlparser.lexer.nodes.RemarkNode; *************** *** 374,409 **** { if (bookmarks[1] > bookmarks[0]) ! attributes.addElement (new Attribute (mPage, -1, -1, bookmarks[0], bookmarks[1], (char)0)); ! //attributes.addElement (new Attribute (null, mPage.getText (bookmarks[0], bookmarks[1]), (char)0)); } private void standalone (Vector attributes, int[] bookmarks) { ! attributes.addElement (new Attribute (mPage, bookmarks[1], bookmarks[2], -1, -1, (char)0)); ! //attributes.addElement (new Attribute (mPage.getText (bookmarks[1], bookmarks[2]), null, (char)0)); } private void empty (Vector attributes, int[] bookmarks) { ! attributes.addElement (new Attribute (mPage, bookmarks[1], bookmarks[2], bookmarks[2] + 1, -1, (char)0)); ! //attributes.addElement (new Attribute (mPage.getText (bookmarks[1], bookmarks[2]), "", (char)0)); } private void naked (Vector attributes, int[] bookmarks) { ! attributes.addElement (new Attribute (mPage, bookmarks[1], bookmarks[2], bookmarks[3], bookmarks[4], (char)0)); ! //attributes.addElement (new Attribute (mPage.getText (bookmarks[1], bookmarks[2]), mPage.getText (bookmarks[3], bookmarks[4]), (char)0)); } private void single_quote (Vector attributes, int[] bookmarks) { ! attributes.addElement (new Attribute (mPage, bookmarks[1], bookmarks[2], bookmarks[4] + 1, bookmarks[5], '\'')); ! //attributes.addElement (new Attribute (mPage.getText (bookmarks[1], bookmarks[2]), mPage.getText (bookmarks[4] + 1, bookmarks[5]), '\'')); } private void double_quote (Vector attributes, int[] bookmarks) { ! attributes.addElement (new Attribute (mPage, bookmarks[1], bookmarks[2], bookmarks[5] + 1, bookmarks[6], '"')); ! //attributes.addElement (new Attribute (mPage.getText (bookmarks[1], bookmarks[2]), mPage.getText (bookmarks[5] + 1, bookmarks[6]), '"')); } --- 374,403 ---- { if (bookmarks[1] > bookmarks[0]) ! attributes.addElement (new PageAttribute (mPage, -1, -1, bookmarks[0], bookmarks[1], (char)0)); } private void standalone (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], -1, -1, (char)0)); } private void empty (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[2] + 1, -1, (char)0)); } private void naked (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[3], bookmarks[4], (char)0)); } private void single_quote (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[4] + 1, bookmarks[5], '\'')); } private void double_quote (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[5] + 1, bookmarks[6], '"')); } *************** *** 541,545 **** break; case 3: // within naked attribute value ! if ('>' == ch) { naked (attributes, bookmarks); --- 535,539 ---- break; case 3: // within naked attribute value ! if ((0 == ch) || ('>' == ch)) { naked (attributes, bookmarks); *************** *** 631,640 **** protected void fixAttributes (Vector attributes) throws ParserException { ! Attribute attribute; Cursor cursor; char ch1; // name starting character char ch2; // name ending character ! Attribute prev1; // attribute prior to the current ! Attribute prev2; // attribute prior but one to the current char quote; --- 625,634 ---- protected void fixAttributes (Vector attributes) throws ParserException { ! PageAttribute attribute; Cursor cursor; char ch1; // name starting character char ch2; // name ending character ! PageAttribute prev1; // attribute prior to the current ! PageAttribute prev2; // attribute prior but one to the current char quote; *************** *** 645,649 **** for (int i = 2; i < attributes.size (); ) { ! attribute = (Attribute)attributes.elementAt (i); if (!attribute.isWhitespace ()) { --- 639,643 ---- for (int i = 2; i < attributes.size (); ) { ! attribute = (PageAttribute)attributes.elementAt (i); if (!attribute.isWhitespace ()) { |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv10542/tests/tagTests Modified Files: AppletTagTest.java FormTagTest.java JspTagTest.java ScriptTagTest.java TagTest.java Log Message: Partition Attribute into a base class and PageAttribute class for the Lexer. Fixed the AppletTag.setAppletParams in a cheesy manner. Clear out the released NodeList entry on remove(). Dropped the HTMLTagParserTest tests, because they really weren't relevant any more. Index: AppletTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/AppletTagTest.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** AppletTagTest.java 6 Oct 2003 01:43:28 -0000 1.30 --- AppletTagTest.java 18 Oct 2003 20:50:37 -0000 1.31 *************** *** 170,179 **** // Check the data in the applet tag String expectedRawString = ! "<APPLET CODE=MyOtherClass.class ARCHIVE=htmlparser.jar CODEBASE=htmlparser.sourceforge.net>\n"+ "<PARAM VALUE=\"One\" NAME=\"First\">"+ - "<PARAM VALUE=\"Two\" NAME=\"Second\">"+ "<PARAM VALUE=\"3\" NAME=\"Third\">"+ "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } } --- 170,180 ---- // Check the data in the applet tag String expectedRawString = ! "<APPLET CODE=Myclass.class ARCHIVE=test.jar CODEBASE=www.kizna.com>\n"+ ! "<PARAM VALUE=\"Two\" NAME=\"Second\">"+ // note these are out of orer because of the hashtable "<PARAM VALUE=\"One\" NAME=\"First\">"+ "<PARAM VALUE=\"3\" NAME=\"Third\">"+ "</APPLET>"; ! String actual = appletTag.toHtml(); ! assertStringEquals("toHTML()",expectedRawString,actual); } } Index: FormTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/FormTagTest.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** FormTagTest.java 13 Oct 2003 21:48:14 -0000 1.34 --- FormTagTest.java 18 Oct 2003 20:50:37 -0000 1.35 *************** *** 57,61 **** String expected = FormScannerTest.FORM_HTML.substring (0, FormScannerTest.FORM_HTML.indexOf ("\"do_login.php\"")) ! + "http://www.yahoo.com/yahoo/do_not_login.jsp" + FormScannerTest.FORM_HTML.substring (FormScannerTest.FORM_HTML.indexOf ("\"do_login.php\"") + 14); assertStringEquals("Raw String",expected,formTag.toHtml()); --- 57,61 ---- String expected = FormScannerTest.FORM_HTML.substring (0, FormScannerTest.FORM_HTML.indexOf ("\"do_login.php\"")) ! + "\"http://www.yahoo.com/yahoo/do_not_login.jsp\"" + FormScannerTest.FORM_HTML.substring (FormScannerTest.FORM_HTML.indexOf ("\"do_login.php\"") + 14); assertStringEquals("Raw String",expected,formTag.toHtml()); Index: JspTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/JspTagTest.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** JspTagTest.java 13 Oct 2003 21:48:14 -0000 1.33 --- JspTagTest.java 18 Oct 2003 20:50:37 -0000 1.34 *************** *** 62,69 **** public void testJspTag() throws ParserException { ! createParser( ! "<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>\n"+ ! "<jsp:useBean id=\"transfer\" scope=\"session\" class=\"com.bank.PageBean\"/>\n"+ ! "<%\n"+ " org.apache.struts.util.BeanUtils.populate(transfer, request);\n"+ " if(request.getParameter(\"marker\") == null)\n"+ --- 62,68 ---- public void testJspTag() throws ParserException { ! String contents = "jsp:useBean id=\"transfer\" scope=\"session\" class=\"com.bank.PageBean\"/"; ! String jsp = "<" + contents + ">"; ! String contents2 = "%\n"+ " org.apache.struts.util.BeanUtils.populate(transfer, request);\n"+ " if(request.getParameter(\"marker\") == null)\n"+ *************** *** 73,78 **** " else \n"+ " if(transfer.validate(request))\n"+ ! " %><jsp:forward page=\"transferConfirm.jsp\"/><%\n"+ ! "%>\n"); Parser.setLineSeparator("\r\n"); // Register the Jsp Scanner --- 72,81 ---- " else \n"+ " if(transfer.validate(request))\n"+ ! " %"; ! createParser( ! "<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>\n"+ ! jsp + "\n" + ! "<" + contents2 + ">\n<jsp:forward page=\"transferConfirm.jsp\"/><%\n"+ ! "%>"); Parser.setLineSeparator("\r\n"); // Register the Jsp Scanner *************** *** 87,105 **** assertTrue("Node 3 should be a normal Tag",node[2] instanceof Tag); Tag htag = (Tag)node[2]; ! assertStringEquals("Contents of the tag","jsp:useBean id=\"transfer\" scope=\"session\" class=\"com.bank.PageBean\"",htag.getText()); ! assertStringEquals("html","<JSP:USEBEAN ID=\"transfer\" SCOPE=\"session\" CLASS=\"com.bank.PageBean\"/>",htag.toHtml()); // The third node should be an JspTag assertTrue("Node 5 should be an JspTag",node[4] instanceof JspTag); JspTag tag2 = (JspTag)node[4]; ! String expected = "\r\n"+ ! " org.apache.struts.util.BeanUtils.populate(transfer, request);\r\n"+ ! " if(request.getParameter(\"marker\") == null)\r\n"+ ! " // initialize a pseudo-property\r\n"+ ! " transfer.set(\"days\", java.util.Arrays.asList(\r\n"+ ! " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\r\n"+ ! " else \r\n"+ ! " if(transfer.validate(request))\r\n"+ ! " "; ! assertEquals("Contents of the tag",expected,tag2.getText()); } --- 90,99 ---- assertTrue("Node 3 should be a normal Tag",node[2] instanceof Tag); Tag htag = (Tag)node[2]; ! assertStringEquals("Contents of the tag",contents,htag.getText()); ! assertStringEquals("html",jsp,htag.toHtml()); // The third node should be an JspTag assertTrue("Node 5 should be an JspTag",node[4] instanceof JspTag); JspTag tag2 = (JspTag)node[4]; ! assertEquals("Contents of the tag",contents2,tag2.getText()); } Index: ScriptTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/ScriptTagTest.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** ScriptTagTest.java 6 Oct 2003 01:43:28 -0000 1.32 --- ScriptTagTest.java 18 Oct 2003 20:50:37 -0000 1.33 *************** *** 53,76 **** } ! public void testCreation() { ! fail ("not implemented"); ! // StringNode stringNode = ! // new StringNode(new StringBuffer("Script Code"),0,0); ! // NodeList childVector = new NodeList(); ! // childVector.add(stringNode); ! // ScriptTag scriptTag = ! // new ScriptTag( ! // new TagData(0,10,"Tag Contents","tagline"), ! // new CompositeTagData(null,null,childVector) ! // ); ! // ! // assertNotNull("Script Tag object creation",scriptTag); ! // assertEquals("Script Tag Begin",0,scriptTag.elementBegin()); ! // assertEquals("Script Tag End",10,scriptTag.elementEnd()); ! // assertEquals("Script Tag Contents","Tag Contents",scriptTag.getText()); ! // assertEquals("Script Tag Code","Script Code",scriptTag.getScriptCode()); ! // assertEquals("Script Tag Line","tagline",scriptTag.getTagLine()); } - public void testToHTML() throws ParserException { --- 53,69 ---- } ! public void testCreation() throws ParserException ! { ! String testHtml = "<SCRIPT>Script Code</SCRIPT>"; ! createParser(testHtml,"http://localhost/index.html"); ! // Register the script scanner ! parser.addScanner(new ScriptScanner("-s")); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); ! ScriptTag scriptTag = (ScriptTag)node[0]; ! assertEquals("Script Tag Begin",0,scriptTag.elementBegin()); ! assertEquals("Script Tag End",28,scriptTag.elementEnd()); ! assertEquals("Script Tag Code","Script Code",scriptTag.getScriptCode()); } public void testToHTML() throws ParserException { Index: TagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/TagTest.java,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** TagTest.java 13 Oct 2003 21:48:14 -0000 1.47 --- TagTest.java 18 Oct 2003 20:50:37 -0000 1.48 *************** *** 588,615 **** assertStringEquals("Expected HTML",expectedHTML,htmlTag.toHtml()); } - public void testIgnoreState() throws ParserException { - fail ("not implemented"); - // String testHTML = "<A \n"+ - // "HREF=\"/a?b=c>d&e=f&g=h&i=http://localhost/Testing/Report1.html\">20020702 Report 1</A>"; - // createParser(testHTML); - // Node node = Tag.find(parser.getReader(),testHTML,0); - // assertTrue("Node should be a tag",node instanceof Tag); - // Tag tag = (Tag)node; - // String href = tag.getAttribute("HREF"); - // assertStringEquals("Resolved Link","/a?b=c>d&e=f&g=h&i=http://localhost/Testing/Report1.html",href); ! } ! public void testExtractWord() { ! fail ("not implemented"); ! // String line = "Abc DEF GHHI"; ! // assertEquals("Word expected","ABC",Tag.extractWord(line)); ! // String line2= "%\n "; ! // assertEquals("Word expected for line 2","%",Tag.extractWord(line2)); ! // String line3 = "%\n%>"; ! // assertEquals("Word expected for line 3","%",Tag.extractWord(line3)); ! // String line4 = "%=abc%>"; ! // assertEquals("Word expected for line 4","%",Tag.extractWord(line4)); ! // String line5 = "OPTION"; ! // assertEquals("Word expected for line 5","OPTION",Tag.extractWord(line5)); } --- 588,603 ---- assertStringEquals("Expected HTML",expectedHTML,htmlTag.toHtml()); } ! public void testIgnoreState() throws ParserException ! { ! String testHTML = "<A \n"+ ! "HREF=\"/a?b=c>d&e=f&g=h&i=http://localhost/Testing/Report1.html\">20020702 Report 1</A>"; ! createParser(testHTML); ! parser.registerScanners (); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a tag",node[0] instanceof Tag); ! Tag tag = (Tag)node[0]; ! String href = tag.getAttribute("HREF"); ! assertStringEquals("Resolved Link","/a?b=c>d&e=f&g=h&i=http://localhost/Testing/Report1.html",href); } |
From: <der...@us...> - 2003-10-19 05:51:56
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv10542/lexer/nodes Modified Files: Attribute.java TagNode.java Added Files: PageAttribute.java Log Message: Partition Attribute into a base class and PageAttribute class for the Lexer. Fixed the AppletTag.setAppletParams in a cheesy manner. Clear out the released NodeList entry on remove(). Dropped the HTMLTagParserTest tests, because they really weren't relevant any more. --- NEW FILE: PageAttribute.java --- // HTMLParser Library v1_4_20030921 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // For any questions or suggestions, you can write to me at : // Email :so...@in... // // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com // // This class was contributed by // Derrick Oswald // package org.htmlparser.lexer.nodes; import org.htmlparser.lexer.Page; /** * An attribute within a tag on a page. * This attribute is similar to Attribute but 'lazy loaded' from the * <code>Page</code> by providing the page and cursor offsets * into the page for the name and value. This is done for speed, since * if the name and value are not been needed we can avoid the cost and memory * overhead of creating the strings. * <p> * Thus the property getters, defer to the base class unless the property * is null, in which case an attempt is made to read it from the underlying * page. Optimizations in the predicates and length calculation defer the * actual instantiation of strings until absolutely needed. */ public class PageAttribute extends Attribute { /** * The page this attribute is extracted from. */ protected Page mPage; /** * The starting offset of the name within the page. * If negative, the name is considered <code>null</code>. */ protected int mNameStart; /** * The ending offset of the name within the page. */ protected int mNameEnd; /** * The starting offset of the value within the page. * If negative, the value is considered <code>null</code>. */ protected int mValueStart; /** * The ending offset of the name within the page. */ protected int mValueEnd; /** * Create an attribute. * @param page The page containing the attribute. * @param name_start The starting offset of the name within the page. * If this is negative, the name is considered null. * @param name_end The ending offset of the name within the page. * @param value_start he starting offset of the value within the page. * If this is negative, the value is considered null. * @param value_end The ending offset of the value within the page. * @param quote The quote, if any, surrounding the value of the attribute, * (i.e. ' or "), or zero if none. */ public PageAttribute (Page page, int name_start, int name_end, int value_start, int value_end, char quote) { mPage = page; mNameStart = name_start; mNameEnd = name_end; mValueStart = value_start; mValueEnd = value_end; setName (null); setAssignment (null); setValue (null); setQuote (quote); } // // provide same constructors as super class // private void init () { mPage = null; mNameStart = -1; mNameEnd = -1; mValueStart = -1; mValueEnd = -1; } /** * Create an attribute with the name, assignment string, value and quote given. * If the quote value is zero, assigns the value using {@link #setRawValue} * which sets the quote character to a proper value if necessary. * @param name The name of this attribute. * @param assignment The assignment string of this attribute. * @param value The value of this attribute. * @param quote The quote around the value of this attribute. */ public PageAttribute (String name, String assignment, String value, char quote) { super (name, assignment, value, quote); init (); } /** * Create an attribute with the name, value and quote given. * Uses an equals sign as the assignment string if the value is not * <code>null</code>, and calls {@link #setRawValue} to get the * correct quoting if <code>quote</code> is zero. * @param name The name of this attribute. * @param value The value of this attribute. * @param quote The quote around the value of this attribute. */ public PageAttribute (String name, String value, char quote) { super (name, value, quote); init (); } /** * Create a whitespace attribute with the value given. * @param value The value of this attribute. * @exception IllegalArgumentException if the value contains other than * whitespace. To set a real value use {@link #Attribute(String,String)}. */ public PageAttribute (String value) { super (value); init (); } /** * Create an attribute with the name and value given. * Uses an equals sign as the assignment string if the value is not * <code>null</code>, and calls {@link #setRawValue} to get the * correct quoting. * @param name The name of this attribute. * @param value The value of this attribute. */ public PageAttribute (String name, String value) { super (name, value); init (); } /** * Create an attribute with the name, assignment string and value given. * Calls {@link #setRawValue} to get the correct quoting. * @param name The name of this attribute. * @param assignment The assignment string of this attribute. * @param value The value of this attribute. */ public PageAttribute (String name, String assignment, String value) { super (name, assignment, value); init (); } /** * Create an empty attribute. * This will provide "" from the {@link #toString} and * {@link #toString(StringBuffer)} methods. */ public PageAttribute () { super (); init (); } /** * Get the name of this attribute. * The part before the equals sign, or the contents of the * stand-alone attribute. * @return The name, or <code>null</code> if it's just a whitepace * 'attribute'. */ public String getName () { String ret; ret = super.getName (); if (null == ret) { if ((null != mPage) && (0 <= mNameStart)) { ret = mPage.getText (mNameStart, mNameEnd); setName (ret); // cache the value } } return (ret); } /** * Get the name of this attribute. * @param buffer The buffer to place the name in. * @see #getName() */ public void getName (StringBuffer buffer) { String name; name = super.getName (); if (null == name) { if ((null != mPage) && (0 <= mNameStart)) mPage.getText (buffer, mNameStart, mNameEnd); } else buffer.append (name); } /** * Get the assignment string of this attribute. * This is usually just an equals sign, but in poorly formed attributes it * can include whitespace on either or both sides of an equals sign. * @return The assignment string. */ public String getAssignment () { int end; String ret; ret = super.getAssignment (); if (null == ret) { if ((null != mPage) && (0 <= mNameEnd) && (0 <= mValueStart)) { end = mValueStart; if (0 != getQuote ()) end--; ret = mPage.getText (mNameEnd, end); setAssignment (ret); // cache the value } } return (ret); } /** * Get the assignment string of this attribute. * @param buffer The buffer to place the assignment string in. * @see #getAssignment() */ public void getAssignment (StringBuffer buffer) { int end; String assignment; assignment = super.getAssignment (); if (null == assignment) { if ((null != mPage) && (0 <= mNameEnd) && (0 <= mValueStart)) { end = mValueStart; if (0 != getQuote ()) end--; mPage.getText (buffer, mNameEnd, end); } } else buffer.append (assignment); } /** * Get the value of the attribute. * The part after the equals sign, or the text if it's just a whitepace * 'attribute'. * <em>NOTE:</em> This does not include any quotes that may have enclosed * the value when it was read. To get the un-stripped value use * {@link #getRawValue}. * @return The value, or <code>null</code> if it's a stand-alone or * empty attribute, or the text if it's just a whitepace 'attribute'. */ public String getValue () { String ret; ret = super.getValue (); if (null == ret) { if ((null != mPage) && (0 <= mValueEnd)) { ret = mPage.getText (mValueStart, mValueEnd); setValue (ret); // cache the value } } return (ret); } /** * Get the value of the attribute. * @param buffer The buffer to place the value in. * @see #getValue() */ public void getValue (StringBuffer buffer) { String value; value = super.getValue (); if (null == value) { if ((null != mPage) && (0 <= mValueEnd)) mPage.getText (buffer, mNameStart, mNameEnd); } else buffer.append (value); } /** * Get the raw value of the attribute. * The part after the equals sign, or the text if it's just a whitepace * 'attribute'. This includes the quotes around the value if any. * @return The value, or <code>null</code> if it's a stand-alone attribute, * or the text if it's just a whitepace 'attribute'. */ public String getRawValue () { char quote; StringBuffer buffer; String ret; ret = getValue (); if (null != ret && (0 != (quote = getQuote ()))) { buffer = new StringBuffer (ret.length() + 2); buffer.append (quote); buffer.append (ret); buffer.append (quote); ret = buffer.toString (); } return (ret); } /** * Get the raw value of the attribute. * The part after the equals sign, or the text if it's just a whitepace * 'attribute'. This includes the quotes around the value if any. * @return The value, or <code>null</code> if it's a stand-alone attribute, * or the text if it's just a whitepace 'attribute'. * @see #getRawValue() */ public void getRawValue (StringBuffer buffer) { char quote; if (null == mValue) { if (0 <= mValueEnd) { if (0 != (quote = getQuote ())) buffer.append (quote); if (mValueStart != mValueEnd) mPage.getText (buffer, mValueStart, mValueEnd); if (0 != quote) buffer.append (quote); } } else { if (0 != (quote = getQuote ())) buffer.append (quote); buffer.append (mValue); if (0 != quote) buffer.append (quote); } } /** * Get the page this attribute is anchored to, if any. * @return The page used to construct this attribute, or null if this * is just a regular attribute. */ public Page getPage () { return (mPage); } /** * Set the page this attribute is anchored to. * @param page The page to be used to construct this attribute. * Note: If you set this you probably also want to uncache the property * values by setting them to null. */ public void setPage (Page page) { mPage = page; } /** * Get the starting position of the attribute name. * @return The offset into the page at which the name begins. */ public int getNameStartPosition () { return (mNameStart); } /** * Set the starting position of the attribute name. * @param start The new offset into the page at which the name begins. */ public void setNameStartPosition (int start) { mNameStart = start; setName (null); // uncache value } /** * Get the ending position of the attribute name. * @return The offset into the page at which the name ends. */ public int getNameEndPosition () { return (mNameEnd); } /** * Set the ending position of the attribute name. * @param start The new offset into the page at which the name ends. */ public void setNameEndPosition (int end) { mNameEnd = end; setName (null); // uncache value setAssignment (null); // uncache value } /** * Get the starting position of the attribute value. * @return The offset into the page at which the value begins. */ public int getValueStartPosition () { return (mValueStart); } /** * Set the starting position of the attribute value. * @param start The new offset into the page at which the value begins. */ public void setValueStartPosition (int start) { mValueStart = start; setAssignment (null); // uncache value setValue (null); // uncache value } /** * Get the ending position of the attribute value. * @return The offset into the page at which the value ends. */ public int getValueEndPosition () { return (mValueEnd); } /** * Set the ending position of the attribute value. * @param start The new offset into the page at which the value ends. */ public void setValueEndPosition (int end) { mValueEnd = end; setValue (null); // uncache value } /** * Predicate to determine if this attribute is whitespace. * @return <code>true</code> if this attribute is whitespace, * <code>false</code> if it is a real attribute. */ public boolean isWhitespace () { return (((null == super.getName ()) && (null == mPage)) || ((null != mPage) && (0 > mNameStart))); } /** * Predicate to determine if this attribute has no equals sign (or value). * @return <code>true</code> if this attribute is a standalone attribute. * <code>false</code> if has an equals sign. */ public boolean isStandAlone () { return ((null != super.getName ()) && (null == super.getAssignment ()) || ((null != mPage) && (0 <= mNameEnd) && (0 > mValueStart))); } /** * Predicate to determine if this attribute has an equals sign but no value. * @return <code>true</code> if this attribute is an empty attribute. * <code>false</code> if has an equals sign and a value. */ public boolean isEmpty () { return (((null != super.getAssignment ()) && (null == super.getValue ())) || ((null != mPage) && ((0 <= mValueStart) && (0 > mValueEnd)))); } /** * Predicate to determine if this attribute has a value. * @return <code>true</code> if this attribute has a value. * <code>false</code> if it is empty or standalone. */ public boolean isValued () { return ((null != super.getValue ()) || ((null != mPage) && ((0 <= mValueStart) && (0 <= mValueEnd)))); } /** * Get the length of the string value of this attribute. * @return The number of characters required to express this attribute. */ public int getLength () { String name; String assignment; String value; char quote; int ret; ret = 0; name = super.getName (); if (null != name) ret += name.length (); else if ((null != mPage) && (0 <= mNameStart) && (0 <= mNameEnd)) ret += mNameEnd - mNameStart; assignment = super.getAssignment (); if (null != assignment) ret += assignment.length (); else if ((null != mPage) && (0 <= mNameEnd) && (0 <= mValueStart)) ret += mValueStart - mNameEnd; value = super.getValue (); if (null != value) ret += value.length (); else if ((null != mPage) && (0 <= mValueStart) && (0 <= mValueEnd)) ret += mValueEnd - mValueStart; quote = getQuote (); if (0 != quote) ret += 2; return (ret); } } Index: Attribute.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/Attribute.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** Attribute.java 13 Oct 2003 21:48:12 -0000 1.11 --- Attribute.java 18 Oct 2003 20:50:37 -0000 1.12 *************** *** 34,281 **** import org.htmlparser.lexer.Page; /** * An attribute within a tag. ! * <br>If Name is null, it is whitepace and Value has the text. ! * <br>If Name is not null, and Value is null it's a standalone attribute. ! * <br>If Name is not null, and Value is "", and Quote is zero it's an empty attribute. ! * <br>If Name is not null, and Value is "", and Quote is ' it's an empty single quoted attribute. ! * <br>If Name is not null, and Value is "", and Quote is " it's an empty double quoted attribute. [...1022 lines suppressed...] toString (ret); *************** *** 423,425 **** --- 704,719 ---- return (ret.toString ()); } + + /** + * Get a text representation of this attribute. + * @param buffer The accumulator for placing the text into. + * @see #toString() + */ + public void toString (StringBuffer buffer) + { + getName (buffer); + getAssignment (buffer); + getRawValue (buffer); + } + } Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** TagNode.java 13 Oct 2003 21:48:12 -0000 1.16 --- TagNode.java 18 Oct 2003 20:50:37 -0000 1.17 *************** *** 285,289 **** // add whitespace between attributes if (!((Attribute)attributes.elementAt (length - 1)).isWhitespace ()) ! attributes.addElement (new Attribute ((String)null, " ", (char)0)); attributes.addElement (attribute); } --- 285,289 ---- // add whitespace between attributes if (!((Attribute)attributes.elementAt (length - 1)).isWhitespace ()) ! attributes.addElement (new Attribute (" ")); attributes.addElement (attribute); } *************** *** 486,490 **** { // add whitespace between attributes ! attribute = new Attribute ((String)null, " ", (char)0); att.addElement (attribute); attribute = new Attribute (key, value, quote); --- 486,490 ---- { // add whitespace between attributes ! attribute = new Attribute (" "); att.addElement (attribute); attribute = new Attribute (key, value, quote); *************** *** 725,729 **** // from the previous attribute name = name.substring (0, length - 1); ! attribute = new Attribute (name); attributes.removeElementAt (size - 1); attributes.addElement (attribute); --- 725,729 ---- // from the previous attribute name = name.substring (0, length - 1); ! attribute = new Attribute (name, null); attributes.removeElementAt (size - 1); attributes.addElement (attribute); *************** *** 735,741 **** if (emptyXmlTag) { ! attribute = new Attribute ((String)null, " ", (char)0); attributes.addElement (attribute); ! attribute = new Attribute ("/"); attributes.addElement (attribute); } --- 735,741 ---- if (emptyXmlTag) { ! attribute = new Attribute (" "); attributes.addElement (attribute); ! attribute = new Attribute ("/", null); attributes.addElement (attribute); } *************** *** 746,752 **** if (emptyXmlTag) { ! attribute = new Attribute ((String)null, " ", (char)0); attributes.addElement (attribute); ! attribute = new Attribute ("/"); attributes.addElement (attribute); } --- 746,752 ---- if (emptyXmlTag) { ! attribute = new Attribute (" "); attributes.addElement (attribute); ! attribute = new Attribute ("/", null); attributes.addElement (attribute); } *************** *** 758,762 **** if (emptyXmlTag) { ! attribute = new Attribute ("/"); attributes.addElement (attribute); } --- 758,762 ---- if (emptyXmlTag) { ! attribute = new Attribute ("/", null); attributes.addElement (attribute); } *************** *** 767,771 **** if (emptyXmlTag) { ! attribute = new Attribute ("/"); attributes.addElement (attribute); } --- 767,771 ---- if (emptyXmlTag) { ! attribute = new Attribute ("/", null); attributes.addElement (attribute); } |
From: <der...@us...> - 2003-10-19 05:45:24
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util In directory sc8-pr-cvs1:/tmp/cvs-serv10542/util Modified Files: NodeList.java Log Message: Partition Attribute into a base class and PageAttribute class for the Lexer. Fixed the AppletTag.setAppletParams in a cheesy manner. Clear out the released NodeList entry on remove(). Dropped the HTMLTagParserTest tests, because they really weren't relevant any more. Index: NodeList.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeList.java,v retrieving revision 1.45 retrieving revision 1.46 diff -C2 -d -r1.45 -r1.46 *** NodeList.java 5 Oct 2003 13:49:54 -0000 1.45 --- NodeList.java 18 Oct 2003 20:50:38 -0000 1.46 *************** *** 156,159 **** --- 156,160 ---- public void remove(int index) { System.arraycopy(nodeData, index+1, nodeData, index, size-index-1); + nodeData[size-1] = null; size--; } |
From: <der...@us...> - 2003-10-19 04:04:48
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests In directory sc8-pr-cvs1:/tmp/cvs-serv10542/tests/lexerTests Modified Files: AttributeTests.java Log Message: Partition Attribute into a base class and PageAttribute class for the Lexer. Fixed the AppletTag.setAppletParams in a cheesy manner. Clear out the released NodeList entry on remove(). Dropped the HTMLTagParserTest tests, because they really weren't relevant any more. Index: AttributeTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/AttributeTests.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** AttributeTests.java 13 Oct 2003 21:48:13 -0000 1.1 --- AttributeTests.java 18 Oct 2003 20:50:37 -0000 1.2 *************** *** 35,38 **** --- 35,40 ---- import org.htmlparser.Parser; + import org.htmlparser.lexer.nodes.Attribute; + import org.htmlparser.lexer.nodes.PageAttribute; import org.htmlparser.tags.Tag; import org.htmlparser.tags.data.TagData; *************** *** 85,88 **** --- 87,258 ---- fail (pe.getMessage ()); } + } + + /** + * Test constructors. + */ + public void testConstructors () + { + Vector attributes; + String html; + + attributes = new Vector (); + // String, null + attributes.add (new Attribute ("wombat", null)); + // String + attributes.add (new Attribute (" ")); + // String, String + attributes.add (new Attribute ("label", "The civil war.")); + attributes.add (new Attribute (" ")); + // String, String, String + attributes.add (new Attribute ("frameborder", "= ", "no")); + attributes.add (new Attribute (" ")); + // String String, String, char + attributes.add (new Attribute ("name", "=", "topFrame", '"')); + tag = new Tag (null, 0, 0, attributes); + html = "<wombat label=\"The civil war.\" frameborder= no name=\"topFrame\">"; + assertStringEquals ("tag contents", html, tag.toHtml ()); + } + + /** + * Test bean properties. + */ + public void testProperties () + { + Attribute attribute; + Attribute space; + Vector attributes; + String html; + + attributes = new Vector (); + attribute = new Attribute (); + attribute.setName ("wombat"); + assertTrue ("should be standalone", attribute.isStandAlone ()); + assertTrue ("should not be whitespace", !attribute.isWhitespace ()); + assertTrue ("should not be valued", !attribute.isValued ()); + assertTrue ("should not be empty", !attribute.isEmpty ()); + attributes.add (attribute); + space = new Attribute (); + space.setValue (" "); + assertTrue ("should not be standalone", !space.isStandAlone ()); + assertTrue ("should be whitespace", space.isWhitespace ()); + assertTrue ("should be valued", space.isValued ()); + assertTrue ("should not be empty", !space.isEmpty ()); + attributes.add (space); + attribute = new Attribute (); + attribute.setName ("label"); + attribute.setAssignment ("="); + attribute.setRawValue ("The civil war."); + assertTrue ("should not be standalone", !attribute.isStandAlone ()); + assertTrue ("should not be whitespace", !attribute.isWhitespace ()); + assertTrue ("should be valued", attribute.isValued ()); + assertTrue ("should not be empty", !attribute.isEmpty ()); + attributes.add (attribute); + attributes.add (space); + attribute = new Attribute (); + attribute.setName ("frameborder"); + attribute.setAssignment ("= "); + attribute.setRawValue ("no"); + attributes.add (attribute); + attributes.add (space); + attribute = new Attribute (); + attribute.setName ("name"); + attribute.setAssignment ("="); + attribute.setValue ("topFrame"); + attribute.setQuote ('"'); + assertTrue ("should not be standalone", !attribute.isStandAlone ()); + assertTrue ("should not be whitespace", !attribute.isWhitespace ()); + assertTrue ("should be valued", attribute.isValued ()); + assertTrue ("should not be empty", !attribute.isEmpty ()); + attributes.add (attribute); + tag = new Tag (null, 0, 0, attributes); + html = "<wombat label=\"The civil war.\" frameborder= no name=\"topFrame\">"; + assertStringEquals ("tag contents", html, tag.toHtml ()); + } + + /** + * Test constructors. + */ + public void testConstructors2 () + { + Vector attributes; + String html; + + attributes = new Vector (); + // String, null + attributes.add (new PageAttribute ("wombat", null)); + // String + attributes.add (new PageAttribute (" ")); + // String, String + attributes.add (new PageAttribute ("label", "The civil war.")); + attributes.add (new PageAttribute (" ")); + // String, String, String + attributes.add (new PageAttribute ("frameborder", "= ", "no")); + attributes.add (new PageAttribute (" ")); + // String String, String, char + attributes.add (new PageAttribute ("name", "=", "topFrame", '"')); + tag = new Tag (null, 0, 0, attributes); + html = "<wombat label=\"The civil war.\" frameborder= no name=\"topFrame\">"; + assertStringEquals ("tag contents", html, tag.toHtml ()); + } + + /** + * Test bean properties. + */ + public void testProperties2 () + { + Attribute attribute; + Attribute space; + Vector attributes; + String html; + + attributes = new Vector (); + attribute = new PageAttribute (); + attribute.setName ("wombat"); + assertTrue ("should be standalone", attribute.isStandAlone ()); + assertTrue ("should not be whitespace", !attribute.isWhitespace ()); + assertTrue ("should not be valued", !attribute.isValued ()); + assertTrue ("should not be empty", !attribute.isEmpty ()); + attributes.add (attribute); + space = new PageAttribute (); + space.setValue (" "); + assertTrue ("should not be standalone", !space.isStandAlone ()); + assertTrue ("should be whitespace", space.isWhitespace ()); + assertTrue ("should be valued", space.isValued ()); + assertTrue ("should not be empty", !space.isEmpty ()); + attributes.add (space); + attribute = new PageAttribute (); + attribute.setName ("label"); + attribute.setAssignment ("="); + attribute.setRawValue ("The civil war."); + assertTrue ("should not be standalone", !attribute.isStandAlone ()); + assertTrue ("should not be whitespace", !attribute.isWhitespace ()); + assertTrue ("should be valued", attribute.isValued ()); + assertTrue ("should not be empty", !attribute.isEmpty ()); + attributes.add (attribute); + attributes.add (space); + attribute = new PageAttribute (); + attribute.setName ("frameborder"); + attribute.setAssignment ("= "); + attribute.setRawValue ("no"); + assertTrue ("should not be standalone", !attribute.isStandAlone ()); + assertTrue ("should not be whitespace", !attribute.isWhitespace ()); + assertTrue ("should be valued", attribute.isValued ()); + assertTrue ("should not be empty", !attribute.isEmpty ()); + attributes.add (attribute); + attributes.add (space); + attribute = new PageAttribute (); + attribute.setName ("name"); + attribute.setAssignment ("="); + attribute.setValue ("topFrame"); + attribute.setQuote ('"'); + assertTrue ("should not be standalone", !attribute.isStandAlone ()); + assertTrue ("should not be whitespace", !attribute.isWhitespace ()); + assertTrue ("should be valued", attribute.isValued ()); + assertTrue ("should not be empty", !attribute.isEmpty ()); + attributes.add (attribute); + tag = new Tag (null, 0, 0, attributes); + html = "<wombat label=\"The civil war.\" frameborder= no name=\"topFrame\">"; + assertStringEquals ("tag contents", html, tag.toHtml ()); } |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv16902/tests/tagTests Modified Files: FormTagTest.java InputTagTest.java JspTagTest.java MetaTagTest.java TagTest.java TextareaTagTest.java Log Message: Eliminated ParserHelper static class. Add fixAttributes() to handle bad tags. Provide for more than just an equals sign between the attribute name and the value. Unquote the values in getAttributes() hashtable. Fixed a bug regarding factory creation in script scanner. Returned temporaryFailures classes to servicability. Skip JSP testing, fix tests broken because of unquoted attribute values. Some JavaDoc cleanup. Index: FormTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/FormTagTest.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** FormTagTest.java 6 Oct 2003 01:43:28 -0000 1.33 --- FormTagTest.java 13 Oct 2003 21:48:14 -0000 1.34 *************** *** 57,61 **** String expected = FormScannerTest.FORM_HTML.substring (0, FormScannerTest.FORM_HTML.indexOf ("\"do_login.php\"")) ! + "http://www.google.com/test/do_login.php" + FormScannerTest.FORM_HTML.substring (FormScannerTest.FORM_HTML.indexOf ("\"do_login.php\"") + 14); assertStringEquals("Raw String",expected,formTag.toHtml()); --- 57,61 ---- String expected = FormScannerTest.FORM_HTML.substring (0, FormScannerTest.FORM_HTML.indexOf ("\"do_login.php\"")) ! + "http://www.yahoo.com/yahoo/do_not_login.jsp" + FormScannerTest.FORM_HTML.substring (FormScannerTest.FORM_HTML.indexOf ("\"do_login.php\"") + 14); assertStringEquals("Raw String",expected,formTag.toHtml()); Index: InputTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/InputTagTest.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** InputTagTest.java 6 Oct 2003 01:43:28 -0000 1.32 --- InputTagTest.java 13 Oct 2003 21:48:14 -0000 1.33 *************** *** 68,72 **** InputTag InputTag; InputTag = (InputTag) node[0]; ! assertEquals("HTML Raw String","INPUT TAG\n--------\nNAME : \"Google\"\nTYPE : \"text\"\n",InputTag.toString()); } --- 68,72 ---- InputTag InputTag; InputTag = (InputTag) node[0]; ! assertEquals("HTML Raw String","INPUT TAG\n--------\nNAME : Google\nTYPE : text\n",InputTag.toString()); } Index: JspTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/JspTagTest.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** JspTagTest.java 5 Oct 2003 13:49:54 -0000 1.32 --- JspTagTest.java 13 Oct 2003 21:48:14 -0000 1.33 *************** *** 37,40 **** --- 37,41 ---- public class JspTagTest extends ParserTestCase { + private static final boolean JSP_TESTS_ENABLED = false; public JspTagTest(String name) { *************** *** 81,85 **** assertTrue("Node 1 should be an JspTag",node[0] instanceof JspTag); JspTag tag = (JspTag)node[0]; ! assertStringEquals("Contents of the tag","@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" ",tag.getText()); // The second node should be a normal tag --- 82,86 ---- assertTrue("Node 1 should be an JspTag",node[0] instanceof JspTag); JspTag tag = (JspTag)node[0]; ! assertStringEquals("Contents of the tag","%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %",tag.getText()); // The second node should be a normal tag *************** *** 123,169 **** public void testToHTML() throws ParserException { ! createParser( ! "<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>\n"+ ! "<jsp:useBean id=\"transfer\" scope=\"session\" class=\"com.bank.PageBean\"/>\n"+ ! "<%\n"+ ! " org.apache.struts.util.BeanUtils.populate(transfer, request);\n"+ ! " if(request.getParameter(\"marker\") == null)\n"+ ! " // initialize a pseudo-property\n"+ ! " transfer.set(\"days\", java.util.Arrays.asList(\n"+ ! " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\n"+ ! " else \n"+ ! " if(transfer.validate(request))\n"+ ! " %><jsp:forward page=\"transferConfirm.jsp\"/><%\n"+ ! "%>\n"); ! Parser.setLineSeparator("\r\n"); ! // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); ! parseAndAssertNodeCount(8); ! // The first node should be an JspTag ! assertTrue("Node 1 should be an JspTag",node[0] instanceof JspTag); ! JspTag tag = (JspTag)node[0]; ! assertEquals("Raw String of the first JSP tag","<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>",tag.toHtml()); ! - // The third node should be an JspTag - assertTrue("Node 5 should be an JspTag",node[5] instanceof JspTag); - JspTag tag2 = (JspTag)node[8]; - String expected = "<%\r\n"+ - " org.apache.struts.util.BeanUtils.populate(transfer, request);\r\n"+ - " if(request.getParameter(\"marker\") == null)\r\n"+ - " // initialize a pseudo-property\r\n"+ - " transfer.set(\"days\", java.util.Arrays.asList(\r\n"+ - " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\r\n"+ - " else \r\n"+ - " if(transfer.validate(request))\r\n"+ - " %>"; - assertEquals("Raw String of the second JSP tag",expected,tag2.toHtml()); - assertTrue("Node 4 should be an HTMLJspTag",node[4] instanceof JspTag); - JspTag tag4 = (JspTag)node[4]; - expected = "<%\r\n"+ - "%>"; - assertEquals("Raw String of the fourth JSP tag",expected,tag4.toHtml()); } public void testSpecialCharacters() throws ParserException { StringBuffer sb1 = new StringBuffer(); --- 124,173 ---- public void testToHTML() throws ParserException { ! if (JSP_TESTS_ENABLED) ! { ! createParser( ! "<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>\n"+ ! "<jsp:useBean id=\"transfer\" scope=\"session\" class=\"com.bank.PageBean\"/>\n"+ ! "<%\n"+ ! " org.apache.struts.util.BeanUtils.populate(transfer, request);\n"+ ! " if(request.getParameter(\"marker\") == null)\n"+ ! " // initialize a pseudo-property\n"+ ! " transfer.set(\"days\", java.util.Arrays.asList(\n"+ ! " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\n"+ ! " else \n"+ ! " if(transfer.validate(request))\n"+ ! " %><jsp:forward page=\"transferConfirm.jsp\"/><%\n"+ ! "%>\n"); ! Parser.setLineSeparator("\r\n"); ! // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); ! parseAndAssertNodeCount(8); ! // The first node should be an JspTag ! assertTrue("Node 1 should be an JspTag",node[0] instanceof JspTag); ! JspTag tag = (JspTag)node[0]; ! assertEquals("Raw String of the first JSP tag","<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>",tag.toHtml()); + // The third node should be an JspTag + assertTrue("Node 5 should be an JspTag",node[5] instanceof JspTag); + JspTag tag2 = (JspTag)node[8]; + String expected = "<%\r\n"+ + " org.apache.struts.util.BeanUtils.populate(transfer, request);\r\n"+ + " if(request.getParameter(\"marker\") == null)\r\n"+ + " // initialize a pseudo-property\r\n"+ + " transfer.set(\"days\", java.util.Arrays.asList(\r\n"+ + " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\r\n"+ + " else \r\n"+ + " if(transfer.validate(request))\r\n"+ + " %>"; + assertEquals("Raw String of the second JSP tag",expected,tag2.toHtml()); + assertTrue("Node 4 should be an HTMLJspTag",node[4] instanceof JspTag); + JspTag tag4 = (JspTag)node[4]; + expected = "<%\r\n"+ + "%>"; + assertEquals("Raw String of the fourth JSP tag",expected,tag4.toHtml()); + } } + public void testSpecialCharacters() throws ParserException { StringBuffer sb1 = new StringBuffer(); *************** *** 191,212 **** * See bug #772700 Jsp Tags are not parsed correctly when in quoted attributes. */ ! // public void testJspTagsInQuotedAttribes() throws ParserException { // // this test seems to mess up.... // testJspTagsInAttributes("<img alt=\"<%=altText1%>\" src=\"<%=imgUrl1%>\" border=\"<%=borderToggle%>\">"); // } ! private void testJspTagsInAttributes(String html) throws ParserException { ! createParser(html); ! parser.addScanner(new JspScanner()); ! parseAndAssertNodeCount(7); ! ! assertTrue("Should be a Jsp tag but was "+node[1].getClass().getName(),node[1] instanceof JspTag); ! assertTrue("Should be a Jsp tag but was "+node[3].getClass().getName(),node[3] instanceof JspTag); ! assertTrue("Should be a Jsp tag but was "+node[5].getClass().getName(),node[5] instanceof JspTag); ! assertTrue("Text Should be '<%=altText1%>'but was '" + node[1].toHtml() + "'" ,node[1].toHtml().equals("<%=altText1%>")); ! assertTrue("Text Should be '<%=imgUrl1%>' but was '" + node[3].toHtml() + "'" ,node[3].toHtml().equals("<%=imgUrl1%>")); ! assertTrue("Text Should be '<%=borderToggle%>' but was '" + node[5].toHtml() + "'" ,node[5].toHtml().equals("<%=borderToggle%>")); ! } } --- 195,220 ---- * See bug #772700 Jsp Tags are not parsed correctly when in quoted attributes. */ ! // public void testJspTagsInQuotedAttribes() throws ParserException ! // { // // this test seems to mess up.... // testJspTagsInAttributes("<img alt=\"<%=altText1%>\" src=\"<%=imgUrl1%>\" border=\"<%=borderToggle%>\">"); // } ! private void testJspTagsInAttributes(String html) throws ParserException ! { ! if (JSP_TESTS_ENABLED) ! { ! createParser(html); ! parser.addScanner(new JspScanner()); ! parseAndAssertNodeCount(7); ! assertTrue("Should be a Jsp tag but was "+node[1].getClass().getName(),node[1] instanceof JspTag); ! assertTrue("Should be a Jsp tag but was "+node[3].getClass().getName(),node[3] instanceof JspTag); ! assertTrue("Should be a Jsp tag but was "+node[5].getClass().getName(),node[5] instanceof JspTag); ! assertTrue("Text Should be '<%=altText1%>'but was '" + node[1].toHtml() + "'" ,node[1].toHtml().equals("<%=altText1%>")); ! assertTrue("Text Should be '<%=imgUrl1%>' but was '" + node[3].toHtml() + "'" ,node[3].toHtml().equals("<%=imgUrl1%>")); ! assertTrue("Text Should be '<%=borderToggle%>' but was '" + node[5].toHtml() + "'" ,node[5].toHtml().equals("<%=borderToggle%>")); ! } ! } } Index: MetaTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/MetaTagTest.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** MetaTagTest.java 6 Oct 2003 01:43:28 -0000 1.31 --- MetaTagTest.java 13 Oct 2003 21:48:14 -0000 1.32 *************** *** 40,46 **** public void testToHTML() throws ParserException { ! String description = "\"description\""; ! String content = "\"Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\""; ! String tag = "<META name=" + description + " content=" + content + ">"; createParser( "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n"+ --- 40,46 ---- public void testToHTML() throws ParserException { ! String description = "description"; ! String content = "Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox."; ! String tag = "<META name=\"" + description + "\" content=\"" + content + "\">"; createParser( "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n"+ Index: TagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/TagTest.java,v retrieving revision 1.46 retrieving revision 1.47 diff -C2 -d -r1.46 -r1.47 *** TagTest.java 6 Oct 2003 01:43:28 -0000 1.46 --- TagTest.java 13 Oct 2003 21:48:14 -0000 1.47 *************** *** 44,47 **** --- 44,49 ---- public class TagTest extends ParserTestCase { + private static final boolean JSP_TESTS_ENABLED = false; + public TagTest(String name) { super(name); *************** *** 94,105 **** * Nested JSP Tags were not working */ ! public void testNestedTags() throws ParserException { ! String s = "input type=\"text\" value=\"<%=\"test\"%>\" name=\"text\""; ! String line = "<"+s+">"; ! createParser(line); ! parseAndAssertNodeCount(1); ! assertTrue("The node found should have been an Tag",node[0] instanceof Tag); ! Tag tag = (Tag) node[0]; ! assertEquals("Tag Contents",s,tag.getText()); } --- 96,111 ---- * Nested JSP Tags were not working */ ! public void testNestedTags() throws ParserException ! { ! if (JSP_TESTS_ENABLED) ! { ! String s = "input type=\"text\" value=\"<%=\"test\"%>\" name=\"text\""; ! String line = "<"+s+">"; ! createParser(line); ! parseAndAssertNodeCount(1); ! assertTrue("The node found should have been an Tag",node[0] instanceof Tag); ! Tag tag = (Tag) node[0]; ! assertEquals("Tag Contents",s,tag.getText()); ! } } *************** *** 125,129 **** h = tag.getAttributes(); String classValue= (String)h.get("CLASS"); ! assertEquals ("The class value should be ","\"userData\"",classValue); } --- 131,135 ---- h = tag.getAttributes(); String classValue= (String)h.get("CLASS"); ! assertEquals ("The class value should be ","userData",classValue); } *************** *** 162,168 **** nice = (String)h.get("YOURPARAMETER"); assertEquals ("Link tag (A)","A",a); ! assertEquals ("href value","\"http://www.iki.fi/kaila\"",href); assertEquals ("myparameter value",null,myValue); ! assertEquals ("yourparameter value","\"Kaarle Kaaila\"",nice); } if (!(node instanceof LinkTag)) { --- 168,174 ---- nice = (String)h.get("YOURPARAMETER"); assertEquals ("Link tag (A)","A",a); ! assertEquals ("href value","http://www.iki.fi/kaila",href); assertEquals ("myparameter value",null,myValue); ! assertEquals ("yourparameter value","Kaarle Kaaila",nice); } if (!(node instanceof LinkTag)) { *************** *** 232,236 **** nice = (String)h.get("YOURPARAMETER"); assertEquals ("The tagname should be G",a,"G"); ! assertEquals ("Check the http address",href,"\"http://www.iki.fi/kaila\""); assertEquals ("myValue is not null",myValue,null); assertEquals ("The second parameter value",nice,"Kaila"); --- 238,242 ---- nice = (String)h.get("YOURPARAMETER"); assertEquals ("The tagname should be G",a,"G"); ! assertEquals ("Check the http address",href,"http://www.iki.fi/kaila"); assertEquals ("myValue is not null",myValue,null); assertEquals ("The second parameter value",nice,"Kaila"); *************** *** 350,359 **** Div div = (Div)node[0]; Tag fontTag = (Tag)div.children().nextNode(); ! assertEquals("Second tag should be corrected","font face=\"Arial,helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"",fontTag.getText()); // Try to parse the parameters from this tag. Hashtable table = fontTag.getAttributes(); assertNotNull("Parameters table",table); assertEquals("font sans-serif parameter","sans-serif",table.get("SANS-SERIF")); ! assertEquals("font face parameter","Arial,helvetica,",table.get("FACE")); } --- 356,367 ---- Div div = (Div)node[0]; Tag fontTag = (Tag)div.children().nextNode(); ! // an alternate interpretation: assertEquals("Second tag should be corrected","font face=\"Arial,helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"",fontTag.getText()); ! assertEquals("Second tag should be corrected","font face=\"Arial,\"helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"",fontTag.getText()); // Try to parse the parameters from this tag. Hashtable table = fontTag.getAttributes(); assertNotNull("Parameters table",table); assertEquals("font sans-serif parameter","sans-serif",table.get("SANS-SERIF")); ! // an alternate interpretation: assertEquals("font face parameter","Arial,helvetica,",table.get("FACE")); ! assertEquals("font face parameter","Arial,\"helvetica,",table.get("FACE")); } *************** *** 454,459 **** public void testIncorrectInvertedCommas() throws ParserException { ! String content = "\"DORIER-APPRILL E., GERVAIS-LAMBONY P., MORICONI-EBRARD F., NAVEZ-BOUCHANINE F.\""; ! String guts = "META NAME=\"Author\" CONTENT = " + content + " \""; String testHTML = "<" + guts + ">"; createParser(testHTML); --- 462,468 ---- public void testIncorrectInvertedCommas() throws ParserException { ! String content = "DORIER-APPRILL E., GERVAIS-LAMBONY P., MORICONI-EBRARD F., NAVEZ-BOUCHANINE F."; ! String author = "Author"; ! String guts = "META NAME=\"" + author + "\" CONTENT = \"" + content + "\""; String testHTML = "<" + guts + ">"; createParser(testHTML); *************** *** 462,465 **** --- 471,475 ---- Tag tag = (Tag)node[0]; assertStringEquals("Node contents",guts,tag.getText()); + assertEquals("Meta Content",author,tag.getAttribute("NAME")); assertEquals("Meta Content",content,tag.getAttribute("CONTENT")); *************** *** 482,486 **** assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; ! assertEquals("Node contents","meta name=\"description\" content=\"Une base de données sur les thèses de gographie soutenues en France\"",tag.getText()); } --- 492,496 ---- assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; ! assertEquals("Node contents","meta name=\"description\" content=\"Une base de données sur les thèses de g\"ographie soutenues en France \"",tag.getText()); } Index: TextareaTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/TextareaTagTest.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** TextareaTagTest.java 6 Oct 2003 01:43:28 -0000 1.30 --- TextareaTagTest.java 13 Oct 2003 21:48:14 -0000 1.31 *************** *** 92,96 **** TextareaTag textareaTag; textareaTag = (TextareaTag) node[0]; ! assertStringEquals("HTML Raw String 1","TEXTAREA TAG\n--------\nNAME : \"Remarks\"\nVALUE : The intervention by the UN proved beneficial\n",textareaTag.toString()); textareaTag = (TextareaTag) node[1]; assertStringEquals("HTML Raw String 2","TEXTAREA TAG\n--------\nVALUE : The capture of the Somali warloard was elusive\n",textareaTag.toString()); --- 92,96 ---- TextareaTag textareaTag; textareaTag = (TextareaTag) node[0]; ! assertStringEquals("HTML Raw String 1","TEXTAREA TAG\n--------\nNAME : Remarks\nVALUE : The intervention by the UN proved beneficial\n",textareaTag.toString()); textareaTag = (TextareaTag) node[1]; assertStringEquals("HTML Raw String 2","TEXTAREA TAG\n--------\nVALUE : The capture of the Somali warloard was elusive\n",textareaTag.toString()); *************** *** 98,105 **** assertStringEquals("HTML Raw String 3","TEXTAREA TAG\n--------\nVALUE : \n",textareaTag.toString()); textareaTag = (TextareaTag) node[3]; ! assertStringEquals("HTML Raw String 4","TEXTAREA TAG\n--------\nNAME : \"Remarks\"\nVALUE : The death threats of the organization\n"+ "refused to intimidate the soldiers\n",textareaTag.toString()); textareaTag = (TextareaTag) node[4]; ! assertStringEquals("HTML Raw String 5","TEXTAREA TAG\n--------\nNAME : \"Remarks\"\nVALUE : The death threats of the LTTE\n"+ "refused to intimidate the Tamilians\n\n",textareaTag.toString()); } --- 98,105 ---- assertStringEquals("HTML Raw String 3","TEXTAREA TAG\n--------\nVALUE : \n",textareaTag.toString()); textareaTag = (TextareaTag) node[3]; ! assertStringEquals("HTML Raw String 4","TEXTAREA TAG\n--------\nNAME : Remarks\nVALUE : The death threats of the organization\n"+ "refused to intimidate the soldiers\n",textareaTag.toString()); textareaTag = (TextareaTag) node[4]; ! assertStringEquals("HTML Raw String 5","TEXTAREA TAG\n--------\nNAME : Remarks\nVALUE : The death threats of the LTTE\n"+ "refused to intimidate the Tamilians\n\n",textareaTag.toString()); } |
From: <der...@us...> - 2003-10-13 21:48:52
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv16902/tests/scannersTests Modified Files: JspScannerTest.java MetaTagScannerTest.java ScriptScannerTest.java Log Message: Eliminated ParserHelper static class. Add fixAttributes() to handle bad tags. Provide for more than just an equals sign between the attribute name and the value. Unquote the values in getAttributes() hashtable. Fixed a bug regarding factory creation in script scanner. Returned temporaryFailures classes to servicability. Skip JSP testing, fix tests broken because of unquoted attribute values. Some JavaDoc cleanup. Index: JspScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/JspScannerTest.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** JspScannerTest.java 5 Oct 2003 13:49:54 -0000 1.29 --- JspScannerTest.java 13 Oct 2003 21:48:13 -0000 1.30 *************** *** 36,39 **** --- 36,40 ---- public class JspScannerTest extends ParserTestCase { + private static final boolean JSP_TESTS_ENABLED = false; public JspScannerTest(String name) { *************** *** 57,61 **** assertTrue("Third should be an JspTag",node[2] instanceof JspTag); JspTag tag = (JspTag)node[2]; ! assertEquals("tag contents","=object",tag.getText()); } --- 58,62 ---- assertTrue("Third should be an JspTag",node[2] instanceof JspTag); JspTag tag = (JspTag)node[2]; ! assertEquals("tag contents","%=object%",tag.getText()); } *************** *** 67,86 **** */ public void testUnclosedTagInsideJsp() throws ParserException { ! createParser( ! "<%\n" + ! "public String getHref(String value) \n" + ! "{ \n" + ! "int indexs = value.indexOf(\"<A HREF=\");\n" + ! "int indexe = value.indexOf(\">\");\n" + ! "if (indexs != -1) {\n" + ! "return value.substring(indexs+9,indexe-2);\n" + ! "}\n" + ! "return value;\n" + ! "}\n" + ! "%>"); ! Parser.setLineSeparator("\r\n"); ! // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); ! parseAndAssertNodeCount(1); } } --- 68,90 ---- */ public void testUnclosedTagInsideJsp() throws ParserException { ! if (JSP_TESTS_ENABLED) ! { ! createParser( ! "<%\n" + ! "public String getHref(String value) \n" + ! "{ \n" + ! "int indexs = value.indexOf(\"<A HREF=\");\n" + ! "int indexe = value.indexOf(\">\");\n" + ! "if (indexs != -1) {\n" + ! "return value.substring(indexs+9,indexe-2);\n" + ! "}\n" + ! "return value;\n" + ! "}\n" + ! "%>"); ! Parser.setLineSeparator("\r\n"); ! // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); ! parseAndAssertNodeCount(1); ! } } } Index: MetaTagScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/MetaTagScannerTest.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** MetaTagScannerTest.java 6 Oct 2003 01:43:28 -0000 1.31 --- MetaTagScannerTest.java 13 Oct 2003 21:48:13 -0000 1.32 *************** *** 42,48 **** public void testScan() throws ParserException { ! String description = "\"description\""; ! String content = "\"Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\""; ! String tag = "<META name=" + description + " content=" + content + ">"; createParser( "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n"+ --- 42,48 ---- public void testScan() throws ParserException { ! String description = "description"; ! String content = "Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox."; ! String tag = "<META name=\"" + description + "\" content=\"" + content + "\">"; createParser( "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n"+ *************** *** 72,93 **** metaTag = (MetaTag) node[11]; ! assertEquals("Meta Tag 12 Name","\"keywords\"",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 12 Contents","\"SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns\"",metaTag.getMetaContent()); assertNull("Meta Tag 12 Http-Equiv",metaTag.getHttpEquiv()); metaTag = (MetaTag) node[13]; ! assertEquals("Meta Tag 14 Name","\"language\"",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 14 Contents","\"en\"",metaTag.getMetaContent()); assertNull("Meta Tag 14 Http-Equiv",metaTag.getHttpEquiv()); metaTag = (MetaTag) node[15]; ! assertEquals("Meta Tag 16 Name","\"owner\"",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 16 Contents","\"se...@ad...\"",metaTag.getMetaContent()); assertNull("Meta Tag 16 Http-Equiv",metaTag.getHttpEquiv()); metaTag = (MetaTag) node[17]; assertNull("Meta Tag 18 Name",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 18 Contents","\"text/html; charset=ISO-8859-1\"",metaTag.getMetaContent()); ! assertEquals("Meta Tag 18 Http-Equiv","\"content-type\"",metaTag.getHttpEquiv()); assertEquals("This Scanner",scanner,metaTag.getThisScanner()); --- 72,93 ---- metaTag = (MetaTag) node[11]; ! assertEquals("Meta Tag 12 Name","keywords",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 12 Contents","SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns",metaTag.getMetaContent()); assertNull("Meta Tag 12 Http-Equiv",metaTag.getHttpEquiv()); metaTag = (MetaTag) node[13]; ! assertEquals("Meta Tag 14 Name","language",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 14 Contents","en",metaTag.getMetaContent()); assertNull("Meta Tag 14 Http-Equiv",metaTag.getHttpEquiv()); metaTag = (MetaTag) node[15]; ! assertEquals("Meta Tag 16 Name","owner",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 16 Contents","se...@ad...",metaTag.getMetaContent()); assertNull("Meta Tag 16 Http-Equiv",metaTag.getHttpEquiv()); metaTag = (MetaTag) node[17]; assertNull("Meta Tag 18 Name",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 18 Contents","text/html; charset=ISO-8859-1",metaTag.getMetaContent()); ! assertEquals("Meta Tag 18 Http-Equiv","content-type",metaTag.getHttpEquiv()); assertEquals("This Scanner",scanner,metaTag.getThisScanner()); *************** *** 95,102 **** public void testScanTagsInMeta() throws ParserException { ! String description = "\"Description\""; ! String content = "\"Ethnoburb </I>versus Chinatown: Two Types of Urban Ethnic Communities in Los Angeles\""; createParser( ! "<META NAME=" + description + "CONTENT=" + content + ">", "http://www.google.com/test/index.html" ); --- 95,102 ---- public void testScanTagsInMeta() throws ParserException { ! String description = "Description"; ! String content = "Ethnoburb </I>versus Chinatown: Two Types of Urban Ethnic Communities in Los Angeles"; createParser( ! "<META NAME=\"" + description + "\" CONTENT=\"" + content + "\">", "http://www.google.com/test/index.html" ); *************** *** 115,125 **** */ public void testMetaTagBug() throws ParserException { ! String equiv = "\"content-type\""; ! String content = "\"text/html; charset=windows-1252\""; createParser( "<html>" + "<head>" + ! "<meta http-equiv=" + equiv + ! " content=" + content + ">" + "</head>" + "</html>" --- 115,125 ---- */ public void testMetaTagBug() throws ParserException { ! String equiv = "content-type"; ! String content = "text/html; charset=windows-1252"; createParser( "<html>" + "<head>" + ! "<meta http-equiv=\"" + equiv + "\" " + ! "content=\"" + content + "\">" + "</head>" + "</html>" *************** *** 139,148 **** */ public void testMetaTagWithOpenTagSymbol() throws ParserException { ! String content = "\"a<b\""; createParser( "<html>" + "<head>" + "<title>Parser Test 2</title>" + ! "<meta name=\"foo\" content=" + content + ">" + "</head>" + "<body>" + --- 139,148 ---- */ public void testMetaTagWithOpenTagSymbol() throws ParserException { ! String content = "a<b"; createParser( "<html>" + "<head>" + "<title>Parser Test 2</title>" + ! "<meta name=\"foo\" content=\"" + content + "\">" + "</head>" + "<body>" + Index: ScriptScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/ScriptScannerTest.java,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** ScriptScannerTest.java 5 Oct 2003 13:49:54 -0000 1.41 --- ScriptScannerTest.java 13 Oct 2003 21:48:14 -0000 1.42 *************** *** 75,80 **** public void testScanBug() throws ParserException { ! String src = "\"../js/DetermineBrowser.js\""; ! createParser("<SCRIPT LANGUAGE=\"JavaScript\" SRC=" + src + "></SCRIPT>","http://www.google.com/test/index.html"); // Register the image scanner parser.addScanner(new ScriptScanner("-s")); --- 75,80 ---- public void testScanBug() throws ParserException { ! String src = "../js/DetermineBrowser.js"; ! createParser("<SCRIPT LANGUAGE=\"JavaScript\" SRC=\"" + src + "\"></SCRIPT>","http://www.google.com/test/index.html"); // Register the image scanner parser.addScanner(new ScriptScanner("-s")); |
From: <der...@us...> - 2003-10-13 21:48:52
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests In directory sc8-pr-cvs1:/tmp/cvs-serv16902/tests/lexerTests Modified Files: AllTests.java Added Files: AttributeTests.java TagTests.java Log Message: Eliminated ParserHelper static class. Add fixAttributes() to handle bad tags. Provide for more than just an equals sign between the attribute name and the value. Unquote the values in getAttributes() hashtable. Fixed a bug regarding factory creation in script scanner. Returned temporaryFailures classes to servicability. Skip JSP testing, fix tests broken because of unquoted attribute values. Some JavaDoc cleanup. --- NEW FILE: AttributeTests.java --- // HTMLParser Library v1_4_20030921 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // For any questions or suggestions, you can write to me at : // Email :so...@in... // // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com package org.htmlparser.tests.lexerTests; import java.util.Hashtable; import java.util.Vector; import junit.framework.TestSuite; import org.htmlparser.Node; import org.htmlparser.Parser; import org.htmlparser.tags.Tag; import org.htmlparser.tags.data.TagData; import org.htmlparser.tests.ParserTestCase; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.ParserException; import org.htmlparser.util.SpecialHashtable; public class AttributeTests extends ParserTestCase { private static final boolean JSP_TESTS_ENABLED = false; private Tag tag; private Hashtable table; public AttributeTests (String name) { super(name); } public void getParameterTableFor(String tagContents) { String html; NodeIterator iterator; Node node; Tag tag; Vector attributes; html = "<" + tagContents + ">"; createParser (html); try { iterator = parser.elements (); node = iterator.nextNode (); if (node instanceof Tag) { tag = (Tag)node; attributes = tag.getAttributesEx (); // for (int i = 0; i < attributes.size (); i++) // System.out.print ("|" + attributes.elementAt (i)); // System.out.println ("|"); table = tag.getAttributes (); } else table = null; String string = node.toHtml (); assertEquals ("toHtml differs", html, string); assertTrue ("shouldn't be any more nodes", !iterator.hasMoreNodes ()); } catch (ParserException pe) { fail (pe.getMessage ()); } } /** * Test simple value. */ public void testParseParameters() { getParameterTableFor("a b = \"c\""); assertEquals("Value","c",table.get("B")); } /** * Test quote value. */ public void testParseTokenValues() { getParameterTableFor("a b = \"'\""); assertEquals("Value","'",table.get("B")); } /** * Test empty value. */ public void testParseEmptyValues() { getParameterTableFor("a b = \"\""); assertEquals("Value","",table.get("B")); } /** * Test no equals or whitespace. * This might be reason for another rule, since another interpretation * would have an attribute called B with a value of "C". */ public void testParseMissingEqual() { getParameterTableFor("a b\"c\""); assertEquals("ValueB",null,table.get("B")); assertTrue("NameC",table.containsKey("B\"C\"")); } /** * Test multiple attributes. */ public void testTwoParams(){ getParameterTableFor("PARAM NAME=\"Param1\" VALUE=\"Somik\""); assertEquals("Param1","Param1",table.get("NAME")); assertEquals("Somik","Somik",table.get("VALUE")); } /** * Test unquoted attributes. */ public void testPlainParams(){ getParameterTableFor("PARAM NAME=Param1 VALUE=Somik"); assertEquals("Param1","Param1",table.get("NAME")); assertEquals("Somik","Somik",table.get("VALUE")); } /** * Test standalone attribute. */ public void testValueMissing() { getParameterTableFor("INPUT type=\"checkbox\" name=\"Authorize\" value=\"Y\" checked"); assertEquals("Name of Tag","INPUT",table.get(SpecialHashtable.TAGNAME)); assertEquals("Type","checkbox",table.get("TYPE")); assertEquals("Name","Authorize",table.get("NAME")); assertEquals("Value","Y",table.get("VALUE")); assertEquals("Checked",null,table.get("CHECKED")); } /** * This is a simulation of a bug reported by Dhaval Udani - wherein * a space before the end of the tag causes a problem - there is a key * in the table with just a space in it and an empty value */ public void testIncorrectSpaceKeyBug() { getParameterTableFor("TEXTAREA name=\"Remarks\" "); // There should only be two keys.. assertEquals("There should only be two keys",2,table.size()); // The first key is name String key1 = "NAME"; String value1 = (String)table.get(key1); assertEquals("Expected value 1", "Remarks",value1); String key2 = SpecialHashtable.TAGNAME; assertEquals("Expected Value 2","TEXTAREA",table.get(key2)); } /** * Test empty attribute. */ public void testNullTag(){ getParameterTableFor("INPUT type="); assertEquals("Name of Tag","INPUT",table.get(SpecialHashtable.TAGNAME)); assertEquals("Type","",table.get("TYPE")); } /** * Test attribute containing an equals sign. */ public void testAttributeWithSpuriousEqualTo() { getParameterTableFor( "a class=rlbA href=/news/866201.asp?0sl=-32" ); assertStringEquals( "href", "/news/866201.asp?0sl=-32", (String)table.get("HREF") ); } /** * Test attribute containing a question mark. */ public void testQuestionMarksInAttributes() { getParameterTableFor( "a href=\"mailto:sa...@ne...?subject=Site Comments\"" ); assertStringEquals( "href", "mailto:sa...@ne...?subject=Site Comments", (String)table.get("HREF") ); assertStringEquals( "tag name", "A", (String)table.get(SpecialHashtable.TAGNAME) ); } /** * Check that an empty tag is considered a string node. * Believe it or not Moi (vincent_aumont) wants htmlparser to parse a text file * containing something that looks nearly like a tag: * <pre> * "basic_string<char, string_char_traits<char>, <>>::basic_string()" * </pre> * This was throwing a null pointer exception when the empty <> was encountered. * Bug #725420 NPE in StringBean.visitTag **/ public void testEmptyTag () { getParameterTableFor(""); assertNull ("<> is not a tag",table); } /** * Test attributes when they contain scriptlets. * Submitted by Cory Seefurth * See also feature request #725376 Handle script in attributes. */ public void testJspWithinAttributes() { if (JSP_TESTS_ENABLED) { getParameterTableFor( "a href=\"<%=Application(\"sURL\")%>/literature/index.htm" ); assertStringEquals( "href", "<%=Application(\"sURL\")%>/literature/index.htm", (String)table.get("HREF") ); } } /** * Test Script in attributes. * See feature request #725376 Handle script in attributes. */ public void testScriptedTag () { getParameterTableFor("body onLoad=defaultStatus=''"); String name = (String)table.get(SpecialHashtable.TAGNAME); assertNotNull ("No Tag.TAGNAME", name); assertStringEquals("tag name parsed incorrectly", "BODY", name); String value = (String)table.get ("ONLOAD"); assertStringEquals ("parameter parsed incorrectly", "defaultStatus=''", value); } /** * Test that stand-alone attributes are kept that way, rather than being * given empty values. * -Joe Robins, 6/19/03 */ public void testStandaloneAttribute () { getParameterTableFor ("INPUT DISABLED"); assertTrue ("Standalone attribute has no entry in table keyset",table.containsKey("DISABLED")); assertNull ("Standalone attribute has non-null value",(String)table.get("DISABLED")); } /** * Test missing value. */ public void testMissingAttribute () { getParameterTableFor ("INPUT DISABLED="); assertTrue ("Attribute has no entry in table keyset",table.containsKey("DISABLED")); assertEquals ("Attribute has non-blank value","",(String)table.get("DISABLED")); } /** * Test Rule 1. */ public void testRule1 () { getParameterTableFor ("tag att = other=fred"); assertTrue ("Attribute missing", table.containsKey ("ATT")); assertEquals ("Attribute has wrong value", "", (String)table.get ("ATT")); assertTrue ("No attribute should be called equal sign", !table.containsKey ("=")); assertTrue ("Attribute missing", table.containsKey ("OTHER")); assertEquals ("Attribute has wrong value", "fred", (String)table.get ("OTHER")); } /** * Test Rule 2. */ public void testRule2 () { getParameterTableFor ("tag att =value other=fred"); assertTrue ("Attribute missing", table.containsKey ("ATT")); assertEquals ("Attribute has wrong value", "value", (String)table.get ("ATT")); assertTrue ("No attribute should be called =value", !table.containsKey ("=VALUE")); assertTrue ("Attribute missing", table.containsKey ("OTHER")); assertEquals ("Attribute has wrong value", "fred", (String)table.get ("OTHER")); } /** * Test Rule 3. */ public void testRule3 () { getParameterTableFor ("tag att= \"value\" other=fred"); assertTrue ("Attribute missing", table.containsKey ("ATT")); assertEquals ("Attribute has wrong value", "value", (String)table.get ("ATT")); assertTrue ("No attribute should be called \"value\"", !table.containsKey ("\"VALUE\"")); assertTrue ("Attribute missing", table.containsKey ("OTHER")); assertEquals ("Attribute has wrong value", "fred", (String)table.get ("OTHER")); } /** * Test Rule 4. */ public void testRule4 () { getParameterTableFor ("tag att=\"va\"lue\" other=fred"); assertTrue ("Attribute missing", table.containsKey ("ATT")); assertEquals ("Attribute has wrong value", "va\"lue", (String)table.get ("ATT")); assertTrue ("No attribute should be called va\"lue", !table.containsKey ("VA\"LUE")); assertTrue ("Attribute missing", table.containsKey ("OTHER")); assertEquals ("Attribute has wrong value", "fred", (String)table.get ("OTHER")); } /** * Test Rule 5. */ public void testRule5 () { getParameterTableFor ("tag att='va'lue' other=fred"); assertTrue ("Attribute missing", table.containsKey ("ATT")); assertEquals ("Attribute has wrong value", "va'lue", (String)table.get ("ATT")); assertTrue ("No attribute should be called va'lue", !table.containsKey ("VA'LUE")); assertTrue ("Attribute missing", table.containsKey ("OTHER")); assertEquals ("Attribute has wrong value", "fred", (String)table.get ("OTHER")); } /** * Mainline for all suites of tests. * @param args Command line arguments. The following options * are understood: * <pre> * -text -- use junit.textui.TestRunner * -awt -- use junit.awtui.TestRunner * -swing -- use junit.swingui.TestRunner (default) * </pre> * All other options are passed on to the junit framework. */ public static void main(String[] args) { String runner; int i; String arguments[]; Class cls; runner = null; for (i = 0; (i < args.length) && (null == runner); i++) { if (args[i].equalsIgnoreCase ("-text")) runner = "junit.textui.TestRunner"; else if (args[i].equalsIgnoreCase ("-awt")) runner = "junit.awtui.TestRunner"; else if (args[i].equalsIgnoreCase ("-swing")) runner = "junit.swingui.TestRunner"; } if (null != runner) { // remove it from the arguments arguments = new String[args.length - 1]; System.arraycopy (args, 0, arguments, 0, i - 1); System.arraycopy (args, i, arguments, i - 1, args.length - i); args = arguments; } else runner = "junit.swingui.TestRunner"; // append the test class arguments = new String[args.length + 1]; System.arraycopy (args, 0, arguments, 0, args.length); arguments[args.length] = "org.htmlparser.tests.lexerTests.AttributeTests"; // invoke main() of the test runner try { cls = Class.forName (runner); java.lang.reflect.Method method = cls.getDeclaredMethod ( "main", new Class[] { String[].class }); method.invoke ( null, new Object[] { arguments }); } catch (Throwable t) { System.err.println ( "cannot run unit test (" + t.getMessage () + ")"); } } } --- NEW FILE: TagTests.java --- // HTMLParser Library v1_4_20030921 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // For any questions or suggestions, you can write to me at : // Email :so...@in... // // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic, Inc. // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com // ---- IMPORTANT: This class has failing tests ---- // Original Location: org.htmlparser.tests.parserHelperTests; // Pls rememeber to add test back to org.htmlparser.tests.parserHelperTests.AllTests.suite() // and delete these comments when you're done. // ---- NEEDS FIXING ---- package org.htmlparser.tests.lexerTests; import java.util.HashMap; import java.util.Map; import org.htmlparser.Node; import org.htmlparser.Parser; import org.htmlparser.tags.LinkTag; import org.htmlparser.tags.MetaTag; import org.htmlparser.tags.Tag; import org.htmlparser.tests.ParserTestCase; import org.htmlparser.util.ParserException; public class TagTests extends ParserTestCase { private static final String TEST_HTML = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">" + "<!-- Server: sf-web2 -->" + "<html lang=\"en\">" + " <head><link rel=\"stylesheet\" type=\"text/css\" href=\"http://sourceforge.net/cssdef.php\">" + " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">" + " <TITLE>SourceForge.net: Modify: 711073 - HTMLTagParser not threadsafe as a static variable in Tag</TITLE>" + " <SCRIPT language=\"JavaScript\" type=\"text/javascript\">" + " <!--" + " function help_window(helpurl) {" + " HelpWin = window.open( 'http://sourceforge.net' + helpurl,'HelpWindow','scrollbars=yes,resizable=yes,toolbar=no,height=400,width=400');" + " }" + " // -->" + " </SCRIPT>" + " <link rel=\"SHORTCUT ICON\" href=\"/images/favicon.ico\">" + "<!-- This is temp javascript for the jump button. If we could actually have a jump script on the server side that would be ideal -->" + "<script language=\"JavaScript\" type=\"text/javascript\">" + "<!--" + " function jump(targ,selObj,restore){ //v3.0" + " if (selObj.options[selObj.selectedIndex].value) " + " eval(targ+\".location='\"+selObj.options[selObj.selectedIndex].value+\"'\");" + " if (restore) selObj.selectedIndex=0;" + " }" + " //-->" + "</script>" + "<a href=\"http://normallink.com/sometext.html\">" + "<style type=\"text/css\">" + "<!--" + "A:link { text-decoration:none }" + "A:visited { text-decoration:none }" + "A:active { text-decoration:none }" + "A:hover { text-decoration:underline; color:#0066FF; }" + "-->" + "</style>" + "</head>" + "<body bgcolor=\"#FFFFFF\" text=\"#000000\" leftmargin=\"0\" topmargin=\"0\" marginwidth=\"0\" marginheight=\"0\" link=\"#003399\" vlink=\"#003399\" alink=\"#003399\">"; private Map results; private int testProgress; public TagTests (String name) { super(name); } public void testTagWithQuotes() throws Exception { String testHtml = "<img src=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" width=87 height=20 border=0 alt=\"Marshall Field's\">"; createParser(testHtml); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("alt","Marshall Field's",tag.getAttribute("ALT")); assertStringEquals( "html", testHtml, tag.toHtml() ); } public void testEmptyTag() throws Exception { createParser("<custom/>"); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("tag name","CUSTOM",tag.getTagName()); assertTrue("empty tag",tag.isEmptyXmlTag()); assertStringEquals( "html", "<CUSTOM/>", tag.toHtml() ); } public void testTagWithCloseTagSymbolInAttribute() throws ParserException { createParser("<tag att=\"a>b\">"); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("attribute","a>b",tag.getAttribute("att")); } public void testTagWithOpenTagSymbolInAttribute() throws ParserException { createParser("<tag att=\"a<b\">"); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("attribute","a<b",tag.getAttribute("att")); } public void testTagWithSingleQuote() throws ParserException { String html = "<tag att=\'a<b\'>"; createParser(html); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("html",html,tag.toHtml()); assertStringEquals("attribute","a<b",tag.getAttribute("att")); } /** * The following multi line test cases are from * bug #725749 Parser does not handle < and > in multi-line attributes * submitted by Joe Robins (zorblak) */ public void testMultiLine1 () throws ParserException { String html = "<meta name=\"foo\" content=\"foo<bar>\">"; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html, tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foo<bar>", attribute2); } public void testMultiLine2 () throws ParserException { String html = "<meta name=\"foo\" content=\"foo<bar\">"; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html, tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foo<bar", attribute2); } public void testMultiLine3 () throws ParserException { String html = "<meta name=\"foo\" content=\"foobar>\">"; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html, tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foobar>", attribute2); } public void testMultiLine4 () throws ParserException { String html = "<meta name=\"foo\" content=\"foo\nbar>\">"; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html, tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foo\nbar>", attribute2); } /** * Test multiline tag like attribute. * See feature request #725749 Handle < and > in multi-line attributes. */ public void testMultiLine5 () throws ParserException { // <meta name="foo" content="<foo> // bar"> String html = "<meta name=\"foo\" content=\"<foo>\nbar\">"; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html, tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","<foo>\nbar", attribute2); } /** * Test multiline broken tag like attribute. * See feature request #725749 Handle < and > in multi-line attributes. */ public void testMultiLine6 () throws ParserException { // <meta name="foo" content="foo> // bar"> String html = "<meta name=\"foo\" content=\"foo>\nbar\">"; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html, tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foo>\nbar", attribute2); } /** * Test multiline split tag like attribute. * See feature request #725749 Handle < and > in multi-line attributes. */ public void testMultiLine7 () throws ParserException { // <meta name="foo" content="<foo // bar"> String html = "<meta name=\"foo\" content=\"<foo\nbar\""; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html + ">", tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","<foo\nbar", attribute2); } /** * End of multi line test cases. */ /** * Test multiple threads running against the parser. * See feature request #736144 Handle multi-threaded operation. */ public void testThreadSafety() throws Exception { createParser("<html></html>"); String testHtml1 = "<a HREF=\"/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html\">20020702 Report 1</A>" + TEST_HTML; String testHtml2 = "<a href=\"http://normallink.com/sometext.html\">" + TEST_HTML; ParsingThread parsingThread [] = new ParsingThread[100]; results = new HashMap(); testProgress = 0; for (int i=0;i<parsingThread.length;i++) { if (i<parsingThread.length/2) parsingThread[i] = new ParsingThread(i,testHtml1,parsingThread.length); else parsingThread[i] = new ParsingThread(i,testHtml2,parsingThread.length); Thread thread = new Thread(parsingThread[i]); thread.start(); } int completionValue = computeCompletionValue(parsingThread.length); do { try { Thread.sleep(500); } catch (InterruptedException e) { } } while (testProgress!=completionValue); for (int i=0;i<parsingThread.length;i++) { if (!parsingThread[i].passed()) { assertNotNull("Thread "+i+" link 1",parsingThread[i].getLink1()); assertNotNull("Thread "+i+" link 2",parsingThread[i].getLink2()); if (i<parsingThread.length/2) { assertStringEquals( "Thread "+i+", link 1:", "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html", parsingThread[i].getLink1().getLink() ); assertStringEquals( "Thread "+i+", link 2:", "http://normallink.com/sometext.html", parsingThread[i].getLink2().getLink() ); } else { assertStringEquals( "Thread "+i+", link 1:", "http://normallink.com/sometext.html", parsingThread[i].getLink1().getLink() ); assertNotNull("Thread "+i+" link 2",parsingThread[i].getLink2()); assertStringEquals( "Thread "+i+", link 2:", "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html", parsingThread[i].getLink2().getLink() ); } } } } private int computeCompletionValue(int numThreads) { return numThreads * (numThreads - 1) / 2; } class ParsingThread implements Runnable { Parser parser; int id; LinkTag link1, link2; boolean result; int max; ParsingThread(int id, String testHtml, int max) { this.id = id; this.max = max; this.parser = Parser.createParser(testHtml); parser.registerScanners(); } public void run() { try { result = false; Node linkTag [] = parser.extractAllNodesThatAre(LinkTag.class); link1 = (LinkTag)linkTag[0]; link2 = (LinkTag)linkTag[1]; if (id<max/2) { if (link1.getLink().equals("/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html") && link2.getLink().equals("http://normallink.com/sometext.html")) result = true; } else { if (link1.getLink().equals("http://normallink.com/sometext.html") && link2.getLink().equals("http://normallink.com/sometext.html")) result = true; } } catch (ParserException e) { System.err.println("Parser Exception"); e.printStackTrace(); } finally { testProgress += id; } } public LinkTag getLink1() { return link1; } public LinkTag getLink2() { return link2; } public boolean passed() { return result; } } /** * Test the toHTML method for a standalone attribute. */ public void testStandAloneToHTML () throws ParserException { createParser("<input disabled>"); parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; String html = tag.toHtml (); assertStringEquals ("html","<INPUT DISABLED>", html); } /** * Test the toHTML method for a missing value attribute. */ public void testMissingValueToHTML () throws ParserException { createParser("<input disabled=>"); parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; String html = tag.toHtml (); assertStringEquals ("html","<INPUT DISABLED=>", html); } /** * Mainline for all suites of tests. * @param args Command line arguments. The following options * are understood: * <pre> * -text -- use junit.textui.TestRunner * -awt -- use junit.awtui.TestRunner * -swing -- use junit.swingui.TestRunner (default) * </pre> * All other options are passed on to the junit framework. */ public static void main(String[] args) { String runner; int i; String arguments[]; Class cls; System.out.println (System.getProperty ("testclass")); runner = null; for (i = 0; (i < args.length) && (null == runner); i++) { if (args[i].equalsIgnoreCase ("-text")) runner = "junit.textui.TestRunner"; else if (args[i].equalsIgnoreCase ("-awt")) runner = "junit.awtui.TestRunner"; else if (args[i].equalsIgnoreCase ("-swing")) runner = "junit.swingui.TestRunner"; } if (null != runner) { // remove it from the arguments arguments = new String[args.length - 1]; System.arraycopy (args, 0, arguments, 0, i - 1); System.arraycopy (args, i, arguments, i - 1, args.length - i); args = arguments; } else runner = "junit.swingui.TestRunner"; // append the test class arguments = new String[args.length + 1]; System.arraycopy (args, 0, arguments, 0, args.length); arguments[args.length] = "org.htmlparser.tests.lexerTests.TagTests"; // invoke main() of the test runner try { cls = Class.forName (runner); java.lang.reflect.Method method = cls.getDeclaredMethod ( "main", new Class[] { String[].class }); method.invoke ( null, new Object[] { arguments }); } catch (Throwable t) { System.err.println ( "cannot run unit test (" + t.getMessage () + ")"); } } } Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/AllTests.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** AllTests.java 22 Sep 2003 02:40:05 -0000 1.11 --- AllTests.java 13 Oct 2003 21:48:13 -0000 1.12 *************** *** 47,50 **** --- 47,52 ---- suite.addTestSuite (PageIndexTests.class); suite.addTestSuite (LexerTests.class); + suite.addTestSuite (AttributeTests.class); + suite.addTestSuite (TagTests.class); return suite; } |
From: <der...@us...> - 2003-10-13 21:48:52
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv16902/lexer/nodes Modified Files: Attribute.java TagNode.java Log Message: Eliminated ParserHelper static class. Add fixAttributes() to handle bad tags. Provide for more than just an equals sign between the attribute name and the value. Unquote the values in getAttributes() hashtable. Fixed a bug regarding factory creation in script scanner. Returned temporaryFailures classes to servicability. Skip JSP testing, fix tests broken because of unquoted attribute values. Some JavaDoc cleanup. Index: Attribute.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/Attribute.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** Attribute.java 5 Oct 2003 13:49:42 -0000 1.10 --- Attribute.java 13 Oct 2003 21:48:12 -0000 1.11 *************** *** 37,54 **** /** * An attribute within a tag. ! * <p>If Name is null, it is whitepace and Value has the text. ! * <p>If Name is not null, and Value is null it's a standalone attribute. ! * <p>If Name is not null, and Value is "", and Quote is zero it's an empty attribute. ! * <p>If Name is not null, and Value is "", and Quote is ' it's an empty single quoted attribute. ! * <p>If Name is not null, and Value is "", and Quote is " it's an empty double quoted attribute. ! * <p>If Name is not null, and Value is something, and Quote is zero it's a naked attribute. ! * <p>If Name is not null, and Value is something, and Quote is ' it's a single quoted attribute. ! * <p>If Name is not null, and Value is something, and Quote is " it's a double quoted attribute. ! * <p>All other states are illegal. * <p> * The attribute can be 'lazy loaded' by providing the page and cursor offsets ! * into the page for the name and value. In this case if the starting offset is ! * less than zero, the element is null. This is done for speed, since if the name ! * and value are not been needed we can avoid the cost of creating the strings. */ public class Attribute --- 37,56 ---- /** * An attribute within a tag. ! * <br>If Name is null, it is whitepace and Value has the text. ! * <br>If Name is not null, and Value is null it's a standalone attribute. ! * <br>If Name is not null, and Value is "", and Quote is zero it's an empty attribute. ! * <br>If Name is not null, and Value is "", and Quote is ' it's an empty single quoted attribute. ! * <br>If Name is not null, and Value is "", and Quote is " it's an empty double quoted attribute. ! * <br>If Name is not null, and Value is something, and Quote is zero it's a naked attribute. ! * <br>If Name is not null, and Value is something, and Quote is ' it's a single quoted attribute. ! * <br>If Name is not null, and Value is something, and Quote is " it's a double quoted attribute. ! * <br>All other states are illegal. * <p> * The attribute can be 'lazy loaded' by providing the page and cursor offsets ! * into the page for the name and value. In this case if the starting offset of ! * the name is less than zero, the name is null, and if the ending offset of the ! * value is less than zero, the value is null.. This is done for speed, since ! * if the name and value are not been needed we can avoid the cost and memory ! * overhead of creating the strings. */ public class Attribute *************** *** 176,180 **** return (null == getName ()); } ! /** * Get the value of the attribute. --- 178,212 ---- return (null == getName ()); } ! ! /** ! * Predicate to determine if this attribute has no equals sign (or value). ! * @return <code>true</code> if this attribute is a standalone attribute. ! * <code>false</code> if has an equals sign. ! */ ! public boolean isStandAlone () ! { ! return (-1 == mValueStart); ! } ! ! /** ! * Predicate to determine if this attribute has an equals sign but no value. ! * @return <code>true</code> if this attribute is an empty attribute. ! * <code>false</code> if has an equals sign and a value. ! */ ! public boolean isEmpty () ! { ! return ((-1 != mValueStart) && (-1 == mValueEnd)); ! } ! ! /** ! * Predicate to determine if this attribute has a value. ! * @return <code>true</code> if this attribute has a value. ! * <code>false</code> if it is empty or standalone. ! */ ! public boolean isValued () ! { ! return ((-1 != mValueStart) && (-1 != mValueEnd)); ! } ! /** * Get the value of the attribute. *************** *** 187,191 **** { if (null == mValue) ! if (0 <= mValueStart) mValue = mPage.getText (mValueStart, mValueEnd); return (mValue); --- 219,223 ---- { if (null == mValue) ! if ((null != mPage) && (0 <= mValueEnd)) mValue = mPage.getText (mValueStart, mValueEnd); return (mValue); *************** *** 229,233 **** if (null == mValue) { ! if (0 <= mValueStart) { if (0 != (quote = getQuote ())) --- 261,265 ---- if (null == mValue) { ! if (0 <= mValueEnd) { if (0 != (quote = getQuote ())) *************** *** 259,262 **** --- 291,352 ---- /** + * Set the quote surrounding the value of the attribute. + * @param quote The new quote value. + */ + public void setQuote (char quote) + { + mQuote = quote; + } + + public Page getPage () + { + return (mPage); + } + + public int getNameStartPosition () + { + return (mNameStart); + } + + public void setNameStartPosition (int start) + { + mNameStart = start; + mName = null; + } + + public int getNameEndPosition () + { + return (mNameEnd); + } + + public void setNameEndPosition (int end) + { + mNameEnd = end; + mName = null; + } + + public int getValueStartPosition () + { + return (mValueStart); + } + + public void setValueStartPosition (int start) + { + mValueStart = start; + mValue = null; + } + + public int getValueEndPosition () + { + return (mValueEnd); + } + + public void setValueEndPosition (int end) + { + mValueEnd = end; + mValue = null; + } + + /** * Get a text representation of this attribute. * Suitable for insertion into a start tag, the output is one of *************** *** 266,272 **** * value * name ! * name= value ! * name= 'value' ! * name= "value" * </pre> * </code> --- 356,362 ---- * value * name ! * name=value ! * name='value' ! * name="value" * </pre> * </code> *************** *** 284,291 **** buffer.append (name); if (0 <= mValueStart) ! { ! buffer.append ("="); getRawValue (buffer); - } } } --- 374,383 ---- buffer.append (name); if (0 <= mValueStart) ! if (null == mPage) ! buffer.append ("="); ! else ! mPage.getText (buffer, mNameEnd, mValueStart - (0 == getQuote () ? 0 : 1)); ! if (0 <= mValueEnd) getRawValue (buffer); } } *************** *** 317,321 **** if (null != value) { ! length += 1; length += value.length (); if (0 != getQuote ()) --- 409,416 ---- if (null != value) { ! if (null == mPage) ! length += 1; ! else ! length += mValueStart - (0 == getQuote () ? 1 : 0) - mNameEnd; length += value.length (); if (0 != getQuote ()) Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** TagNode.java 5 Oct 2003 13:49:43 -0000 1.15 --- TagNode.java 13 Oct 2003 21:48:12 -0000 1.16 *************** *** 302,309 **** /** * Gets the attributes in the tag. ! * NOTE: Values of the extended hashtable are two element arrays of String, ! * with the first element being the original name (not uppercased), ! * and the second element being the value. ! * @return Returns a special hashtable of attributes in two element String arrays. */ public Vector getAttributesEx () --- 302,306 ---- /** * Gets the attributes in the tag. ! * @return Returns the list of {@link Attribute Attributes} in the tag. */ public Vector getAttributesEx () *************** *** 314,318 **** /** * Gets the attributes in the tag. ! * @return Returns a Hashtable of attributes. */ public Hashtable getAttributes () --- 311,324 ---- /** * Gets the attributes in the tag. ! * This is not the preferred method to get attributes, see {@link ! * #getAttributesEx getAttributesEx} which returns a list of {@link ! * Attribute} objects, which offer more information than the simple ! * <code>String</code> objects available from this <code>Hashtable</code>. ! * @return Returns a list of name/value pairs representing the attributes. ! * These are not in order, the keys (names) are capitalized and the values ! * are not quoted, even if they need to be. The table <em>will</em> return ! * <code>null</code> if there was no value for an attribute (no equals ! * sign or nothing to the right of the equals sign). A special entry with ! * a key of SpecialHashtable.TAGNAME ("$<TAGNAME>$") holds the tag name. */ public Hashtable getAttributes () *************** *** 337,348 **** if (!attribute.isWhitespace ()) { ! if (0 != attribute.getQuote ()) ! value = attribute.getRawValue (); ! else ! { ! value = attribute.getValue (); ! if ((null != value) && value.equals ("")) ! value = SpecialHashtable.NOTHING; ! } if (null == value) value = SpecialHashtable.NULLVALUE; --- 343,349 ---- if (!attribute.isWhitespace ()) { ! value = attribute.getValue (); ! if (attribute.isEmpty ()) ! value = SpecialHashtable.NOTHING; if (null == value) value = SpecialHashtable.NULLVALUE; *************** *** 689,693 **** * Set this tag to be an empty xml node, or not. * Adds or removes an ending slash on the tag. ! * @param If true, ensures there is an ending slash in the node, * i.e. <tag/>, otherwise removes it. */ --- 690,694 ---- * Set this tag to be an empty xml node, or not. * Adds or removes an ending slash on the tag. ! * @param emptyXmlTag If true, ensures there is an ending slash in the node, * i.e. <tag/>, otherwise removes it. */ |
From: <der...@us...> - 2003-10-13 21:48:52
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests In directory sc8-pr-cvs1:/tmp/cvs-serv16902/tests Modified Files: AllTests.java Log Message: Eliminated ParserHelper static class. Add fixAttributes() to handle bad tags. Provide for more than just an equals sign between the attribute name and the value. Unquote the values in getAttributes() hashtable. Fixed a bug regarding factory creation in script scanner. Returned temporaryFailures classes to servicability. Skip JSP testing, fix tests broken because of unquoted attribute values. Some JavaDoc cleanup. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/AllTests.java,v retrieving revision 1.52 retrieving revision 1.53 diff -C2 -d -r1.52 -r1.53 *** AllTests.java 3 Oct 2003 02:15:20 -0000 1.52 --- AllTests.java 13 Oct 2003 21:48:13 -0000 1.53 *************** *** 113,116 **** --- 113,117 ---- sub.addTestSuite (LineNumberAssignedByNodeReaderTest.class); suite.addTest (sub); + suite.addTest (org.htmlparser.tests.lexerTests.AllTests.suite ()); suite.addTest (org.htmlparser.tests.scannersTests.AllTests.suite ()); suite.addTest (org.htmlparser.tests.utilTests.AllTests.suite ()); |
From: <der...@us...> - 2003-10-13 21:48:51
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv16902/scanners Modified Files: ScriptScanner.java Log Message: Eliminated ParserHelper static class. Add fixAttributes() to handle bad tags. Provide for more than just an equals sign between the attribute name and the value. Unquote the values in getAttributes() hashtable. Fixed a bug regarding factory creation in script scanner. Returned temporaryFailures classes to servicability. Skip JSP testing, fix tests broken because of unquoted attribute values. Some JavaDoc cleanup. Index: ScriptScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptScanner.java,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** ScriptScanner.java 5 Oct 2003 13:49:53 -0000 1.41 --- ScriptScanner.java 13 Oct 2003 21:48:13 -0000 1.42 *************** *** 127,131 **** else // TODO: need to remove this cast ! last = (StringNode)lexer.createStringNode (lexer, node.elementBegin (), node.elementEnd ()); } else if (node instanceof RemarkNode) --- 127,131 ---- else // TODO: need to remove this cast ! last = (StringNode)factory.createStringNode (lexer, node.elementBegin (), node.elementEnd ()); } else if (node instanceof RemarkNode) *************** *** 134,139 **** last.setEndPosition (node.getEndPosition ()); else // TODO: need to remove this cast ! last = (StringNode)lexer.createStringNode (lexer, node.elementBegin (), node.elementEnd ()); } else // StringNode --- 134,142 ---- last.setEndPosition (node.getEndPosition ()); else + { // TODO: need to remove this cast ! // last = (StringNode)factory.createStringNode (lexer, node.elementBegin (), node.elementEnd ()); ! last = (StringNode)factory.createStringNode (lexer, node.elementBegin (), node.elementEnd ()); ! } } else // StringNode |
From: <der...@us...> - 2003-10-13 21:48:50
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1:/tmp/cvs-serv16902/lexer Modified Files: Cursor.java Lexer.java Page.java Log Message: Eliminated ParserHelper static class. Add fixAttributes() to handle bad tags. Provide for more than just an equals sign between the attribute name and the value. Unquote the values in getAttributes() hashtable. Fixed a bug regarding factory creation in script scanner. Returned temporaryFailures classes to servicability. Skip JSP testing, fix tests broken because of unquoted attribute values. Some JavaDoc cleanup. Index: Cursor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Cursor.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** Cursor.java 5 Oct 2003 13:49:41 -0000 1.11 --- Cursor.java 13 Oct 2003 21:48:12 -0000 1.12 *************** *** 87,91 **** /** * Set the position of this cursor. ! * @param The new cursor position. */ public void setPosition (int position) --- 87,91 ---- /** * Set the position of this cursor. ! * @param position The new cursor position. */ public void setPosition (int position) Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** Lexer.java 5 Oct 2003 13:49:42 -0000 1.12 --- Lexer.java 13 Oct 2003 21:48:12 -0000 1.13 *************** *** 386,390 **** private void empty (Vector attributes, int[] bookmarks) { ! attributes.addElement (new Attribute (mPage, bookmarks[1], bookmarks[2], bookmarks[2] + 1, bookmarks[2] + 1, (char)0)); //attributes.addElement (new Attribute (mPage.getText (bookmarks[1], bookmarks[2]), "", (char)0)); } --- 386,390 ---- private void empty (Vector attributes, int[] bookmarks) { ! attributes.addElement (new Attribute (mPage, bookmarks[1], bookmarks[2], bookmarks[2] + 1, -1, (char)0)); //attributes.addElement (new Attribute (mPage.getText (bookmarks[1], bookmarks[2]), "", (char)0)); } *************** *** 500,504 **** } break; ! case 1: // within attributre name if ((0 == ch) || ('>' == ch)) { --- 500,504 ---- } break; ! case 1: // within attribute name if ((0 == ch) || ('>' == ch)) { *************** *** 531,534 **** --- 531,540 ---- bookmarks[5] = bookmarks[3]; } + else if (Character.isWhitespace (ch)) + { + empty (attributes, bookmarks); + bookmarks[0] = bookmarks[3]; + state = 0; + } else state = 3; *************** *** 577,582 **** } } ! return (makeTag (cursor, attributes)); } --- 583,721 ---- } } ! ! // OK, before constructing the node, fix up erroneous attributes ! fixAttributes (attributes); ! return (makeTag (cursor, attributes)); + } + + /** + * Try to resolve bad attributes. + * Look for the following patterns and assume what they meant was the + * construct on the right: + * <p>Rule 1. + * <pre> + * att = -> att= + * </pre> + * An attribute named "=", converts a previous standalone attribute into + * an empty attribute. + * <p>Rule 2. + * <pre> + * att =value -> att=value + * </pre> + * An attribute name beginning with an equals sign, is the value of + * a previous standalone attribute. + * <p>Rule 3. + * <pre> + * att= "value" -> att="value" + * </pre> + * A quoted attribute name, is the value of a previous empty + * attribute. + * <p>Rule 4 and Rule 5. + * <pre> + * att="va"lue" -> att='va"lue' + * att='val'ue' -> att="val'ue" + * </pre> + * An attribute name ending in a quote is a second part of a + * similarly quoted value of a previous attribute. Note, this doesn't + * change the quote value but it should, or the contained quote should be + * removed. + * <p>Note: + * <pre> + * att = "value" -> att="value" + * </pre> + * A quoted attribute name, is the value of a previous standalone + * attribute separated by an attribute named "=" will be handled by + * sequential application of rule 1 and 3. + */ + protected void fixAttributes (Vector attributes) throws ParserException + { + Attribute attribute; + Cursor cursor; + char ch1; // name starting character + char ch2; // name ending character + Attribute prev1; // attribute prior to the current + Attribute prev2; // attribute prior but one to the current + char quote; + + cursor = new Cursor (getPage (), 0); + prev1 = null; + prev2 = null; + // leave the name alone & start with second attribute + for (int i = 2; i < attributes.size (); ) + { + attribute = (Attribute)attributes.elementAt (i); + if (!attribute.isWhitespace ()) + { + cursor.setPosition (attribute.getNameStartPosition ()); + ch1 = attribute.getPage ().getCharacter (cursor); + cursor.setPosition (attribute.getNameEndPosition () - 1); + ch2 = attribute.getPage ().getCharacter (cursor); + if ('=' == ch1) + { // possible rule 1 or 2 + // check for a previous standalone, both rules need it, also check prev1 as a sanity check + if (null != prev2 && prev2.isStandAlone () && prev1.isWhitespace ()) + { + if (1 == attribute.getNameEndPosition () - attribute.getNameStartPosition ()) + { // rule 1, an isolated equals sign + prev2.setValueStartPosition (attribute.getNameEndPosition ()); + attributes.removeElementAt (i); // current + attributes.removeElementAt (i - 1); // whitespace + prev1 = prev2; + prev2 = null; + i--; + continue; + } + else + { + // rule 2, name starts with equals + prev2.setValueStartPosition (attribute.getNameStartPosition () + 1); // past the equals sign + prev2.setValueEndPosition (attribute.getNameEndPosition ()); + attributes.removeElementAt (i); // current + attributes.removeElementAt (i - 1); // whitespace + prev1 = prev2; + prev2 = null; + i--; + continue; + } + } + } + else if ((('\'' == ch1) && ('\'' == ch2)) || (('"' == ch1) && ('"' == ch2))) + { // possible rule 3 + // check for a previous empty, also check prev1 as a sanity check + if (null != prev2 && prev2.isEmpty () && prev1.isWhitespace ()) + { // TODO check that name has more than one character + prev2.setValueStartPosition (attribute.getNameStartPosition () + 1); + prev2.setValueEndPosition (attribute.getNameEndPosition () - 1); + prev2.setQuote (ch1); + attributes.removeElementAt (i); // current + attributes.removeElementAt (i - 1); // whitespace + prev1 = prev2; + prev2 = null; + i--; + continue; + } + } + else if (('\'' == ch2) || ('"' == ch2)) + { // possible rule 4 or 5 + // check for a previous valued attribute + if (null != prev1 && prev1.isValued ()) + { // check for a terminating quote of the same type + cursor.setPosition (prev1.getValueEndPosition ()); + ch1 = prev1.getPage ().getCharacter (cursor); // crossing pages with cursor? + if (ch1 == ch2) + { + prev1.setValueEndPosition (attribute.getNameEndPosition () - 1); + attributes.removeElementAt (i); // current + continue; + } + } + } + } + // shift and go on to next attribute + prev2 = prev1; + prev1 = attribute; + i++; + } } Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** Page.java 30 Sep 2003 02:12:34 -0000 1.18 --- Page.java 13 Oct 2003 21:48:12 -0000 1.19 *************** *** 213,217 **** /** * Deserialize the page. ! * @see #writeObject * @param in The object stream to decode. */ --- 213,217 ---- /** * Deserialize the page. ! * For details see <code>writeObject()</code>. * @param in The object stream to decode. */ *************** *** 802,806 **** /** * Get the text line the position of the cursor lies on. ! * @param cursor The position to calculate for. * @return The contents of the URL or file corresponding to the line number * containg the cursor position. --- 802,806 ---- /** * Get the text line the position of the cursor lies on. ! * @param position The position to calculate for. * @return The contents of the URL or file corresponding to the line number * containg the cursor position. |
From: <der...@us...> - 2003-10-13 21:48:50
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv16902 Modified Files: Parser.java Log Message: Eliminated ParserHelper static class. Add fixAttributes() to handle bad tags. Provide for more than just an equals sign between the attribute name and the value. Unquote the values in getAttributes() hashtable. Fixed a bug regarding factory creation in script scanner. Returned temporaryFailures classes to servicability. Skip JSP testing, fix tests broken because of unquoted attribute values. Some JavaDoc cleanup. Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.65 retrieving revision 1.66 diff -C2 -d -r1.65 -r1.66 *** Parser.java 3 Oct 2003 00:20:44 -0000 1.65 --- Parser.java 13 Oct 2003 21:48:11 -0000 1.66 *************** *** 28,43 **** package org.htmlparser; ! ////////////////// ! // Java Imports // ! ////////////////// ! import java.io.BufferedInputStream; import java.io.IOException; - import java.io.InputStream; - import java.io.InputStreamReader; - import java.io.ObjectInputStream; - import java.io.ObjectOutputStream; import java.io.Serializable; ! import java.io.StringReader; ! import java.io.UnsupportedEncodingException; import java.net.URLConnection; import java.util.HashMap; --- 28,37 ---- package org.htmlparser; ! ! import java.io.File; import java.io.IOException; import java.io.Serializable; ! import java.net.MalformedURLException; ! import java.net.URL; import java.net.URLConnection; import java.util.HashMap; *************** *** 45,56 **** import java.util.Map; import java.util.Vector; ! import org.htmlparser.RemarkNode; ! import org.htmlparser.StringNode; import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; import org.htmlparser.lexer.nodes.NodeFactory; import org.htmlparser.lexer.nodes.TagNode; ! ! import org.htmlparser.parserHelper.ParserHelper; import org.htmlparser.scanners.AppletScanner; import org.htmlparser.scanners.BodyScanner; --- 39,51 ---- import java.util.Map; import java.util.Vector; ! ! import org.htmlparser.Node; import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; import org.htmlparser.lexer.nodes.NodeFactory; import org.htmlparser.lexer.nodes.TagNode; ! import org.htmlparser.nodeDecorators.DecodingNode; ! import org.htmlparser.nodeDecorators.EscapeCharacterRemovingNode; ! import org.htmlparser.nodeDecorators.NonBreakingSpaceConvertingNode; import org.htmlparser.scanners.AppletScanner; import org.htmlparser.scanners.BodyScanner; *************** *** 76,79 **** --- 71,75 ---- import org.htmlparser.util.DefaultParserFeedback; import org.htmlparser.util.IteratorImpl; + import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.NodeList; *************** *** 326,330 **** public Parser(String resourceLocn, ParserFeedback feedback) throws ParserException { ! this (ParserHelper.openConnection (resourceLocn, feedback), feedback); } --- 322,326 ---- public Parser(String resourceLocn, ParserFeedback feedback) throws ParserException { ! this (openConnection (resourceLocn, feedback), feedback); } *************** *** 419,423 **** { if ((null != url) && !"".equals (url)) ! setConnection (ParserHelper.openConnection (url, getFeedback ())); } --- 415,419 ---- { if ((null != url) && !"".equals (url)) ! setConnection (openConnection (url, getFeedback ())); } *************** *** 520,533 **** } - public TagScanner getPreviousOpenScanner () - { - return (mScanner); - } - - public void setPreviousOpenScanner (TagScanner scanner) - { - mScanner = scanner; - } - // // Public methods --- 516,519 ---- *************** *** 583,592 **** public NodeIterator elements() throws ParserException { - return (createIteratorImpl ()); - } - - public IteratorImpl createIteratorImpl() - throws ParserException - { boolean remove_scanner; Node node; --- 569,572 ---- *************** *** 783,786 **** --- 763,856 ---- /** + * Opens a connection using the given url. + * @param url The url to open. + * @param feedback The ibject to use for messages or <code>null</code>. + * @exception ParserException if an i/o exception occurs accessing the url. + */ + public static URLConnection openConnection (URL url, ParserFeedback feedback) + throws + ParserException + { + URLConnection ret; + + try + { + ret = url.openConnection (); + } + catch (IOException ioe) + { + String msg = "HTMLParser.openConnection() : Error in opening a connection to " + url.toExternalForm (); + ParserException ex = new ParserException (msg, ioe); + if (null != feedback) + feedback.error (msg, ex); + throw ex; + } + + return (ret); + } + + /** + * Opens a connection based on a given string. + * The string is either a file, in which case <code>file://localhost</code> + * is prepended to a canonical path derived from the string, or a url that + * begins with one of the known protocol strings, i.e. <code>http://</code>. + * Embedded spaces are silently converted to %20 sequences. + * @param string The name of a file or a url. + * @param feedback The object to use for messages or <code>null</code> for no feedback. + * @exception ParserException if the string is not a valid url or file. + */ + public static URLConnection openConnection (String string, ParserFeedback feedback) + throws + ParserException + { + final String prefix = "file://localhost"; + String resource; + URL url; + StringBuffer buffer; + URLConnection ret; + + try + { + url = new URL (LinkProcessor.fixSpaces (string)); + ret = openConnection (url, feedback); + } + catch (MalformedURLException murle) + { // try it as a file + try + { + File file = new File (string); + resource = file.getCanonicalPath (); + buffer = new StringBuffer (prefix.length () + resource.length ()); + buffer.append (prefix); + if (!resource.startsWith ("/")) + buffer.append ("/"); + buffer.append (resource); + url = new URL (LinkProcessor.fixSpaces (buffer.toString ())); + ret = openConnection (url, feedback); + if (null != feedback) + feedback.info (url.toExternalForm ()); + } + catch (MalformedURLException murle2) + { + String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; + ParserException ex = new ParserException (msg, murle2); + if (null != feedback) + feedback.error (msg, ex); + throw ex; + } + catch (IOException ioe) + { + String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; + ParserException ex = new ParserException (msg, ioe); + if (null != feedback) + feedback.error (msg, ex); + throw ex; + } + } + + return (ret); + } + + /** * The main program, which can be executed from the command line */ *************** *** 917,921 **** public Node createStringNode (Lexer lexer, int start, int end) { ! return (new StringNode (lexer.getPage (), start, end)); } --- 987,1004 ---- public Node createStringNode (Lexer lexer, int start, int end) { ! Node ret; ! ! ret = new StringNode (lexer.getPage (), start, end); ! if (null != stringNodeFactory) ! { ! if (stringNodeFactory.shouldDecodeNodes ()) ! ret = new DecodingNode (ret); ! if (stringNodeFactory.shouldRemoveEscapeCharacters ()) ! ret = new EscapeCharacterRemovingNode (ret); ! if (stringNodeFactory.shouldConvertNonBreakingSpace ()) ! ret = new NonBreakingSpaceConvertingNode (ret); ! } ! ! return (ret); } *************** *** 933,936 **** --- 1016,1021 ---- /** * Create a new tag node. + * This recurses into the created tag by calling the tag's scanner, + * if it is in the list of registered scanners. * @param lexer The lexer parsing this tag. * @param start The beginning position of the tag. *************** *** 953,960 **** name = ret.getTagName (); scanner = (TagScanner)scanners.get (name); ! save = getPreviousOpenScanner (); if ((null != scanner) && scanner.evaluate (ret.getText (), save)) { ! setPreviousOpenScanner (scanner); try { --- 1038,1045 ---- name = ret.getTagName (); scanner = (TagScanner)scanners.get (name); ! save = mScanner; if ((null != scanner) && scanner.evaluate (ret.getText (), save)) { ! mScanner = scanner; try { *************** *** 963,967 **** finally { ! setPreviousOpenScanner (save); } } --- 1048,1052 ---- finally { ! mScanner = save; } } |
From: <der...@us...> - 2003-10-13 21:48:21
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/temporaryFailures In directory sc8-pr-cvs1:/tmp/cvs-serv16902/tests/temporaryFailures Removed Files: AttributeParserTest.java TagParserTest.java Log Message: Eliminated ParserHelper static class. Add fixAttributes() to handle bad tags. Provide for more than just an equals sign between the attribute name and the value. Unquote the values in getAttributes() hashtable. Fixed a bug regarding factory creation in script scanner. Returned temporaryFailures classes to servicability. Skip JSP testing, fix tests broken because of unquoted attribute values. Some JavaDoc cleanup. --- AttributeParserTest.java DELETED --- --- TagParserTest.java DELETED --- |
From: <der...@us...> - 2003-10-13 21:48:20
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper In directory sc8-pr-cvs1:/tmp/cvs-serv16902/parserHelper Removed Files: ParserHelper.java Log Message: Eliminated ParserHelper static class. Add fixAttributes() to handle bad tags. Provide for more than just an equals sign between the attribute name and the value. Unquote the values in getAttributes() hashtable. Fixed a bug regarding factory creation in script scanner. Returned temporaryFailures classes to servicability. Skip JSP testing, fix tests broken because of unquoted attribute values. Some JavaDoc cleanup. --- ParserHelper.java DELETED --- |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv2828/tests/tagTests Modified Files: AppletTagTest.java BaseHrefTagTest.java EndTagTest.java FormTagTest.java FrameSetTagTest.java FrameTagTest.java ImageTagTest.java InputTagTest.java LinkTagTest.java MetaTagTest.java OptionTagTest.java ScriptTagTest.java SelectTagTest.java StyleTagTest.java TagTest.java TextareaTagTest.java Log Message: Updated tests to correspond to new behaviour. Mostly due to changes in order and case of tag contents. Of the forty odd remaining failing tests, the majority comprise altered functionality that needs to be resolved. Index: AppletTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/AppletTagTest.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** AppletTagTest.java 5 Oct 2003 13:49:54 -0000 1.29 --- AppletTagTest.java 6 Oct 2003 01:43:28 -0000 1.30 *************** *** 99,103 **** String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; Hashtable paramsMap = new Hashtable(); ! String testHTML = new String("<APPLET CODE=Myclass.class ARCHIVE=test.jar CODEBASE=www.kizna.com>\n"); for (int i = 0;i<paramsData.length;i++) { --- 99,103 ---- String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; Hashtable paramsMap = new Hashtable(); ! String testHTML = "<APPLET CODE=Myclass.class ARCHIVE=test.jar CODEBASE=www.kizna.com>\n"; for (int i = 0;i<paramsData.length;i++) { *************** *** 105,112 **** paramsMap.put(paramsData[i][0],paramsData[i][1]); } ! testHTML+= ! "</APPLET>\n"+ ! "</HTML>"; ! createParser(testHTML); parser.registerScanners(); parseAndAssertNodeCount(3); --- 105,111 ---- paramsMap.put(paramsData[i][0],paramsData[i][1]); } ! testHTML += ! "</APPLET>"; ! createParser(testHTML + "\n</HTML>"); parser.registerScanners(); parseAndAssertNodeCount(3); *************** *** 115,125 **** appletTag.setArchive ("htmlparser.jar"); // Check the data in the applet tag ! String expectedRawString = ! "<APPLET CODE=Myclass.class ARCHIVE=htmlparser.jar CODEBASE=htmlparser.sourceforge.net>\n"+ ! "<PARAM NAME=\"Param1\" VALUE=\"Value1\">\n"+ ! "<PARAM NAME=\"Name\" VALUE=\"Somik\">\n"+ ! "<PARAM NAME=\"Age\" VALUE=\"23\">\n"+ ! "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } --- 114,121 ---- appletTag.setArchive ("htmlparser.jar"); // Check the data in the applet tag ! testHTML = testHTML.substring (0, testHTML.indexOf ("test.jar")) ! + "htmlparser.jar" ! + testHTML.substring (testHTML.indexOf ("test.jar") + 8); ! assertStringEquals("toHTML()",testHTML,appletTag.toHtml()); } *************** *** 134,140 **** } testHTML+= ! "</APPLET>\n"+ ! "</HTML>"; ! createParser(testHTML); parser.registerScanners(); parseAndAssertNodeCount(3); --- 130,135 ---- } testHTML+= ! "</APPLET>"; ! createParser(testHTML + "\n</HTML>"); parser.registerScanners(); parseAndAssertNodeCount(3); *************** *** 143,153 **** appletTag.setAppletClass ("MyOtherClass.class"); // Check the data in the applet tag ! String expectedRawString = ! "<APPLET CODE=MyOtherClass.class ARCHIVE=htmlparser.jar CODEBASE=htmlparser.sourceforge.net>\n"+ ! "<PARAM NAME=\"Param1\" VALUE=\"Value1\">\n"+ ! "<PARAM NAME=\"Name\" VALUE=\"Somik\">\n"+ ! "<PARAM NAME=\"Age\" VALUE=\"23\">\n"+ ! "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } --- 138,145 ---- appletTag.setAppletClass ("MyOtherClass.class"); // Check the data in the applet tag ! testHTML = testHTML.substring (0, testHTML.indexOf ("Myclass.class")) ! + "MyOtherClass.class" ! + testHTML.substring (testHTML.indexOf ("Myclass.class") + 13); ! assertStringEquals("toHTML()",testHTML,appletTag.toHtml()); } Index: BaseHrefTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/BaseHrefTagTest.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** BaseHrefTagTest.java 28 Sep 2003 15:33:59 -0000 1.28 --- BaseHrefTagTest.java 6 Oct 2003 01:43:28 -0000 1.29 *************** *** 29,32 **** --- 29,33 ---- package org.htmlparser.tests.tagTests; + import java.util.Vector; import org.htmlparser.tags.BaseHrefTag; import org.htmlparser.tags.data.TagData; *************** *** 41,47 **** public void testConstruction() { ! fail ("not implemented"); ! // BaseHrefTag baseRefTag = new BaseHrefTag(new TagData(0,0,"",""),"http://www.abc.com"); ! // assertEquals("Expected Base URL","http://www.abc.com",baseRefTag.getBaseUrl()); } --- 42,47 ---- public void testConstruction() { ! BaseHrefTag baseRefTag = new BaseHrefTag(new TagData("BASE", 0, new Vector (), "http://www.abc.com", false),"http://www.abc.com"); ! assertEquals("Expected Base URL","http://www.abc.com",baseRefTag.getBaseUrl()); } Index: EndTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/EndTagTest.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** EndTagTest.java 28 Sep 2003 15:33:59 -0000 1.30 --- EndTagTest.java 6 Oct 2003 01:43:28 -0000 1.31 *************** *** 58,63 **** int pos = testHtml.indexOf("</SCRIPT>"); parseAndAssertNodeCount(4); ! assertTrue("Node should be a Tag",node[1] instanceof Tag); ! Tag endTag = (Tag)node[1]; assertTrue("Node should be an end Tag",endTag.isEndTag ()); assertEquals("endtag element begin",pos,endTag.elementBegin()); --- 58,63 ---- int pos = testHtml.indexOf("</SCRIPT>"); parseAndAssertNodeCount(4); ! assertTrue("Node should be a Tag",node[2] instanceof Tag); ! Tag endTag = (Tag)node[2]; assertTrue("Node should be an end Tag",endTag.isEndTag ()); assertEquals("endtag element begin",pos,endTag.elementBegin()); Index: FormTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/FormTagTest.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** FormTagTest.java 22 Sep 2003 02:40:12 -0000 1.32 --- FormTagTest.java 6 Oct 2003 01:43:28 -0000 1.33 *************** *** 55,62 **** formTag.setFormLocation("http://www.yahoo.com/yahoo/do_not_login.jsp"); ! ! String expectedHTML = "<FORM ACTION=\"http://www.yahoo.com/yahoo/do_not_login.jsp\" NAME=\"login_form\" ONSUBMIT=\"return CheckData()\" METHOD=\""+FormTag.POST+"\">\r\n"+ ! FormScannerTest.EXPECTED_FORM_HTML_REST_OF_FORM; ! assertStringEquals("Raw String",expectedHTML,formTag.toHtml()); } --- 55,63 ---- formTag.setFormLocation("http://www.yahoo.com/yahoo/do_not_login.jsp"); ! String expected = ! FormScannerTest.FORM_HTML.substring (0, FormScannerTest.FORM_HTML.indexOf ("\"do_login.php\"")) ! + "http://www.google.com/test/do_login.php" ! + FormScannerTest.FORM_HTML.substring (FormScannerTest.FORM_HTML.indexOf ("\"do_login.php\"") + 14); ! assertStringEquals("Raw String",expected,formTag.toHtml()); } *************** *** 68,72 **** assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); FormTag formTag = (FormTag)node[0]; ! assertStringEquals("Form Tag string representation"," User NamePassword Contents of TextArea",formTag.toPlainTextString()); } --- 69,73 ---- assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); FormTag formTag = (FormTag)node[0]; ! assertStringEquals("Form Tag string representation","\n \nUser Name\n\nPassword\n\n \n\n \nContents of TextArea\n\n\n", formTag.toPlainTextString()); } Index: FrameSetTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/FrameSetTagTest.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** FrameSetTagTest.java 22 Sep 2003 02:40:12 -0000 1.29 --- FrameSetTagTest.java 6 Oct 2003 01:43:28 -0000 1.30 *************** *** 42,50 **** public void testToHTML() throws ParserException{ ! createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ ! "</frameset>"); parser.addScanner(new FrameSetScanner("")); --- 42,50 ---- public void testToHTML() throws ParserException{ ! String html = "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ ! "</frameset>"; ! createParser(html); parser.addScanner(new FrameSetScanner("")); *************** *** 52,63 **** parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be End Tag",node[0] instanceof FrameSetTag); FrameSetTag frameSetTag = (FrameSetTag)node[0]; ! assertStringEquals("HTML Contents", ! "<FRAMESET BORDER=\"0\" ROWS=\"115,*\" FRAMESPACING=\"0\" FRAMEBORDER=\"NO\">\r\n"+ ! "<FRAME SCROLLING=\"NO\" FRAMEBORDER=\"NO\" SRC=\"demo_bc_top.html\" NAME=\"topFrame\" NORESIZE>\r\n"+ ! "<FRAME SCROLLING=\"AUTO\" SRC=\"http://www.kizna.com/web_e/\" NAME=\"mainFrame\">\r\n"+ ! "</FRAMESET>", ! frameSetTag.toHtml()); } } --- 52,58 ---- parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be a FrameSetTag",node[0] instanceof FrameSetTag); FrameSetTag frameSetTag = (FrameSetTag)node[0]; ! assertStringEquals("HTML Contents", html, frameSetTag.toHtml()); } } Index: FrameTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/FrameTagTest.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** FrameTagTest.java 22 Sep 2003 02:40:12 -0000 1.29 --- FrameTagTest.java 6 Oct 2003 01:43:28 -0000 1.30 *************** *** 41,60 **** public void testToHTML() throws ParserException { createParser( "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ ! "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ ! "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ "</frameset>"); parser.addScanner(new FrameScanner("")); ! parseAndAssertNodeCount(4); ! assertTrue("Node 1 should be Frame Tag",node[1] instanceof FrameTag); ! assertTrue("Node 2 should be Frame Tag",node[2] instanceof FrameTag); ! FrameTag frameTag1 = (FrameTag)node[1]; ! FrameTag frameTag2 = (FrameTag)node[2]; ! assertStringEquals("Frame 1 toHTML()","<FRAME SCROLLING=\"NO\" FRAMEBORDER=\"NO\" SRC=\"demo_bc_top.html\" NAME=\"topFrame\" NORESIZE>",frameTag1.toHtml()); ! assertStringEquals("Frame 2 toHTML()","<FRAME SCROLLING=\"AUTO\" SRC=\"http://www.kizna.com/web_e/\" NAME=\"mainFrame\">",frameTag2.toHtml()); } } --- 41,62 ---- public void testToHTML() throws ParserException { + String frame1 = "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">"; + String frame2 = "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">"; createParser( "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ ! frame1 + "\n"+ ! frame2 + "\n"+ "</frameset>"); parser.addScanner(new FrameScanner("")); ! parseAndAssertNodeCount(7); ! assertTrue("Node 3 should be Frame Tag",node[2] instanceof FrameTag); ! assertTrue("Node 5 should be Frame Tag",node[4] instanceof FrameTag); ! FrameTag frameTag1 = (FrameTag)node[2]; ! FrameTag frameTag2 = (FrameTag)node[4]; ! assertStringEquals("Frame 1 toHTML()",frame1,frameTag1.toHtml()); ! assertStringEquals("Frame 2 toHTML()",frame2,frameTag2.toHtml()); } } Index: ImageTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/ImageTagTest.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** ImageTagTest.java 22 Sep 2003 02:40:12 -0000 1.31 --- ImageTagTest.java 6 Oct 2003 01:43:28 -0000 1.32 *************** *** 142,146 **** public void testToHTML() throws ParserException { ! createParser("<IMG alt=Google height=115 src=\"../../goo/title_homepage4.gif\" width=305>","http://www.google.com/test/test/index.html"); // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); --- 142,147 ---- public void testToHTML() throws ParserException { ! String img = "<IMG alt=Google height=115 src=\"../../goo/title_homepage4.gif\" width=305>"; ! createParser(img,"http://www.google.com/test/test/index.html"); // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); *************** *** 150,154 **** assertTrue("Node should be a ImageTag",node[0] instanceof ImageTag); ImageTag imageTag = (ImageTag)node[0]; ! assertStringEquals("The image locn","<IMG WIDTH=\"305\" ALT=\"Google\" SRC=\"../../goo/title_homepage4.gif\" HEIGHT=\"115\">",imageTag.toHtml()); assertEquals("Alt","Google",imageTag.getAttribute("alt")); assertEquals("Height","115",imageTag.getAttribute("height")); --- 151,155 ---- assertTrue("Node should be a ImageTag",node[0] instanceof ImageTag); ImageTag imageTag = (ImageTag)node[0]; ! assertStringEquals("toHtml",img,imageTag.toHtml()); assertEquals("Alt","Google",imageTag.getAttribute("alt")); assertEquals("Height","115",imageTag.getAttribute("height")); Index: InputTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/InputTagTest.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** InputTagTest.java 22 Sep 2003 02:40:12 -0000 1.31 --- InputTagTest.java 6 Oct 2003 01:43:28 -0000 1.32 *************** *** 54,62 **** public void testToHTML() throws ParserException { parseAndAssertNodeCount(1); assertTrue("Node 1 should be INPUT Tag",node[0] instanceof InputTag); InputTag InputTag; InputTag = (InputTag) node[0]; ! assertStringEquals ("HTML String","<INPUT NAME=\"Google\" TYPE=\"text\">",InputTag.toHtml()); } --- 54,63 ---- public void testToHTML() throws ParserException { + String testHTML = "<INPUT type=\"text\" name=\"Google\">"; parseAndAssertNodeCount(1); assertTrue("Node 1 should be INPUT Tag",node[0] instanceof InputTag); InputTag InputTag; InputTag = (InputTag) node[0]; ! assertStringEquals ("HTML String",testHTML,InputTag.toHtml()); } *************** *** 67,71 **** InputTag InputTag; InputTag = (InputTag) node[0]; ! assertEquals("HTML Raw String","INPUT TAG\n--------\nNAME : Google\nTYPE : text\n",InputTag.toString()); } --- 68,72 ---- InputTag InputTag; InputTag = (InputTag) node[0]; ! assertEquals("HTML Raw String","INPUT TAG\n--------\nNAME : \"Google\"\nTYPE : \"text\"\n",InputTag.toString()); } *************** *** 76,81 **** public void testToHTML2() throws ParserException { ! String testHTML = new String("<INPUT type=\"checkbox\" " ! +"name=\"cbCheck\" checked>"); createParser(testHTML); parser.addScanner(new InputTagScanner("-i")); --- 77,82 ---- public void testToHTML2() throws ParserException { ! String testHTML ="<INPUT type=\"checkbox\" " ! +"name=\"cbCheck\" checked>"; createParser(testHTML); parser.addScanner(new InputTagScanner("-i")); *************** *** 86,92 **** InputTag InputTag; InputTag = (InputTag) node[0]; ! assertStringEquals("HTML String", ! "<INPUT CHECKED NAME=\"cbCheck\" TYPE=\"checkbox\">", ! InputTag.toHtml()); } --- 87,91 ---- InputTag InputTag; InputTag = (InputTag) node[0]; ! assertStringEquals("HTML String", testHTML, InputTag.toHtml()); } Index: LinkTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/LinkTagTest.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** LinkTagTest.java 5 Oct 2003 13:49:54 -0000 1.36 --- LinkTagTest.java 6 Oct 2003 01:43:28 -0000 1.37 *************** *** 29,32 **** --- 29,33 ---- package org.htmlparser.tests.tagTests; + import java.util.Vector; import org.htmlparser.Parser; import org.htmlparser.scanners.LinkScanner; *************** *** 124,130 **** public void testLinkNodeBug5() throws ParserException { ! createParser("<a href=http://note.kimo.com.tw/>µ§°O</a> <a \n"+ ! "href=http://photo.kimo.com.tw/>¬Ûï</a> <a\n"+ ! "href=http://address.kimo.com.tw/>³q°T¿ý</a> ","http://www.cj.com"); Parser.setLineSeparator("\r\n"); // Register the image scanner --- 125,134 ---- public void testLinkNodeBug5() throws ParserException { ! String link1 = "http://note.kimo.com.tw/"; ! String link2 = "http://photo.kimo.com.tw/"; ! String link3 = "http://address.kimo.com.tw/"; ! createParser("<a href=" + link1 + ">µ§°O</a> <a \n"+ ! "href=" + link2 + ">¬Ûï</a> <a\n"+ ! "href=" + link3 + ">³q°T¿ý</a> ","http://www.cj.com"); Parser.setLineSeparator("\r\n"); // Register the image scanner *************** *** 132,146 **** parseAndAssertNodeCount(6); ! // The node should be an LinkTag ! assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag); LinkTag linkNode = (LinkTag)node[2]; ! assertStringEquals("Link incorrect!!","http://photo.kimo.com.tw",linkNode.getLink()); ! assertEquals("Link beginning",new Integer(48),new Integer(linkNode.elementBegin())); ! assertEquals("Link ending",new Integer(38),new Integer(linkNode.elementEnd())); ! ! LinkTag linkNode2 = (LinkTag)node[4]; ! assertStringEquals("Link incorrect!!","http://address.kimo.com.tw",linkNode2.getLink()); ! assertEquals("Link beginning",new Integer(46),new Integer(linkNode2.elementBegin())); ! assertEquals("Link ending",new Integer(42),new Integer(linkNode2.elementEnd())); } --- 136,145 ---- parseAndAssertNodeCount(6); ! assertTrue("Node should be a LinkTag",node[2] instanceof LinkTag); LinkTag linkNode = (LinkTag)node[2]; ! assertStringEquals("Link incorrect!!",link2,linkNode.getLink()); ! assertTrue("Node should be a LinkTag",node[4] instanceof LinkTag); ! LinkTag linkNode2 = (LinkTag)node[4]; ! assertStringEquals("Link incorrect!!",link3,linkNode2.getLink()); } *************** *** 268,275 **** public void testToHTML() throws ParserException { ! createParser("<A HREF='mailto:so...@ya...'>hello</A>\n"+ ! "<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n"+ "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"+ ! "nical.html\"> Journalism 3.0</a> by Rajesh Jain","http://www.cj.com/"); Parser.setLineSeparator("\r\n"); // Register the image scanner --- 267,277 ---- public void testToHTML() throws ParserException { ! String link1 = "<A HREF='mailto:so...@ya...'>hello</A>"; ! String link2 = "<a \n"+ "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"+ ! "nical.html\"> Journalism 3.0</a>"; ! createParser(link1 + "\n"+ ! "<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font>" + ! link2 + " by Rajesh Jain","http://www.cj.com/"); Parser.setLineSeparator("\r\n"); // Register the image scanner *************** *** 279,362 **** assertTrue("First Node should be a LinkTag",node[0] instanceof LinkTag); LinkTag linkTag = (LinkTag)node[0]; ! assertStringEquals("Link Raw Text","<A HREF=\"mailto:so...@ya...\">hello</A>",linkTag.toHtml()); assertTrue("Ninth Node should be a HTMLLinkTag",node[8] instanceof LinkTag); linkTag = (LinkTag)node[8]; ! assertStringEquals("Link Raw Text","<A HREF=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\nnical.html\"> Journalism 3.0</A>",linkTag.toHtml()); } public void testTypeHttps() throws ParserException{ ! fail ("not implemented"); ! // LinkTag linkTag = ! // new LinkTag( ! // new TagData(0,0,"",""), ! // new CompositeTagData(null,null,null), ! // new LinkData("https://www.someurl.com","","",false,false) ! // ); ! // assertTrue("This is a https link",linkTag.isHTTPSLink()); } public void testTypeFtp() throws ParserException{ ! fail ("not implemented"); ! // LinkTag linkTag = ! // new LinkTag( ! // new TagData(0,0,"",""), ! // new CompositeTagData(null,null,null), ! // new LinkData("ftp://www.someurl.com","","",false,false) ! // ); ! // assertTrue("This is an ftp link",linkTag.isFTPLink()); } public void testTypeJavaScript() throws ParserException { ! fail ("not implemented"); ! // LinkTag linkTag = ! // new LinkTag( ! // new TagData(0,0,"",""), ! // new CompositeTagData(null,null,null), ! // new LinkData("javascript://www.someurl.com","","",false,true) ! // ); ! // assertTrue("This is a javascript link",linkTag.isJavascriptLink()); } public void testTypeHttpLink() throws ParserException { ! fail ("not implemented"); ! // LinkTag linkTag = ! // new LinkTag( ! // new TagData(0,0,"",""), ! // new CompositeTagData(null,null,null), ! // new LinkData("http://www.someurl.com","","",false,false) ! // ); ! // assertTrue("This is a http link : "+linkTag.getLink(),linkTag.isHTTPLink()); ! // linkTag = ! // new LinkTag( ! // new TagData(0,0,"",""), ! // new CompositeTagData(null,null,null), ! // new LinkData("somePage.html","","",false,false) ! // ); ! // assertTrue("This relative link is alsp a http link : "+linkTag.getLink(),linkTag.isHTTPLink()); ! // linkTag = ! // new LinkTag( ! // new TagData(0,0,"",""), ! // new CompositeTagData(null,null,null), ! // new LinkData("ftp://somePage.html","","",false,false) ! // ); ! // assertTrue("This is not a http link : "+linkTag.getLink(),!linkTag.isHTTPLink()); } public void testTypeHttpLikeLink() throws ParserException { ! fail ("not implemented"); ! // LinkTag linkTag = ! // new LinkTag( ! // new TagData(0,0,"",""), ! // new CompositeTagData(null,null,null), ! // new LinkData("http://","","",false,false) ! // ); ! // assertTrue("This is a http link",linkTag.isHTTPLikeLink()); ! // LinkTag linkTag2 = ! // new LinkTag( ! // new TagData(0,0,"",""), ! // new CompositeTagData(null,null,null), ! // new LinkData("https://www.someurl.com","","",false,false) ! // ); ! // assertTrue("This is a https link",linkTag2.isHTTPLikeLink()); } --- 281,359 ---- assertTrue("First Node should be a LinkTag",node[0] instanceof LinkTag); LinkTag linkTag = (LinkTag)node[0]; ! assertStringEquals("Link Raw Text",link1,linkTag.toHtml()); assertTrue("Ninth Node should be a HTMLLinkTag",node[8] instanceof LinkTag); linkTag = (LinkTag)node[8]; ! assertStringEquals("Link Raw Text",link2,linkTag.toHtml()); } public void testTypeHttps() throws ParserException{ ! LinkTag linkTag = ! new LinkTag( ! new TagData("A", 0, new Vector (), "http://www.someurl.com", false), ! new CompositeTagData(null,null,null), ! new LinkData("https://www.someurl.com","","",false,false) ! ); ! assertTrue("This is a https link",linkTag.isHTTPSLink()); } public void testTypeFtp() throws ParserException{ ! LinkTag linkTag = ! new LinkTag( ! new TagData("A", 0, new Vector (), "http://www.someurl.com", false), ! new CompositeTagData(null,null,null), ! new LinkData("ftp://www.someurl.com","","",false,false) ! ); ! assertTrue("This is an ftp link",linkTag.isFTPLink()); } public void testTypeJavaScript() throws ParserException { ! LinkTag linkTag = ! new LinkTag( ! new TagData("A", 0, new Vector (), "http://www.someurl.com", false), ! new CompositeTagData(null,null,null), ! new LinkData("javascript://www.someurl.com","","",false,true) ! ); ! assertTrue("This is a javascript link",linkTag.isJavascriptLink()); } public void testTypeHttpLink() throws ParserException { ! LinkTag linkTag = ! new LinkTag( ! new TagData("A", 0, new Vector (), "http://www.someurl.com", false), ! new CompositeTagData(null,null,null), ! new LinkData("http://www.someurl.com","","",false,false) ! ); ! assertTrue("This is a http link : "+linkTag.getLink(),linkTag.isHTTPLink()); ! linkTag = ! new LinkTag( ! new TagData("A", 0, new Vector (), "http://www.someurl.com", false), ! new CompositeTagData(null,null,null), ! new LinkData("somePage.html","","",false,false) ! ); ! assertTrue("This relative link is alsp a http link : "+linkTag.getLink(),linkTag.isHTTPLink()); ! linkTag = ! new LinkTag( ! new TagData("A", 0, new Vector (), "http://www.someurl.com", false), ! new CompositeTagData(null,null,null), ! new LinkData("ftp://somePage.html","","",false,false) ! ); ! assertTrue("This is not a http link : "+linkTag.getLink(),!linkTag.isHTTPLink()); } public void testTypeHttpLikeLink() throws ParserException { ! LinkTag linkTag = ! new LinkTag( ! new TagData("A", 0, new Vector (), "http://www.someurl.com", false), ! new CompositeTagData(null,null,null), ! new LinkData("http://","","",false,false) ! ); ! assertTrue("This is a http link",linkTag.isHTTPLikeLink()); ! LinkTag linkTag2 = ! new LinkTag( ! new TagData("A", 0, new Vector (), "http://www.someurl.com", false), ! new CompositeTagData(null,null,null), ! new LinkData("https://www.someurl.com","","",false,false) ! ); ! assertTrue("This is a https link",linkTag2.isHTTPLikeLink()); } *************** *** 384,396 **** public void testIrcIsNotAHTTPLink () throws ParserException { ! fail ("not implemented"); ! // LinkTag link; ! // ! // link = new LinkTag( ! // new TagData(0,0,"",""), ! // new CompositeTagData(null,null,null), ! // new LinkData("irc://server/channel","","",false,false) ! // ); ! // assertTrue("This is not a http link", !link.isHTTPLikeLink ()); } } --- 381,392 ---- public void testIrcIsNotAHTTPLink () throws ParserException { ! LinkTag link; ! ! link = new LinkTag( ! new TagData("A", 0, new Vector (), "http://www.someurl.com", false), ! new CompositeTagData(null,null,null), ! new LinkData("irc://server/channel","","",false,false) ! ); ! assertTrue("This is not a http link", !link.isHTTPLikeLink ()); } } Index: MetaTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/MetaTagTest.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** MetaTagTest.java 5 Oct 2003 13:49:54 -0000 1.30 --- MetaTagTest.java 6 Oct 2003 01:43:28 -0000 1.31 *************** *** 40,43 **** --- 40,46 ---- public void testToHTML() throws ParserException { + String description = "\"description\""; + String content = "\"Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\""; + String tag = "<META name=" + description + " content=" + content + ">"; createParser( "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n"+ *************** *** 45,49 **** "<head><title>SpamCop - Welcome to SpamCop\n"+ "</title>\n"+ ! "<META name=\"description\" content=\"Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\">\n"+ "<META name=\"keywords\" content=\"SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns\">\n"+ "<META name=\"language\" content=\"en\">\n"+ --- 48,52 ---- "<head><title>SpamCop - Welcome to SpamCop\n"+ "</title>\n"+ ! tag + "\n"+ "<META name=\"keywords\" content=\"SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns\">\n"+ "<META name=\"language\" content=\"en\">\n"+ *************** *** 57,63 **** MetaTag metaTag; metaTag = (MetaTag) node[7]; ! assertStringEquals("Meta Tag 7 Name","description",metaTag.getMetaTagName()); ! assertStringEquals("Meta Tag 7 Contents","Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.",metaTag.getMetaContent()); ! assertStringEquals("toHTML()","<META name=\"description\" content=\"Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\">",metaTag.toHtml()); } } --- 60,66 ---- MetaTag metaTag; metaTag = (MetaTag) node[7]; ! assertStringEquals("Meta Tag 7 Name",description,metaTag.getMetaTagName()); ! assertStringEquals("Meta Tag 7 Contents",content,metaTag.getMetaContent()); ! assertStringEquals("toHTML()",tag,metaTag.toHtml()); } } Index: OptionTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/OptionTagTest.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** OptionTagTest.java 22 Sep 2003 02:40:13 -0000 1.30 --- OptionTagTest.java 6 Oct 2003 01:43:28 -0000 1.31 *************** *** 50,54 **** "<OPTION>Mailcity\n</OPTION>"+ "<OPTION>\nIndiatimes\n</OPTION>"+ ! "<OPTION>\nRediff\n</OPTION>\n"+ "<OPTION>Cricinfo" + "<OPTION value=\"Microsoft Passport\">" --- 50,54 ---- "<OPTION>Mailcity\n</OPTION>"+ "<OPTION>\nIndiatimes\n</OPTION>"+ ! "<OPTION>\nRediff\n</OPTION>"+ "<OPTION>Cricinfo" + "<OPTION value=\"Microsoft Passport\">" *************** *** 87,99 **** assertStringEquals("HTML String","<OPTION>Yahoo!</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[4]; ! assertStringEquals("HTML String","<OPTION>\r\nHotmail</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[5]; assertStringEquals("HTML String","<OPTION VALUE=\"ICQ Messenger\"></OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[6]; ! assertStringEquals("HTML String","<OPTION>Mailcity\r\n</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[7]; ! assertStringEquals("HTML String","<OPTION>\r\nIndiatimes\r\n</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[8]; ! assertStringEquals("HTML String","<OPTION>\r\nRediff\r\n</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[9]; assertStringEquals("HTML String","<OPTION>Cricinfo</OPTION>",OptionTag.toHtml()); --- 87,99 ---- assertStringEquals("HTML String","<OPTION>Yahoo!</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[4]; ! assertStringEquals("HTML String","<OPTION>\nHotmail</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[5]; assertStringEquals("HTML String","<OPTION VALUE=\"ICQ Messenger\"></OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[6]; ! assertStringEquals("HTML String","<OPTION>Mailcity\n</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[7]; ! assertStringEquals("HTML String","<OPTION>\nIndiatimes\n</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[8]; ! assertStringEquals("HTML String","<OPTION>\nRediff\n</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[9]; assertStringEquals("HTML String","<OPTION>Cricinfo</OPTION>",OptionTag.toHtml()); *************** *** 126,134 **** assertEquals("HTML Raw String","OPTION VALUE: ICQ Messenger TEXT: \n",OptionTag.toString()); OptionTag = (OptionTag) node[6]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Mailcity\r\n\n",OptionTag.toString()); OptionTag = (OptionTag) node[7]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Indiatimes\r\n\n",OptionTag.toString()); OptionTag = (OptionTag) node[8]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Rediff\r\n\n",OptionTag.toString()); OptionTag = (OptionTag) node[9]; assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Cricinfo\n",OptionTag.toString()); --- 126,134 ---- assertEquals("HTML Raw String","OPTION VALUE: ICQ Messenger TEXT: \n",OptionTag.toString()); OptionTag = (OptionTag) node[6]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Mailcity\n\n",OptionTag.toString()); OptionTag = (OptionTag) node[7]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Indiatimes\n\n",OptionTag.toString()); OptionTag = (OptionTag) node[8]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Rediff\n\n",OptionTag.toString()); OptionTag = (OptionTag) node[9]; assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Cricinfo\n",OptionTag.toString()); Index: ScriptTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/ScriptTagTest.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** ScriptTagTest.java 5 Oct 2003 13:49:54 -0000 1.31 --- ScriptTagTest.java 6 Oct 2003 01:43:28 -0000 1.32 *************** *** 139,143 **** // Register the image scanner parser.addScanner(new ScriptScanner("-s")); ! parseAndAssertNodeCount(1); assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); ScriptTag scriptTag = (ScriptTag)node[0]; --- 139,143 ---- // Register the image scanner parser.addScanner(new ScriptScanner("-s")); ! parseAndAssertNodeCount(2); assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); ScriptTag scriptTag = (ScriptTag)node[0]; *************** *** 163,169 **** public void testSingleApostropheParsingBug() throws ParserException { ! StringBuffer sb1 = new StringBuffer(); ! sb1.append("<script src='<%=sourceFileName%>'></script>"); ! createParser(sb1.toString()); // Register the image scanner --- 163,168 ---- public void testSingleApostropheParsingBug() throws ParserException { ! String script = "<script src='<%=sourceFileName%>'></script>"; ! createParser(script); // Register the image scanner *************** *** 172,176 **** assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); ScriptTag scriptTag = (ScriptTag)node[0]; ! assertStringEquals("Script toHTML()","<SCRIPT SRC=\"<%=sourceFileName%>\"></SCRIPT>",scriptTag.toHtml()); } --- 171,175 ---- assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); ScriptTag scriptTag = (ScriptTag)node[0]; ! assertStringEquals("Script toHTML()",script,scriptTag.toHtml()); } Index: SelectTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/SelectTagTest.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** SelectTagTest.java 22 Sep 2003 02:40:13 -0000 1.31 --- SelectTagTest.java 6 Oct 2003 01:43:28 -0000 1.32 *************** *** 73,86 **** public void testToHTML() throws ParserException { ! assertStringEquals("HTML String","<SELECT NAME=\"Nominees\">\r\n"+ ! "<OPTION VALUE=\"Spouse\">Spouse</OPTION>\r\n"+ ! "<OPTION VALUE=\"Father\"></OPTION>\r\n"+ ! "<OPTION VALUE=\"Mother\">Mother\r\n</OPTION>\r\n" + ! "<OPTION VALUE=\"Son\">\r\nSon\r\n</OPTION>"+ ! "<OPTION VALUE=\"Daughter\">\r\nDaughter\r\n</OPTION>\r\n"+ ! "<OPTION VALUE=\"Nephew\">\r\nNephew</OPTION>\r\n"+ ! "<OPTION VALUE=\"Niece\">Niece\r\n</OPTION>"+ ! "</SELECT>", ! selectTag.toHtml()); } --- 73,77 ---- public void testToHTML() throws ParserException { ! assertStringEquals("HTML String",testHTML, selectTag.toHtml()); } *************** *** 94,102 **** "OPTION VALUE: Spouse TEXT: Spouse\n\n" + "OPTION VALUE: Father TEXT: \n\n" + ! "OPTION VALUE: Mother TEXT: Mother\r\n\n\n" + ! "OPTION VALUE: Son TEXT: Son\r\n\n\n" + ! "OPTION VALUE: Daughter TEXT: Daughter\r\n\n\n" + ! "OPTION VALUE: Nephew TEXT: Nephew\n\n" + ! "OPTION VALUE: Niece TEXT: Niece\r\n\n\n", selectTag.toString()); } --- 85,93 ---- "OPTION VALUE: Spouse TEXT: Spouse\n\n" + "OPTION VALUE: Father TEXT: \n\n" + ! "OPTION VALUE: Mother TEXT: Mother\n\n\n" + ! "OPTION VALUE: Son TEXT: \nSon\n\n\n" + ! "OPTION VALUE: Daughter TEXT: \nDaughter\n\n\n" + ! "OPTION VALUE: Nephew TEXT: \nNephew\n\n" + ! "OPTION VALUE: Niece TEXT: Niece\n\n\n", selectTag.toString()); } Index: StyleTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/StyleTagTest.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** StyleTagTest.java 22 Sep 2003 02:40:13 -0000 1.28 --- StyleTagTest.java 6 Oct 2003 01:43:28 -0000 1.29 *************** *** 41,50 **** public void testToHTML() throws ParserException { ! createParser("<style>a.h{background-color:#ffee99}</style>"); parser.registerScanners(); parseAndAssertNodeCount(1); assertTrue(node[0] instanceof StyleTag); StyleTag styleTag = (StyleTag)node[0]; ! assertEquals("Raw String","<STYLE>a.h{background-color:#ffee99}</STYLE>",styleTag.toHtml()); } --- 41,51 ---- public void testToHTML() throws ParserException { ! String html = "<style>a.h{background-color:#ffee99}</style>"; ! createParser(html); parser.registerScanners(); parseAndAssertNodeCount(1); assertTrue(node[0] instanceof StyleTag); StyleTag styleTag = (StyleTag)node[0]; ! assertEquals("Raw String",html,styleTag.toHtml()); } *************** *** 53,62 **** * style tag attributes being missed */ ! public void testToHTML_Attriubtes() throws ParserException { ! createParser("<STYLE type=\"text/css\">\n"+ "<!--"+ "{something....something}"+ "-->"+ ! "</STYLE>"); Parser.setLineSeparator("\r\n"); --- 54,64 ---- * style tag attributes being missed */ ! public void testToHtmlAttributes() throws ParserException { ! String style = "<STYLE type=\"text/css\">\n"+ "<!--"+ "{something....something}"+ "-->"+ ! "</STYLE>"; ! createParser(style); Parser.setLineSeparator("\r\n"); *************** *** 65,73 **** assertTrue(node[0] instanceof StyleTag); StyleTag styleTag = (StyleTag)node[0]; ! assertStringEquals("Raw String","<STYLE TYPE=\"text/css\">\r\n"+ ! "<!--"+ ! "{something....something}"+ ! "-->"+ ! "</STYLE>",styleTag.toHtml()); } } --- 67,71 ---- assertTrue(node[0] instanceof StyleTag); StyleTag styleTag = (StyleTag)node[0]; ! assertStringEquals("toHtml",style,styleTag.toHtml()); } } Index: TagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/TagTest.java,v retrieving revision 1.45 retrieving revision 1.46 diff -C2 -d -r1.45 -r1.46 *** TagTest.java 2 Oct 2003 23:48:53 -0000 1.45 --- TagTest.java 6 Oct 2003 01:43:28 -0000 1.46 *************** *** 125,129 **** h = tag.getAttributes(); String classValue= (String)h.get("CLASS"); ! assertEquals ("The class value should be ","userData",classValue); } --- 125,129 ---- h = tag.getAttributes(); String classValue= (String)h.get("CLASS"); ! assertEquals ("The class value should be ","\"userData\"",classValue); } *************** *** 162,168 **** nice = (String)h.get("YOURPARAMETER"); assertEquals ("Link tag (A)","A",a); ! assertEquals ("href value","http://www.iki.fi/kaila",href); assertEquals ("myparameter value",null,myValue); ! assertEquals ("yourparameter value","Kaarle Kaaila",nice); } if (!(node instanceof LinkTag)) { --- 162,168 ---- nice = (String)h.get("YOURPARAMETER"); assertEquals ("Link tag (A)","A",a); ! assertEquals ("href value","\"http://www.iki.fi/kaila\"",href); assertEquals ("myparameter value",null,myValue); ! assertEquals ("yourparameter value","\"Kaarle Kaaila\"",nice); } if (!(node instanceof LinkTag)) { *************** *** 171,175 **** node = en.nextNode(); snode = (StringNode)node; ! assertEquals("Value of element",snode.getText(),"Kaarle's homepage"); } --- 171,175 ---- node = en.nextNode(); snode = (StringNode)node; ! assertEquals("Value of element","Kaarle's homepage",snode.getText()); } *************** *** 177,181 **** node = en.nextNode(); etag = (Tag)node; ! assertEquals("endtag of link",etag.getText(),"A"); } } --- 177,181 ---- node = en.nextNode(); etag = (Tag)node; ! assertEquals("endtag of link","/A", etag.getText()); } } *************** *** 190,199 **** node = en.nextNode(); snode = (StringNode)node; ! assertEquals("paragraph contents",snode.getText(),"Paragraph"); } if (en.hasMoreNodes()) { node = en.nextNode(); etag = (Tag)node; ! assertEquals("paragrapg endtag",etag.getText(),"p"); } --- 190,199 ---- node = en.nextNode(); snode = (StringNode)node; ! assertEquals("paragraph contents","Paragraph",snode.getText()); } if (en.hasMoreNodes()) { node = en.nextNode(); etag = (Tag)node; ! assertEquals("paragrapg endtag","/p",etag.getText()); } *************** *** 232,236 **** nice = (String)h.get("YOURPARAMETER"); assertEquals ("The tagname should be G",a,"G"); ! assertEquals ("Check the http address",href,"http://www.iki.fi/kaila"); assertEquals ("myValue is not null",myValue,null); assertEquals ("The second parameter value",nice,"Kaila"); --- 232,236 ---- nice = (String)h.get("YOURPARAMETER"); assertEquals ("The tagname should be G",a,"G"); ! assertEquals ("Check the http address",href,"\"http://www.iki.fi/kaila\""); assertEquals ("myValue is not null",myValue,null); assertEquals ("The second parameter value",nice,"Kaila"); *************** *** 245,249 **** node = en.nextNode(); etag = (Tag)node; ! assertEquals("Endtag is G",etag.getText(),"G"); } // testing rest --- 245,249 ---- node = en.nextNode(); etag = (Tag)node; ! assertEquals("Endtag is G","/G", etag.getText()); } // testing rest *************** *** 252,266 **** tag = (Tag)node; ! assertEquals("Follow up by p-tag",tag.getText(),"p"); } if (en.hasMoreNodes()) { node = en.nextNode(); snode = (StringNode)node; ! assertEquals("Verify the paragraph text",snode.getText(),"Paragraph"); } if (en.hasMoreNodes()) { node = en.nextNode(); etag = (Tag)node; ! assertEquals("Still patragraph endtag",etag.getText(),"p"); } --- 252,266 ---- tag = (Tag)node; ! assertEquals("Follow up by p-tag","p", tag.getText()); } if (en.hasMoreNodes()) { node = en.nextNode(); snode = (StringNode)node; ! assertEquals("Verify the paragraph text","Paragraph", snode.getText()); } if (en.hasMoreNodes()) { node = en.nextNode(); etag = (Tag)node; ! assertEquals("Still patragraph endtag","/p", etag.getText()); } *************** *** 305,309 **** node = en.nextNode(); snode = (StringNode)node; ! assertEquals("Value of element",snode.getText(),"Kaarle's homepage"); } --- 305,309 ---- node = en.nextNode(); snode = (StringNode)node; ! assertEquals("Value of element","Kaarle's homepage",snode.getText()); } *************** *** 311,315 **** node = en.nextNode(); etag = (Tag)node; ! assertEquals("Still patragraph endtag",etag.getText(),"A"); } } --- 311,315 ---- node = en.nextNode(); etag = (Tag)node; ! assertEquals("Still patragraph endtag","/A",etag.getText()); } } *************** *** 388,406 **** */ public void testWithoutParseParameter() throws ParserException{ ! Node node=null; String testHTML = "<A href=\"http://www.iki.fi/kaila\" myParameter yourParameter=\"Kaarle\">Kaarle's homepage</A><p>Paragraph</p>"; createParser(testHTML); NodeIterator en = parser.elements(); String result=""; ! try { ! while (en.hasMoreNodes()) { ! node = en.nextNode(); ! result += node.toHtml(); ! } ! String expected = "<A YOURPARAMETER=\"Kaarle\" MYPARAMETER HREF=\"http://www.iki.fi/kaila\">Kaarle's homepage</A><P>Paragraph</P>"; ! assertStringEquals("Check collected contents to original", expected, result); ! } catch (ClassCastException ce) { ! fail("Bad class element = " + node.getClass().getName()); } } --- 388,401 ---- */ public void testWithoutParseParameter() throws ParserException{ ! Node node; String testHTML = "<A href=\"http://www.iki.fi/kaila\" myParameter yourParameter=\"Kaarle\">Kaarle's homepage</A><p>Paragraph</p>"; createParser(testHTML); NodeIterator en = parser.elements(); String result=""; ! while (en.hasMoreNodes()) { ! node = en.nextNode(); ! result += node.toHtml(); } + assertStringEquals("Check collected contents to original", testHTML, result); } *************** *** 411,415 **** */ public void testEmptyTagParseParameter() throws ParserException{ ! Node node=null; String testHTML = "<INPUT name=\"foo\" value=\"foobar\" type=\"text\" />"; --- 406,410 ---- */ public void testEmptyTagParseParameter() throws ParserException{ ! Node node; String testHTML = "<INPUT name=\"foo\" value=\"foobar\" type=\"text\" />"; *************** *** 417,431 **** NodeIterator en = parser.elements(); String result=""; ! try { ! while (en.hasMoreNodes()) { ! node = en.nextNode(); ! result = node.toHtml(); ! } ! String expected = "<INPUT VALUE=\"foobar\" NAME=\"foo\" TYPE=\"text\"/>"; ! assertStringEquals("Check collected contents to original", expected, result); ! } catch (ClassCastException ce) { ! ! fail("Bad class element = " + node.getClass().getName()); } } --- 412,420 ---- NodeIterator en = parser.elements(); String result=""; ! while (en.hasMoreNodes()) { ! node = en.nextNode(); ! result = node.toHtml(); } + assertStringEquals("Check collected contents to original", testHTML, result); } *************** *** 465,485 **** public void testIncorrectInvertedCommas() throws ParserException { ! String testHTML = new String("<META NAME=\"Author\" CONTENT = \"DORIER-APPRILL E., GERVAIS-LAMBONY P., MORICONI-EBRARD F., NAVEZ-BOUCHANINE F.\"\">"); createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; ! assertStringEquals("Node contents","META NAME=\"Author\" CONTENT=\"DORIER-APPRILL E., GERVAIS-LAMBONY P., MORICONI-EBRARD F., NAVEZ-BOUCHANINE F.\"",tag.getText()); ! assertEquals("Meta Content","DORIER-APPRILL E., GERVAIS-LAMBONY P., MORICONI-EBRARD F., NAVEZ-BOUCHANINE F.",tag.getAttribute("CONTENT")); } public void testIncorrectInvertedCommas2() throws ParserException { ! String testHTML = new String("<META NAME=\"Keywords\" CONTENT=Moscou, modernisation, politique urbaine, spécificités culturelles, municipalité, Moscou, modernisation, urban politics, cultural specificities, municipality\">"); createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; ! assertStringEquals("Node contents","META NAME=\"Keywords\" CONTENT=\"Moscou, modernisation, politique urbaine, spécificités culturelles, municipalité, Moscou, modernisation, urban politics, cultural specificities, municipality\"",tag.getText()); } --- 454,477 ---- public void testIncorrectInvertedCommas() throws ParserException { ! String content = "\"DORIER-APPRILL E., GERVAIS-LAMBONY P., MORICONI-EBRARD F., NAVEZ-BOUCHANINE F.\""; ! String guts = "META NAME=\"Author\" CONTENT = " + content + " \""; ! String testHTML = "<" + guts + ">"; createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; ! assertStringEquals("Node contents",guts,tag.getText()); ! assertEquals("Meta Content",content,tag.getAttribute("CONTENT")); } public void testIncorrectInvertedCommas2() throws ParserException { ! String guts = "META NAME=\"Keywords\" CONTENT=Moscou, modernisation, politique urbaine, spécificités culturelles, municipalité, Moscou, modernisation, urban politics, cultural specificities, municipality\""; ! String testHTML = "<" + guts + ">"; createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; ! assertStringEquals("Node contents",guts,tag.getText()); } *************** *** 544,548 **** StringNode stringNode = (StringNode)node[2]; String actual = stringNode.getText(); ! assertEquals("Third node has incorrect text","text\r\n<>text",actual); } --- 536,540 ---- StringNode stringNode = (StringNode)node[2]; String actual = stringNode.getText(); ! assertEquals("Third node has incorrect text","text\n<>text",actual); } *************** *** 559,563 **** StringNode stringNode = (StringNode)node[2]; String actual = stringNode.getText(); ! assertEquals("Third node has incorrect text","text<\r\n>text",actual); } --- 551,555 ---- StringNode stringNode = (StringNode)node[2]; String actual = stringNode.getText(); ! assertEquals("Third node has incorrect text","text<\n>text",actual); } *************** *** 574,587 **** StringNode stringNode = (StringNode)node[2]; String actual = stringNode.getText(); ! assertEquals("Third node has incorrect text","text<>\r\ntext",actual); } public void testAttributesReconstruction() throws ParserException { ! String testHTML = "<TEXTAREA name=\"JohnDoe\" ></TEXTAREA>"; createParser(testHTML); parseAndAssertNodeCount(2); assertTrue("First node should be an HTMLtag",node[0] instanceof Tag); Tag htmlTag = (Tag)node[0]; - String expectedHTML = "<TEXTAREA NAME=\"JohnDoe\">"; assertStringEquals("Expected HTML",expectedHTML,htmlTag.toHtml()); } --- 566,579 ---- StringNode stringNode = (StringNode)node[2]; String actual = stringNode.getText(); ! assertEquals("Third node has incorrect text","text<>\ntext",actual); } public void testAttributesReconstruction() throws ParserException { ! String expectedHTML = "<TEXTAREA name=\"JohnDoe\" >"; ! String testHTML = expectedHTML + "</TEXTAREA>"; createParser(testHTML); parseAndAssertNodeCount(2); assertTrue("First node should be an HTMLtag",node[0] instanceof Tag); Tag htmlTag = (Tag)node[0]; assertStringEquals("Expected HTML",expectedHTML,htmlTag.toHtml()); } *************** *** 653,657 **** { // straight out of a real world example ! createParser ("<a href=http://www.google.com/webhp?hl=en>"); // register standard scanners (Very Important) parser.registerScanners (); --- 645,650 ---- { // straight out of a real world example ! String html = "<a href=http://www.google.com/webhp?hl=en>"; ! createParser (html); // register standard scanners (Very Important) parser.registerScanners (); *************** *** 663,669 **** } assertNotNull ("No nodes", temp); ! assertStringEquals ("Incorrect HTML output: ", ! "<A HREF=\"http://www.google.com/webhp?hl=en\"></A>", ! temp); } --- 656,660 --... [truncated message content] |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv2828/tests/scannersTests Modified Files: BodyScannerTest.java BulletListScannerTest.java CompositeTagScannerTest.java FormScannerTest.java LabelScannerTest.java LinkScannerTest.java MetaTagScannerTest.java StyleScannerTest.java TableScannerTest.java TitleScannerTest.java Log Message: Updated tests to correspond to new behaviour. Mostly due to changes in order and case of tag contents. Of the forty odd remaining failing tests, the majority comprise altered functionality that needs to be resolved. Index: BodyScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/BodyScannerTest.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** BodyScannerTest.java 22 Sep 2003 02:40:09 -0000 1.13 --- BodyScannerTest.java 6 Oct 2003 01:43:28 -0000 1.14 *************** *** 53,62 **** BodyTag bodyTag = (BodyTag) node[4]; assertEquals("Body","This is a body tag",bodyTag.getBody()); ! assertEquals("Body","<BODY>This is a body tag</BODY>",bodyTag.toHtml()); assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } public void testBodywithJsp() throws ParserException { ! createParser("<html><head><title>Test 1</title></head><body><%=BodyValue%></body></html>"); parser.registerScanners(); BodyScanner bodyScanner = new BodyScanner("-b"); --- 53,63 ---- BodyTag bodyTag = (BodyTag) node[4]; assertEquals("Body","This is a body tag",bodyTag.getBody()); ! assertEquals("Body","<body>This is a body tag</body>",bodyTag.toHtml()); assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } public void testBodywithJsp() throws ParserException { ! String body = "<body><%=BodyValue%></body>"; ! createParser("<html><head><title>Test 1</title></head>" + body + "</html>"); parser.registerScanners(); BodyScanner bodyScanner = new BodyScanner("-b"); *************** *** 66,75 **** // check the body node BodyTag bodyTag = (BodyTag) node[4]; ! assertStringEquals("Body","<BODY><%=BodyValue%></BODY>",bodyTag.toHtml()); assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } public void testBodyMixed() throws ParserException { ! createParser("<html><head><title>Test 1</title></head><body>before jsp<%=BodyValue%>after jsp</body></html>"); parser.registerScanners(); BodyScanner bodyScanner = new BodyScanner("-b"); --- 67,77 ---- // check the body node BodyTag bodyTag = (BodyTag) node[4]; ! assertStringEquals("Body",body,bodyTag.toHtml()); assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } public void testBodyMixed() throws ParserException { ! String body = "<body>before jsp<%=BodyValue%>after jsp</body>"; ! createParser("<html><head><title>Test 1</title></head>" + body + "</html>"); parser.registerScanners(); BodyScanner bodyScanner = new BodyScanner("-b"); *************** *** 79,88 **** // check the body node BodyTag bodyTag = (BodyTag) node[4]; ! assertEquals("Body","<BODY>before jsp<%=BodyValue%>after jsp</BODY>",bodyTag.toHtml()); assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } public void testBodyEnding() throws ParserException { ! createParser("<html><body>before jsp<%=BodyValue%>after jsp</html>"); parser.registerScanners(); BodyScanner bodyScanner = new BodyScanner("-b"); --- 81,91 ---- // check the body node BodyTag bodyTag = (BodyTag) node[4]; ! assertEquals("Body",body,bodyTag.toHtml()); assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } public void testBodyEnding() throws ParserException { ! String body = "<body>before jsp<%=BodyValue%>after jsp"; ! createParser("<html>" + body + "</html>"); parser.registerScanners(); BodyScanner bodyScanner = new BodyScanner("-b"); *************** *** 92,96 **** // check the body node BodyTag bodyTag = (BodyTag) node[1]; ! assertEquals("Body","<BODY>before jsp<%=BodyValue%>after jsp</BODY>",bodyTag.toHtml()); assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } --- 95,99 ---- // check the body node BodyTag bodyTag = (BodyTag) node[1]; ! assertEquals("Body",body + "</body>",bodyTag.toHtml()); assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } Index: BulletListScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/BulletListScannerTest.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** BulletListScannerTest.java 22 Sep 2003 02:40:09 -0000 1.9 --- BulletListScannerTest.java 6 Oct 2003 01:43:28 -0000 1.10 *************** *** 87,91 **** assertStringEquals( "expected text", ! "Energy supply\r\n" + " (Campbell) ", firstNodeInFirstBullet.toPlainTextString() --- 87,91 ---- assertStringEquals( "expected text", ! "Energy supply\n" + " (Campbell) ", firstNodeInFirstBullet.toPlainTextString() Index: CompositeTagScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** CompositeTagScannerTest.java 28 Sep 2003 15:33:59 -0000 1.40 --- CompositeTagScannerTest.java 6 Oct 2003 01:43:28 -0000 1.41 *************** *** 72,115 **** public void testEmptyCompositeTag() throws ParserException { ! createParser( ! "<Custom/>" ! ); CustomTag customTag = parseCustomTag(1); assertEquals("child count",0,customTag.getChildCount()); assertTrue("custom tag should be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",1,customTag.tagData.getStartLine()); ! assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals("html","<CUSTOM/>",customTag.toHtml()); } public void testEmptyCompositeTagAnotherStyle() throws ParserException { ! createParser( ! "<Custom></Custom>" ! ); CustomTag customTag = parseCustomTag(1); assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",7,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",1,customTag.tagData.getStartLine()); ! assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertEquals("html","<CUSTOM></CUSTOM>",customTag.toHtml()); } public void testCompositeTagWithOneTextChild() throws ParserException { ! createParser( "<Custom>" + "Hello" + ! "</Custom>" ! ); CustomTag customTag = parseCustomTag(1); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",7,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",1,customTag.tagData.getStartLine()); ! assertEquals("ending line position",1,customTag.tagData.getEndLine()); Node child = customTag.childAt(0); --- 72,113 ---- public void testEmptyCompositeTag() throws ParserException { ! String html = "<Custom/>"; ! createParser(html); CustomTag customTag = parseCustomTag(1); assertEquals("child count",0,customTag.getChildCount()); assertTrue("custom tag should be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",9,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",0,customTag.tagData.getStartLine()); ! assertEquals("ending line position",0,customTag.tagData.getEndLine()); ! assertStringEquals("html",html,customTag.toHtml()); } public void testEmptyCompositeTagAnotherStyle() throws ParserException { ! String html = "<Custom></Custom>"; ! createParser(html); CustomTag customTag = parseCustomTag(1); assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",0,customTag.tagData.getStartLine()); ! assertEquals("ending line position",0,customTag.tagData.getEndLine()); ! assertEquals("html",html,customTag.toHtml()); } public void testCompositeTagWithOneTextChild() throws ParserException { ! String html = "<Custom>" + "Hello" + ! "</Custom>"; ! createParser(html); CustomTag customTag = parseCustomTag(1); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",0,customTag.tagData.getStartLine()); ! assertEquals("ending line position",0,customTag.tagData.getEndLine()); Node child = customTag.childAt(0); *************** *** 119,125 **** public void testCompositeTagWithTagChild() throws ParserException { createParser( "<Custom>" + ! "<Hello>" + "</Custom>" ); --- 117,124 ---- public void testCompositeTagWithTagChild() throws ParserException { + String childtag = "<Hello>"; createParser( "<Custom>" + ! childtag + "</Custom>" ); *************** *** 128,144 **** assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",7,customTag.getStartTag().elementEnd()); assertEquals("custom tag starting loc",0,customTag.elementBegin()); ! assertEquals("custom tag ending loc",23,customTag.elementEnd()); Node child = customTag.childAt(0); assertType("child",Tag.class,child); ! assertStringEquals("child html","<HELLO>",child.toHtml()); } public void testCompositeTagWithAnotherTagChild() throws ParserException { createParser( "<Custom>" + ! "<Another/>" + "</Custom>" ); --- 127,144 ---- assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); assertEquals("custom tag starting loc",0,customTag.elementBegin()); ! assertEquals("custom tag ending loc",24,customTag.elementEnd()); Node child = customTag.childAt(0); assertType("child",Tag.class,child); ! assertStringEquals("child html",childtag,child.toHtml()); } public void testCompositeTagWithAnotherTagChild() throws ParserException { + String childtag = "<Another/>"; createParser( "<Custom>" + ! childtag + "</Custom>" ); *************** *** 148,154 **** assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",7,customTag.getStartTag().elementEnd()); assertEquals("custom tag starting loc",0,customTag.elementBegin()); ! assertEquals("custom tag ending loc",26,customTag.elementEnd()); Node child = customTag.childAt(0); --- 148,154 ---- assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); assertEquals("custom tag starting loc",0,customTag.elementBegin()); ! assertEquals("custom tag ending loc",27,customTag.elementEnd()); Node child = customTag.childAt(0); *************** *** 156,163 **** AnotherTag tag = (AnotherTag)child; assertEquals("another tag start pos",8,tag.elementBegin()); ! assertEquals("another tag ending pos",17,tag.elementEnd()); assertEquals("custom end tag start pos",18,customTag.getEndTag().elementBegin()); ! assertStringEquals("child html","<ANOTHER/>",child.toHtml()); } --- 156,163 ---- AnotherTag tag = (AnotherTag)child; assertEquals("another tag start pos",8,tag.elementBegin()); ! assertEquals("another tag ending pos",18,tag.elementEnd()); assertEquals("custom end tag start pos",18,customTag.getEndTag().elementBegin()); ! assertStringEquals("child html",childtag,child.toHtml()); } *************** *** 251,308 **** public void testErroneousCompositeTag() throws ParserException { ! createParser("<custom>"); CustomTag customTag = parseCustomTag(1); assertEquals("child count",0,customTag.getChildCount()); ! assertFalse("custom tag should be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",7,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",1,customTag.tagData.getStartLine()); ! assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals("html","<CUSTOM></CUSTOM>",customTag.toHtml()); } public void testErroneousCompositeTagWithChildren() throws ParserException { ! createParser( ! "<custom>" + ! "<firstChild>" + ! "<secondChild>" ! ); CustomTag customTag = parseCustomTag(1); assertEquals("child count",2,customTag.getChildCount()); ! assertFalse("custom tag should be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",7,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",1,customTag.tagData.getStartLine()); ! assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals("html","<CUSTOM><FIRSTCHILD><SECONDCHILD></CUSTOM>",customTag.toHtml()); } public void testErroneousCompositeTagWithChildrenAndLineBreak() throws ParserException { ! createParser( ! "<custom>" + ! "<firstChild>\n" + ! "<secondChild>" ! ); CustomTag customTag = parseCustomTag(1); ! assertEquals("child count",2,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",7,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",1,customTag.tagData.getStartLine()); ! assertEquals("ending line position",2,customTag.tagData.getEndLine()); ! assertStringEquals( ! "html", ! "<CUSTOM><FIRSTCHILD>\r\n" + ! "<SECONDCHILD>" + ! "</CUSTOM>", ! customTag.toHtml() ); } public void testTwoConsecutiveErroneousCompositeTags() throws ParserException { ! createParser( ! "<custom>something" + ! "<custom></endtag>" ! ); parser.addScanner(new CustomScanner(false)); parseAndAssertNodeCount(2); --- 251,297 ---- public void testErroneousCompositeTag() throws ParserException { ! String html = "<custom>"; ! createParser(html); CustomTag customTag = parseCustomTag(1); assertEquals("child count",0,customTag.getChildCount()); ! assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",0,customTag.tagData.getStartLine()); ! assertEquals("ending line position",0,customTag.tagData.getEndLine()); ! assertStringEquals("html",html + "</custom>",customTag.toHtml()); } public void testErroneousCompositeTagWithChildren() throws ParserException { ! String html = "<custom>" + "<firstChild>" + "<secondChild>"; ! createParser(html); CustomTag customTag = parseCustomTag(1); assertEquals("child count",2,customTag.getChildCount()); ! assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",0,customTag.tagData.getStartLine()); ! assertEquals("ending line position",0,customTag.tagData.getEndLine()); ! assertStringEquals("html",html + "</custom>",customTag.toHtml()); } public void testErroneousCompositeTagWithChildrenAndLineBreak() throws ParserException { ! String html = "<custom>" + "<firstChild>" + "\n" + "<secondChild>"; ! createParser(html); CustomTag customTag = parseCustomTag(1); ! assertEquals("child count",3,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",0,customTag.tagData.getStartLine()); ! assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals("html", html + "</custom>", customTag.toHtml() ); } public void testTwoConsecutiveErroneousCompositeTags() throws ParserException { ! String tag1 = "<custom>something"; ! String tag2 = "<custom></endtag>"; ! createParser(tag1 + tag2); parser.addScanner(new CustomScanner(false)); parseAndAssertNodeCount(2); *************** *** 311,336 **** assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",7,customTag.getStartTag().elementEnd()); ! assertEquals("ending loc of custom tag",25,customTag.elementEnd()); ! assertEquals("starting line position",1,customTag.tagData.getStartLine()); ! assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals( ! "first custom tag", ! "<CUSTOM>something</CUSTOM>", ! customTag.toHtml() ! ); customTag = (CustomTag)node[1]; ! assertStringEquals( ! "second custom tag", ! "<CUSTOM></ENDTAG></CUSTOM>", ! customTag.toHtml() ! ); } public void testCompositeTagWithErroneousAnotherTagAndLineBreak() throws ParserException { createParser( ! "<another>" + ! "<custom>\n" + ! "</custom>" ); parser.addScanner(new AnotherScanner()); --- 300,318 ---- assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); ! assertEquals("ending loc of custom tag",17,customTag.elementEnd()); ! assertEquals("starting line position",0,customTag.tagData.getStartLine()); ! assertEquals("ending line position",0,customTag.tagData.getEndLine()); ! assertStringEquals("1st custom tag", tag1 + "</custom>", customTag.toHtml()); customTag = (CustomTag)node[1]; ! assertStringEquals("2nd custom tag", tag2 + "</custom>", customTag.toHtml()); } public void testCompositeTagWithErroneousAnotherTagAndLineBreak() throws ParserException { + String another = "<another>"; + String custom = "<custom>\n</custom>"; createParser( ! another + ! custom ); parser.addScanner(new AnotherScanner()); *************** *** 341,352 **** CustomTag customTag = (CustomTag)node[1]; ! assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",9,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",16,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",1,customTag.tagData.getStartLine()); ! assertEquals("ending line position",2,customTag.tagData.getEndLine()); ! assertStringEquals("another tag html","<ANOTHER></ANOTHER>",anotherTag.toHtml()); ! assertStringEquals("custom tag html","<CUSTOM>\r\n</CUSTOM>",customTag.toHtml()); } --- 323,334 ---- CustomTag customTag = (CustomTag)node[1]; ! assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",9,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",17,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",0,customTag.tagData.getStartLine()); ! assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals("another tag html",another,anotherTag.toHtml()); ! assertStringEquals("custom tag html",custom,customTag.toHtml()); } *************** *** 362,371 **** assertFalse("custom tag should be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",7,customTag.getStartTag().elementEnd()); AnotherTag anotherTag = (AnotherTag)customTag.childAt(0); ! assertEquals("another tag ending loc",26,anotherTag.elementEnd()); ! assertEquals("starting line position",1,customTag.tagData.getStartLine()); ! assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals("html","<CUSTOM><ANOTHER></ANOTHER></CUSTOM>",customTag.toHtml()); } --- 344,353 ---- assertFalse("custom tag should be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); AnotherTag anotherTag = (AnotherTag)customTag.childAt(0); ! assertEquals("another tag ending loc",17,anotherTag.elementEnd()); ! assertEquals("starting line position",0,customTag.tagData.getStartLine()); ! assertEquals("ending line position",0,customTag.tagData.getEndLine()); ! assertStringEquals("html","<custom><another></another></custom>",customTag.toHtml()); } *************** *** 384,390 **** assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",7,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",1,customTag.tagData.getStartLine()); ! assertEquals("ending line position",1,customTag.tagData.getEndLine()); AnotherTag anotherTag = (AnotherTag)customTag.childAt(0); assertEquals("anotherTag child count",1,anotherTag.getChildCount()); --- 366,372 ---- assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); ! assertEquals("starting line position",0,customTag.tagData.getStartLine()); ! assertEquals("ending line position",0,customTag.tagData.getEndLine()); AnotherTag anotherTag = (AnotherTag)customTag.childAt(0); assertEquals("anotherTag child count",1,anotherTag.getChildCount()); *************** *** 393,397 **** assertStringEquals( "first custom tag html", ! "<CUSTOM><ANOTHER>something</ANOTHER></CUSTOM>", customTag.toHtml() ); --- 375,379 ---- assertStringEquals( "first custom tag html", ! "<custom><another>something</another></custom>", customTag.toHtml() ); *************** *** 399,403 **** assertStringEquals( "second custom tag html", ! "<CUSTOM><ANOTHER>else</ANOTHER></CUSTOM>", customTag.toHtml() ); --- 381,385 ---- assertStringEquals( "second custom tag html", ! "<custom><another>else</another></custom>", customTag.toHtml() ); *************** *** 415,424 **** assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",7,customTag.getStartTag().elementEnd()); AnotherTag anotherTag = (AnotherTag)customTag.childAt(0); ! assertEquals("anotherTag child count",1,anotherTag.getChildCount()); ! assertEquals("anotherTag end loc",9,anotherTag.elementEnd()); ! assertEquals("custom end tag begin loc",10,customTag.getEndTag().elementBegin()); ! assertEquals("custom end tag end loc",8,customTag.getEndTag().elementEnd()); } --- 397,406 ---- assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); AnotherTag anotherTag = (AnotherTag)customTag.childAt(0); ! assertEquals("anotherTag child count",2,anotherTag.getChildCount()); ! assertEquals("anotherTag end loc",27,anotherTag.elementEnd()); ! assertEquals("custom end tag begin loc",27,customTag.getEndTag().elementBegin()); ! assertEquals("custom end tag end loc",36,customTag.getEndTag().elementEnd()); } Index: FormScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/FormScannerTest.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** FormScannerTest.java 5 Oct 2003 13:49:54 -0000 1.34 --- FormScannerTest.java 6 Oct 2003 01:43:28 -0000 1.35 *************** *** 61,81 **** "</FORM>"; ! public static final String EXPECTED_FORM_HTML_FORMLINE="<FORM ACTION=\"http://www.google.com/test/do_login.php\" NAME=\"login_form\" ONSUBMIT=\"return CheckData()\" METHOD=\""+FormTag.POST+"\">\r\n"; ! public static final String EXPECTED_FORM_HTML_REST_OF_FORM= ! "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><FONT FACE=\"Arial, verdana\" SIZE=\"2\"><B>User Name</B></FONT></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT NAME=\"name\" SIZE=\"20\" TYPE=\"text\"></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><FONT FACE=\"Arial, verdana\" SIZE=\"2\"><B>Password</B></FONT></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT NAME=\"passwd\" SIZE=\"20\" TYPE=\"password\"></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT VALUE=\"Login\" NAME=\"submit\" TYPE=\"submit\"></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ ! "<TEXTAREA TABINDEX=\"5\" ROWS=\"15\" COLS=\"55\" CLASS=\"composef\" NAME=\"Description\" WRAP=\"virtual\">Contents of TextArea</TEXTAREA>\r\n"+ ! // "<TEXTAREA TABINDEX=\"5\" ROWS=\"15\" COLS=\"55\" CLASS=\"composef\" NAME=\"AnotherDescription\" WRAP=\"virtual\">\r\n"+ ! "<INPUT NAME=\"password\" SIZE=\"20\" TYPE=\"hidden\">\r\n"+ ! "<INPUT TYPE=\"submit\">\r\n"+ ! "</FORM>"; ! public static final String EXPECTED_FORM_HTML = EXPECTED_FORM_HTML_FORMLINE+EXPECTED_FORM_HTML_REST_OF_FORM; ! public FormScannerTest(String name) { super(name); --- 61,81 ---- "</FORM>"; ! // public static final String EXPECTED_FORM_HTML_FORMLINE="<FORM ACTION=\"http://www.google.com/test/do_login.php\" NAME=\"login_form\" ONSUBMIT=\"return CheckData()\" METHOD=\""+FormTag.POST+"\">\r\n"; ! // public static final String EXPECTED_FORM_HTML_REST_OF_FORM= ! // "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ ! // "<TR><TD ALIGN=\"center\"><FONT FACE=\"Arial, verdana\" SIZE=\"2\"><B>User Name</B></FONT></TD></TR>\r\n"+ ! // "<TR><TD ALIGN=\"center\"><INPUT NAME=\"name\" SIZE=\"20\" TYPE=\"text\"></TD></TR>\r\n"+ ! // "<TR><TD ALIGN=\"center\"><FONT FACE=\"Arial, verdana\" SIZE=\"2\"><B>Password</B></FONT></TD></TR>\r\n"+ ! // "<TR><TD ALIGN=\"center\"><INPUT NAME=\"passwd\" SIZE=\"20\" TYPE=\"password\"></TD></TR>\r\n"+ ! // "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ ! // "<TR><TD ALIGN=\"center\"><INPUT VALUE=\"Login\" NAME=\"submit\" TYPE=\"submit\"></TD></TR>\r\n"+ ! // "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ ! // "<TEXTAREA TABINDEX=\"5\" ROWS=\"15\" COLS=\"55\" CLASS=\"composef\" NAME=\"Description\" WRAP=\"virtual\">Contents of TextArea</TEXTAREA>\r\n"+ ! //// "<TEXTAREA TABINDEX=\"5\" ROWS=\"15\" COLS=\"55\" CLASS=\"composef\" NAME=\"AnotherDescription\" WRAP=\"virtual\">\r\n"+ ! // "<INPUT NAME=\"password\" SIZE=\"20\" TYPE=\"hidden\">\r\n"+ ! // "<INPUT TYPE=\"submit\">\r\n"+ ! // "</FORM>"; ! // public static final String EXPECTED_FORM_HTML = EXPECTED_FORM_HTML_FORMLINE+EXPECTED_FORM_HTML_REST_OF_FORM; ! // public FormScannerTest(String name) { super(name); *************** *** 129,133 **** assertNull("Should have been null",formTag.getTextAreaTag("junk")); ! assertStringEquals("toHTML",EXPECTED_FORM_HTML,formTag.toHtml()); } --- 129,137 ---- assertNull("Should have been null",formTag.getTextAreaTag("junk")); ! String expected = ! FORM_HTML.substring (0, FORM_HTML.indexOf ("\"do_login.php\"")) ! + "http://www.google.com/test/do_login.php" ! + FORM_HTML.substring (FORM_HTML.indexOf ("\"do_login.php\"") + 14); ! assertStringEquals("toHTML",expected,formTag.toHtml()); } *************** *** 313,317 **** for (NodeIterator e = parser.elements(); e.hasMoreNodes();) nodes[i++] = e.nextNode(); ! assertEquals ("Expected nodes", 39, i); } } --- 317,321 ---- for (NodeIterator e = parser.elements(); e.hasMoreNodes();) nodes[i++] = e.nextNode(); ! assertEquals ("Expected nodes", 40, i); } } Index: LabelScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/LabelScannerTest.java,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** LabelScannerTest.java 5 Oct 2003 13:49:54 -0000 1.37 --- LabelScannerTest.java 6 Oct 2003 01:43:28 -0000 1.38 *************** *** 59,63 **** public void testLabelWithJspTag() throws ParserException { ! createParser("<label><%=labelValue%></label>"); parser.registerScanners(); LabelScanner labelScanner = new LabelScanner("-l"); --- 59,64 ---- public void testLabelWithJspTag() throws ParserException { ! String label = "<label><%=labelValue%></label>"; ! createParser(label); parser.registerScanners(); LabelScanner labelScanner = new LabelScanner("-l"); *************** *** 67,71 **** // check the title node LabelTag labelTag = (LabelTag) node[0]; ! assertStringEquals("Label","<LABEL><%=labelValue%></LABEL>",labelTag.toHtml()); assertEquals("Label Scanner",labelScanner,labelTag.getThisScanner()); } --- 68,72 ---- // check the title node LabelTag labelTag = (LabelTag) node[0]; ! assertStringEquals("Label",label,labelTag.toHtml()); assertEquals("Label Scanner",labelScanner,labelTag.getThisScanner()); } *************** *** 86,90 **** public void testLabelWithManyCompositeTags() throws ParserException { ! createParser("<label><span>Jane <b> Doe </b> Smith</span></label>"); parser.registerScanners(); LabelScanner labelScanner = new LabelScanner("-l"); --- 87,93 ---- public void testLabelWithManyCompositeTags() throws ParserException { ! String guts = "<span>Jane <b> Doe </b> Smith</span>"; ! String html = "<label>" + guts + "</label>"; ! createParser(html); parser.registerScanners(); LabelScanner labelScanner = new LabelScanner("-l"); *************** *** 93,99 **** assertTrue(node[0] instanceof LabelTag); LabelTag labelTag = (LabelTag) node[0]; ! assertEquals("Label value","<SPAN>Jane <B> Doe </B> Smith</SPAN>",labelTag.getChildrenHTML()); assertEquals("Label value","Jane Doe Smith",labelTag.getLabel()); ! assertStringEquals("Label","<LABEL><SPAN>Jane <B> Doe </B> Smith</SPAN></LABEL>",labelTag.toHtml()); assertEquals("Label Scanner",labelScanner,labelTag.getThisScanner()); } --- 96,102 ---- assertTrue(node[0] instanceof LabelTag); LabelTag labelTag = (LabelTag) node[0]; ! assertEquals("Label value",guts,labelTag.getChildrenHTML()); assertEquals("Label value","Jane Doe Smith",labelTag.getLabel()); ! assertStringEquals("Label",html,labelTag.toHtml()); assertEquals("Label Scanner",labelScanner,labelTag.getThisScanner()); } *************** *** 170,182 **** assertStringEquals("HTML String","<LABEL>Yahoo!</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[4]; ! assertStringEquals("HTML String","<LABEL>\r\nHotmail</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[5]; assertStringEquals("HTML String","<LABEL VALUE=\"ICQ Messenger\"></LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[6]; ! assertStringEquals("HTML String","<LABEL>Mailcity\r\n</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[7]; ! assertStringEquals("HTML String","<LABEL>\r\nIndiatimes\r\n</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[8]; ! assertStringEquals("HTML String","<LABEL>\r\nRediff\r\n</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[9]; assertStringEquals("HTML String","<LABEL>Cricinfo</LABEL>",LabelTag.toHtml()); --- 173,185 ---- assertStringEquals("HTML String","<LABEL>Yahoo!</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[4]; ! assertStringEquals("HTML String","<LABEL>\nHotmail</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[5]; assertStringEquals("HTML String","<LABEL VALUE=\"ICQ Messenger\"></LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[6]; ! assertStringEquals("HTML String","<LABEL>Mailcity\n</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[7]; ! assertStringEquals("HTML String","<LABEL>\nIndiatimes\n</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[8]; ! assertStringEquals("HTML String","<LABEL>\nRediff\n</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[9]; assertStringEquals("HTML String","<LABEL>Cricinfo</LABEL>",LabelTag.toHtml()); Index: LinkScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/LinkScannerTest.java,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** LinkScannerTest.java 5 Oct 2003 13:49:54 -0000 1.39 --- LinkScannerTest.java 6 Oct 2003 01:43:28 -0000 1.40 *************** *** 29,32 **** --- 29,33 ---- package org.htmlparser.tests.scannersTests; + import java.util.Vector; import org.htmlparser.AbstractNode; *************** *** 34,37 **** --- 35,39 ---- import org.htmlparser.Parser; import org.htmlparser.StringNode; + import org.htmlparser.lexer.nodes.Attribute; import org.htmlparser.scanners.LinkScanner; import org.htmlparser.tags.ImageTag; *************** *** 69,81 **** ); parser.registerScanners(); ! parseAndAssertNodeCount(5); // The first node should be a Tag assertTrue("First node should be a Tag",node[0] instanceof Tag); // The second node should be a HTMLStringNode ! assertTrue("Second node should be a HTMLStringNode",node[1] instanceof StringNode); StringNode stringNode = (StringNode)node[1]; assertEquals("Text of the StringNode","Site Comments?",stringNode.getText()); assertTrue("Third node should be a tag",node[2] instanceof Tag); ! } --- 71,85 ---- ); parser.registerScanners(); ! parseAndAssertNodeCount(6); // The first node should be a Tag assertTrue("First node should be a Tag",node[0] instanceof Tag); // The second node should be a HTMLStringNode ! assertTrue("Second node should be a StringNode",node[1] instanceof StringNode); StringNode stringNode = (StringNode)node[1]; assertEquals("Text of the StringNode","Site Comments?",stringNode.getText()); assertTrue("Third node should be a tag",node[2] instanceof Tag); ! assertTrue("Fourth node should be a link",node[3] instanceof LinkTag); ! assertTrue("Fifth node should be a link",node[4] instanceof LinkTag); ! assertTrue("Sixth node should be a Tag",node[5] instanceof Tag); } *************** *** 89,96 **** */ public void testErroneousLinkBugFromYahoo2() throws ParserException { createParser( "<td>" + ! "<a href=s/8741>" + ! "<img src=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" height=16 width=16 border=0>" + "</td>" + "<td nowrap> \n"+ --- 93,101 ---- */ public void testErroneousLinkBugFromYahoo2() throws ParserException { + String link = "<a href=s/8741>" + + "<img src=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" height=16 width=16 border=0>"; createParser( "<td>" + ! link + "</td>" + "<td nowrap> \n"+ *************** *** 106,110 **** assertStringEquals("Link Text","",linkTag.getLinkText()); // Verify the reconstruction html ! assertStringEquals("toHTML","<A HREF=\"s/8741\"><IMG BORDER=\"0\" WIDTH=\"16\" SRC=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" HEIGHT=\"16\"></A>",linkTag.toHtml()); } --- 111,115 ---- assertStringEquals("Link Text","",linkTag.getLinkText()); // Verify the reconstruction html ! assertStringEquals("toHTML",link + "</a>",linkTag.toHtml()); } *************** *** 118,122 **** */ public void testErroneousLinkBugFromYahoo() throws ParserException { ! createParser( "<a href=s/8741>" + "<img src=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" " + --- 123,127 ---- */ public void testErroneousLinkBugFromYahoo() throws ParserException { ! String link = "<a href=s/8741>" + "<img src=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" " + *************** *** 124,128 **** "width=16 " + "border=0>" + ! "This is a test\n"+ "<a href=s/7509>" + "<b>Yahoo! Movies</b>" + --- 129,135 ---- "width=16 " + "border=0>" + ! "This is a test\n"; ! createParser( ! link + "<a href=s/7509>" + "<b>Yahoo! Movies</b>" + *************** *** 133,146 **** parser.registerScanners(); parseAndAssertNodeCount(2); ! // The first node should be a Tag ! assertTrue("First node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! // The second node should be a HTMLStringNode ! assertTrue("Second node should be a HTMLLinkTag",node[1] instanceof LinkTag); LinkTag linkTag = (LinkTag)node[0]; assertEquals("Link","http://www.yahoo.com/s/8741",linkTag.getLink()); // Verify the link data ! assertEquals("Link Text","This is a test\r\n",linkTag.getLinkText()); // Verify the reconstruction html ! assertStringEquals("toHTML()","<A HREF=\"s/8741\"><IMG BORDER=\"0\" WIDTH=\"16\" SRC=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" HEIGHT=\"16\">This is a test\r\n</A>",linkTag.toHtml()); } --- 140,151 ---- parser.registerScanners(); parseAndAssertNodeCount(2); ! assertTrue("First node should be a LinkTag",node[0] instanceof LinkTag); ! assertTrue("Second node should be a LinkTag",node[1] instanceof LinkTag); LinkTag linkTag = (LinkTag)node[0]; assertEquals("Link","http://www.yahoo.com/s/8741",linkTag.getLink()); // Verify the link data ! assertEquals("Link Text","This is a test\n",linkTag.getLinkText()); // Verify the reconstruction html ! assertStringEquals("toHTML()",link + "</a>",linkTag.toHtml()); } *************** *** 157,166 **** public void testExtractLinkInvertedCommasBug() throws ParserException { ! fail ("not implemented"); ! // String tagContents = "a href=r/anorth/top.html"; ! // Tag tag = new Tag(new TagData(0,0,tagContents,"")); ! // String url = "c:\\cvs\\html\\binaries\\yahoo.htm"; ! // LinkScanner scanner = new LinkScanner("-l"); ! // assertEquals("Extracted Link","r/anorth/top.html",scanner.extractLink(tag,url)); } --- 162,173 ---- public void testExtractLinkInvertedCommasBug() throws ParserException { ! String link = "r/anorth/top.html"; ! Vector attributes = new Vector (); ! attributes.addElement (new Attribute ("A")); ! attributes.addElement (new Attribute ("HREF", link, (char)0)); ! Tag tag = new Tag(null, 0, 0, attributes); ! String url = "c:\\cvs\\html\\binaries\\yahoo.htm"; ! LinkScanner scanner = new LinkScanner(); ! assertEquals("Extracted Link","r/anorth/top.html",scanner.extractLink(tag,url)); } *************** *** 280,301 **** } - public void testReplaceFaultyTagWithEndTag() throws ParserException { - fail ("not implemented"); - // String currentLine = "<p>Site Comments?<br><a href=\"mailto:sa...@ne...?subject=Site Comments\">Mail Us<a></p>"; - // Tag tag = new Tag(new TagData(85,87,"a",currentLine)); - // LinkScanner linkScanner = new LinkScanner(); - // String newLine = linkScanner.replaceFaultyTagWithEndTag(tag,currentLine); - // assertEquals("Expected replacement","<p>Site Comments?<br><a href=\"mailto:sa...@ne...?subject=Site Comments\">Mail Us</A></p>",newLine); - } - - public void testInsertEndTagBeforeTag() throws ParserException { - fail ("not implemented"); - // String currentLine = "<a href=s/7509><b>Yahoo! Movies</b></a>"; - // Tag tag = new Tag(new TagData(0,14,"a href=s/7509",currentLine)); - // LinkScanner linkScanner = new LinkScanner(); - // String newLine = linkScanner.insertEndTagBeforeNode(tag,currentLine); - // assertEquals("Expected insertion","</A><a href=s/7509><b>Yahoo! Movies</b></a>",newLine); - } - /** * A bug in the freshmeat page - really bad html --- 287,290 ---- *************** *** 318,322 **** tag = (Tag)node[2]; assertTrue("Node 2 should be an end tag",tag.isEndTag ()); ! assertEquals("End Tag Contents","a",tag.getText()); } --- 307,311 ---- tag = (Tag)node[2]; assertTrue("Node 2 should be an end tag",tag.isEndTag ()); ! assertEquals("End Tag Contents","/a",tag.getText()); } *************** *** 380,388 **** Tag tag = (Tag)containedNodes[3]; assertTrue("Fourth contained node should be an EndTag",tag.isEndTag ()); ! assertEquals("Fourth Tag contents","b",tag.getText()); assertTrue("Fifth contained node should be a Tag",containedNodes[4] instanceof Tag); tag = (Tag)containedNodes[4]; assertTrue("Fifth contained node should be an EndTag",tag.isEndTag ()); ! assertEquals("Fifth Tag contents","font",tag.getText()); } --- 369,377 ---- Tag tag = (Tag)containedNodes[3]; assertTrue("Fourth contained node should be an EndTag",tag.isEndTag ()); ! assertEquals("Fourth Tag contents","/b",tag.getText()); assertTrue("Fifth contained node should be a Tag",containedNodes[4] instanceof Tag); tag = (Tag)containedNodes[4]; assertTrue("Fifth contained node should be an EndTag",tag.isEndTag ()); ! assertEquals("Fifth Tag contents","/font",tag.getText()); } *************** *** 399,404 **** parser.registerScanners(); parseAndAssertNodeCount(11); ! assertTrue("Node 4 should be a link tag",node[6] instanceof LinkTag); ! LinkTag linkTag = (LinkTag)node[6]; assertEquals("Resolved Link","http://www.abc.com/home.cfm",linkTag.getLink()); assertEquals("Resolved Link Text","Home",linkTag.getLinkText()); --- 388,393 ---- parser.registerScanners(); parseAndAssertNodeCount(11); ! assertTrue("Node 9 should be a link tag",node[8] instanceof LinkTag); ! LinkTag linkTag = (LinkTag)node[8]; assertEquals("Resolved Link","http://www.abc.com/home.cfm",linkTag.getLink()); assertEquals("Resolved Link Text","Home",linkTag.getLinkText()); Index: MetaTagScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/MetaTagScannerTest.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** MetaTagScannerTest.java 5 Oct 2003 13:49:54 -0000 1.30 --- MetaTagScannerTest.java 6 Oct 2003 01:43:28 -0000 1.31 *************** *** 42,45 **** --- 42,48 ---- public void testScan() throws ParserException { + String description = "\"description\""; + String content = "\"Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\""; + String tag = "<META name=" + description + " content=" + content + ">"; createParser( "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n"+ *************** *** 47,90 **** "<head><title>SpamCop - Welcome to SpamCop\n"+ "</title>\n"+ ! "<META name=\"description\" content=\"Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\">\n"+ "<META name=\"keywords\" content=\"SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns\">\n"+ "<META name=\"language\" content=\"en\">\n"+ "<META name=\"owner\" content=\"se...@ad...\">\n"+ ! "<META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html; charset=ISO-8859-1\">","http://www.google.com/test/index.html"); MetaTagScanner scanner = new MetaTagScanner("-t"); parser.addScanner(scanner); parseAndAssertNodeCount(18); ! assertTrue("Node 7 should be End Tag",node[7] instanceof Tag && ((Tag)node[7]).isEndTag ()); ! assertTrue("Node 9 should be META Tag",node[9] instanceof MetaTag); MetaTag metaTag; metaTag = (MetaTag) node[9]; ! assertEquals("Meta Tag 9 Name","description",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 9 Contents","Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.",metaTag.getMetaContent()); ! assertTrue("Node 11 should be META Tag",node[11] instanceof MetaTag); ! assertTrue("Node 13 should be META Tag",node[13] instanceof MetaTag); ! assertTrue("Node 15 should be META Tag",node[15] instanceof MetaTag); ! assertTrue("Node 17 should be META Tag",node[17] instanceof MetaTag); metaTag = (MetaTag) node[11]; ! assertEquals("Meta Tag 11 Name","keywords",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 11 Contents","SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns",metaTag.getMetaContent()); ! assertNull("Meta Tag 11 Http-Equiv",metaTag.getHttpEquiv()); metaTag = (MetaTag) node[13]; ! assertEquals("Meta Tag 13 Name","language",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 13 Contents","en",metaTag.getMetaContent()); ! assertNull("Meta Tag 13 Http-Equiv",metaTag.getHttpEquiv()); metaTag = (MetaTag) node[15]; ! assertEquals("Meta Tag 15 Name","owner",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 15 Contents","se...@ad...",metaTag.getMetaContent()); ! assertNull("Meta Tag 15 Http-Equiv",metaTag.getHttpEquiv()); metaTag = (MetaTag) node[17]; ! assertNull("Meta Tag 17 Name",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 17 Contents","text/html; charset=ISO-8859-1",metaTag.getMetaContent()); ! assertEquals("Meta Tag 17 Http-Equiv","content-type",metaTag.getHttpEquiv()); assertEquals("This Scanner",scanner,metaTag.getThisScanner()); --- 50,93 ---- "<head><title>SpamCop - Welcome to SpamCop\n"+ "</title>\n"+ ! tag + "\n"+ "<META name=\"keywords\" content=\"SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns\">\n"+ "<META name=\"language\" content=\"en\">\n"+ "<META name=\"owner\" content=\"se...@ad...\">\n"+ ! "<META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html; charset=ISO-8859-1\">"); MetaTagScanner scanner = new MetaTagScanner("-t"); parser.addScanner(scanner); parseAndAssertNodeCount(18); ! assertTrue("Node 8 should be End Tag",node[7] instanceof Tag && ((Tag)node[7]).isEndTag ()); ! assertTrue("Node 10 should be META Tag",node[9] instanceof MetaTag); MetaTag metaTag; metaTag = (MetaTag) node[9]; ! assertEquals("Meta Tag 10 Name",description,metaTag.getMetaTagName()); ! assertEquals("Meta Tag 10 Contents",content,metaTag.getMetaContent()); ! assertTrue("Node 12 should be META Tag",node[11] instanceof MetaTag); ! assertTrue("Node 14 should be META Tag",node[13] instanceof MetaTag); ! assertTrue("Node 16 should be META Tag",node[15] instanceof MetaTag); ! assertTrue("Node 18 should be META Tag",node[17] instanceof MetaTag); metaTag = (MetaTag) node[11]; ! assertEquals("Meta Tag 12 Name","\"keywords\"",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 12 Contents","\"SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns\"",metaTag.getMetaContent()); ! assertNull("Meta Tag 12 Http-Equiv",metaTag.getHttpEquiv()); metaTag = (MetaTag) node[13]; ! assertEquals("Meta Tag 14 Name","\"language\"",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 14 Contents","\"en\"",metaTag.getMetaContent()); ! assertNull("Meta Tag 14 Http-Equiv",metaTag.getHttpEquiv()); metaTag = (MetaTag) node[15]; ! assertEquals("Meta Tag 16 Name","\"owner\"",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 16 Contents","\"se...@ad...\"",metaTag.getMetaContent()); ! assertNull("Meta Tag 16 Http-Equiv",metaTag.getHttpEquiv()); metaTag = (MetaTag) node[17]; ! assertNull("Meta Tag 18 Name",metaTag.getMetaTagName()); ! assertEquals("Meta Tag 18 Contents","\"text/html; charset=ISO-8859-1\"",metaTag.getMetaContent()); ! assertEquals("Meta Tag 18 Http-Equiv","\"content-type\"",metaTag.getHttpEquiv()); assertEquals("This Scanner",scanner,metaTag.getThisScanner()); *************** *** 92,97 **** public void testScanTagsInMeta() throws ParserException { createParser( ! "<META NAME=\"Description\" CONTENT=\"Ethnoburb </I>versus Chinatown: Two Types of Urban Ethnic Communities in Los Angeles\">", "http://www.google.com/test/index.html" ); --- 95,102 ---- public void testScanTagsInMeta() throws ParserException { + String description = "\"Description\""; + String content = "\"Ethnoburb </I>versus Chinatown: Two Types of Urban Ethnic Communities in Los Angeles\""; createParser( ! "<META NAME=" + description + "CONTENT=" + content + ">", "http://www.google.com/test/index.html" ); *************** *** 101,106 **** assertTrue("Node should be meta tag",node[0] instanceof MetaTag); MetaTag metaTag = (MetaTag)node[0]; ! assertEquals("Meta Tag Name","Description",metaTag.getMetaTagName()); ! assertEquals("Content","Ethnoburb </I>versus Chinatown: Two Types of Urban Ethnic Communities in Los Angeles",metaTag.getMetaContent()); } --- 106,111 ---- assertTrue("Node should be meta tag",node[0] instanceof MetaTag); MetaTag metaTag = (MetaTag)node[0]; ! assertEquals("Meta Tag Name",description,metaTag.getMetaTagName()); ! assertEquals("Content",content,metaTag.getMetaContent()); } *************** *** 110,119 **** */ public void testMetaTagBug() throws ParserException { createParser( "<html>" + "<head>" + ! "<meta http-equiv=\"content-type\"" + ! " content=\"text/html;" + ! " charset=windows-1252\">" + "</head>" + "</html>" --- 115,125 ---- */ public void testMetaTagBug() throws ParserException { + String equiv = "\"content-type\""; + String content = "\"text/html; charset=windows-1252\""; createParser( "<html>" + "<head>" + ! "<meta http-equiv=" + equiv + ! " content=" + content + ">" + "</head>" + "</html>" *************** *** 124,129 **** MetaTag metaTag = (MetaTag)node[2]; ! assertStringEquals("http-equiv","content-type",metaTag.getHttpEquiv()); ! assertStringEquals("content","text/html; charset=windows-1252",metaTag.getMetaContent()); } --- 130,135 ---- MetaTag metaTag = (MetaTag)node[2]; ! assertStringEquals("http-equiv",equiv,metaTag.getHttpEquiv()); ! assertStringEquals("content",content,metaTag.getMetaContent()); } *************** *** 133,141 **** */ public void testMetaTagWithOpenTagSymbol() throws ParserException { createParser( "<html>" + "<head>" + "<title>Parser Test 2</title>" + ! "<meta name=\"foo\" content=\"a<b\">" + "</head>" + "<body>" + --- 139,148 ---- */ public void testMetaTagWithOpenTagSymbol() throws ParserException { + String content = "\"a<b\""; createParser( "<html>" + "<head>" + "<title>Parser Test 2</title>" + ! "<meta name=\"foo\" content=" + content + ">" + "</head>" + "<body>" + *************** *** 151,155 **** assertStringEquals( "meta content", ! "a<b", metaTag.getMetaContent() ); --- 158,162 ---- assertStringEquals( "meta content", ! content, metaTag.getMetaContent() ); Index: StyleScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/StyleScannerTest.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** StyleScannerTest.java 22 Sep 2003 02:40:11 -0000 1.28 --- StyleScannerTest.java 6 Oct 2003 01:43:28 -0000 1.29 *************** *** 47,61 **** } ! public void testScan() { createParser("<STYLE TYPE=\"text/css\"><!--\n\n"+ "</STYLE>","http://www.yle.fi/"); parser.addScanner(new StyleScanner("-s")); ! try { ! parseAndAssertNodeCount(1); ! assertTrue("Should've thrown exception",false); ! } ! catch (ParserException e) { ! ! } } --- 47,56 ---- } ! p... [truncated message content] |
From: <der...@us...> - 2003-10-06 01:43:33
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests In directory sc8-pr-cvs1:/tmp/cvs-serv2828/tests/parserHelperTests Modified Files: RemarkNodeParserTest.java StringParserTest.java Log Message: Updated tests to correspond to new behaviour. Mostly due to changes in order and case of tag contents. Of the forty odd remaining failing tests, the majority comprise altered functionality that needs to be resolved. Index: RemarkNodeParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/RemarkNodeParserTest.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** RemarkNodeParserTest.java 22 Sep 2003 02:40:08 -0000 1.36 --- RemarkNodeParserTest.java 6 Oct 2003 01:43:28 -0000 1.37 *************** *** 71,83 **** "</TEST>\n"); Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; assertEquals("Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.getText()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertEquals("Text of the remarkNode #6","\r\n Whats gonna happen now ?\r\n",remarkNode.getText()); } --- 71,83 ---- "</TEST>\n"); Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(15); // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a RemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; assertEquals("Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.getText()); ! // The tenth node should be a HTMLRemarkNode ! assertTrue("Tenth node should be a RemarkNode",node[9] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[9]; ! assertEquals("Text of the remarkNode #10","\n Whats gonna happen now ?\n",remarkNode.getText()); } *************** *** 94,106 **** "</TEST>\n"); Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; assertEquals("Plain Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.toPlainTextString()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertEquals("Plain Text of the remarkNode #6","\r\n Whats gonna happen now ?\r\n",remarkNode.getText()); } --- 94,106 ---- "</TEST>\n"); Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(15); // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a RemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; assertEquals("Plain Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.toPlainTextString()); ! // The tenth node should be a HTMLRemarkNode ! assertTrue("Tenth node should be a RemarkNode",node[9] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[9]; ! assertEquals("Plain Text of the remarkNode #10","\n Whats gonna happen now ?\n",remarkNode.getText()); } *************** *** 118,130 **** "</TEST>\n"); Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; assertStringEquals("Raw String of the remarkNode #1","<!-- saved from url=(0022)http://internet.e-mail -->",remarkNode.toHtml()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertStringEquals("Raw String of the remarkNode #6","<!--\r\n Whats gonna happen now ?\r\n-->",remarkNode.toHtml()); } --- 118,130 ---- "</TEST>\n"); Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(15); // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a RemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; assertStringEquals("Raw String of the remarkNode #1","<!-- saved from url=(0022)http://internet.e-mail -->",remarkNode.toHtml()); ! // The tenth node should be a HTMLRemarkNode ! assertTrue("Tenth node should be a RemarkNode",node[9] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[9]; ! assertStringEquals("Raw String of the remarkNode #6","<!--\n Whats gonna happen now ?\n-->",remarkNode.toHtml()); } *************** *** 156,160 **** assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Expected contents","\r\n",remarkNode.getText()); } --- 156,160 ---- assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Expected contents","\n\n",remarkNode.getText()); } *************** *** 168,172 **** createParser("<!-->"); parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; assertEquals("Expected contents","",remarkNode.getText()); --- 168,172 ---- createParser("<!-->"); parseAndAssertNodeCount(1); ! assertTrue("Node should be a RemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; assertEquals("Expected contents","",remarkNode.getText()); *************** *** 268,274 **** + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; assertEquals("Remark Node contents"," Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp ",remarkNode.getText()); } --- 268,274 ---- + "</HTML>\n" ); ! parseAndAssertNodeCount(18); ! assertTrue("Node should be a RemarkNode but was "+node[12],node[12] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[12]; assertEquals("Remark Node contents"," Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp ",remarkNode.getText()); } *************** *** 291,297 **** + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; assertEquals("Remark Node contents"," another -- -- comment ",remarkNode.getText()); } --- 291,297 ---- + "</HTML>\n" ); ! parseAndAssertNodeCount(18); ! assertTrue("Node should be a RemarkNode but was "+node[12],node[12] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[12]; assertEquals("Remark Node contents"," another -- -- comment ",remarkNode.getText()); } *************** *** 314,320 **** + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; assertEquals("Remark Node contents","",remarkNode.getText()); } --- 314,320 ---- + "</HTML>\n" ); ! parseAndAssertNodeCount(18); ! assertTrue("Node should be a RemarkNode but was "+node[12],node[12] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[12]; assertEquals("Remark Node contents","",remarkNode.getText()); } *************** *** 364,368 **** + "</html>\n" ); ! parseAndAssertNodeCount (10); } --- 364,368 ---- + "</html>\n" ); ! parseAndAssertNodeCount (18); } Index: StringParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/StringParserTest.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** StringParserTest.java 22 Sep 2003 02:40:08 -0000 1.38 --- StringParserTest.java 6 Oct 2003 01:43:28 -0000 1.39 *************** *** 55,60 **** createParser("<HTML><HEAD><TITLE>Google</TITLE>"); parseAndAssertNodeCount(5); ! // The fourth node should be a HTMLStringNode- with the text - Google ! assertTrue("Fourth node should be a HTMLStringNode",node[3] instanceof StringNode); StringNode stringNode = (StringNode)node[3]; assertEquals("Text of the StringNode","Google",stringNode.getText()); --- 55,60 ---- createParser("<HTML><HEAD><TITLE>Google</TITLE>"); parseAndAssertNodeCount(5); ! // The fourth node should be a StringNode- with the text - Google ! assertTrue("Fourth node should be a StringNode",node[3] instanceof StringNode); StringNode stringNode = (StringNode)node[3]; assertEquals("Text of the StringNode","Google",stringNode.getText()); *************** *** 77,82 **** parser.addScanner(new LinkScanner("-l")); parseAndAssertNodeCount(3); ! // The first node should be a HTMLStringNode- with the text - view these documents, you must have ! assertTrue("First node should be a HTMLStringNode",node[0] instanceof StringNode); StringNode stringNode = (StringNode)node[0]; assertEquals("Text of the StringNode","view these documents, you must have ",stringNode.getText()); --- 77,82 ---- parser.addScanner(new LinkScanner("-l")); parseAndAssertNodeCount(3); ! // The first node should be a StringNode- with the text - view these documents, you must have ! assertTrue("First node should be a StringNode",node[0] instanceof StringNode); StringNode stringNode = (StringNode)node[0]; assertEquals("Text of the StringNode","view these documents, you must have ",stringNode.getText()); *************** *** 148,154 **** ); parseAndAssertNodeCount(3); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! assertTrue("Second node should be HTMLRemarkNode",node[1] instanceof RemarkNode); ! assertTrue("Third node should be HTMLStringNode",node[2] instanceof StringNode); StringNode stringNode = (StringNode)node[0]; assertEquals("First String node contents","Before Comment ",stringNode.getText()); --- 148,154 ---- ); parseAndAssertNodeCount(3); ! assertTrue("First node should be StringNode",node[0] instanceof StringNode); ! assertTrue("Second node should be RemarkNode",node[1] instanceof RemarkNode); ! assertTrue("Third node should be StringNode",node[2] instanceof StringNode); StringNode stringNode = (StringNode)node[0]; assertEquals("First String node contents","Before Comment ",stringNode.getText()); *************** *** 167,171 **** createParser("a"); parseAndAssertNodeCount(1); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); StringNode stringNode = (StringNode)node[0]; assertEquals("First String node contents","a",stringNode.getText()); --- 167,171 ---- createParser("a"); parseAndAssertNodeCount(1); ! assertTrue("First node should be StringNode",node[0] instanceof StringNode); StringNode stringNode = (StringNode)node[0]; assertEquals("First String node contents","a",stringNode.getText()); *************** *** 173,181 **** public void testStringWithEmptyLine() throws ParserException { ! createParser("a\n\nb"); parseAndAssertNodeCount(1); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); StringNode stringNode = (StringNode)node[0]; ! assertStringEquals("First String node contents","a\r\n\r\nb",stringNode.getText()); } --- 173,182 ---- public void testStringWithEmptyLine() throws ParserException { ! String text = "a\n\nb"; ! createParser(text); parseAndAssertNodeCount(1); ! assertTrue("First node should be StringNode",node[0] instanceof StringNode); StringNode stringNode = (StringNode)node[0]; ! assertStringEquals("First String node contents",text,stringNode.getText()); } *************** *** 218,226 **** public void testStringWithLineBreaks() throws Exception { ! createParser("Testing &\nRefactoring"); parseAndAssertNodeCount(1); assertType("first node",StringNode.class,node[0]); StringNode stringNode = (StringNode)node[0]; ! assertStringEquals("text","Testing &\r\nRefactoring",stringNode.toPlainTextString()); } --- 219,228 ---- public void testStringWithLineBreaks() throws Exception { ! String text = "Testing &\nRefactoring"; ! createParser(text); parseAndAssertNodeCount(1); assertType("first node",StringNode.class,node[0]); StringNode stringNode = (StringNode)node[0]; ! assertStringEquals("text",text,stringNode.toPlainTextString()); } |
From: <der...@us...> - 2003-10-06 01:43:33
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper In directory sc8-pr-cvs1:/tmp/cvs-serv2828/parserHelper Modified Files: CompositeTagScannerHelper.java Log Message: Updated tests to correspond to new behaviour. Mostly due to changes in order and case of tag contents. Of the forty odd remaining failing tests, the majority comprise altered functionality that needs to be resolved. Index: CompositeTagScannerHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/CompositeTagScannerHelper.java,v retrieving revision 1.49 retrieving revision 1.50 diff -C2 -d -r1.49 -r1.50 *** CompositeTagScannerHelper.java 5 Oct 2003 13:49:50 -0000 1.49 --- CompositeTagScannerHelper.java 6 Oct 2003 01:43:27 -0000 1.50 *************** *** 45,55 **** public class CompositeTagScannerHelper { private CompositeTagScanner scanner; ! private Tag tag; private Lexer mLexer; private Tag endTag; private NodeList nodeList; private boolean endTagFound; - private int startingLineNumber; - private int endingLineNumber; private boolean balance_quotes; --- 45,53 ---- public class CompositeTagScannerHelper { private CompositeTagScanner scanner; ! private Tag mTag; private Lexer mLexer; private Tag endTag; private NodeList nodeList; private boolean endTagFound; private boolean balance_quotes; *************** *** 61,65 **** this.scanner = scanner; ! this.tag = tag; mLexer = lexer; this.endTag = null; --- 59,63 ---- this.scanner = scanner; ! mTag = tag; mLexer = lexer; this.endTag = null; *************** *** 70,79 **** public Tag scan() throws ParserException { - startingLineNumber = mLexer.getCurrentLineNumber (); if (shouldCreateEndTagAndExit()) { return createEndTagAndRepositionReader(); } scanner.beforeScanningStarts(); ! Node currentNode = tag; doEmptyXmlTagCheckOn(currentNode); --- 68,76 ---- public Tag scan() throws ParserException { if (shouldCreateEndTagAndExit()) { return createEndTagAndRepositionReader(); } scanner.beforeScanningStarts(); ! Node currentNode = mTag; doEmptyXmlTagCheckOn(currentNode); *************** *** 84,88 **** continue; if (currentNode instanceof Tag) ! doForceCorrectionCheckOn((Tag)currentNode); doEmptyXmlTagCheckOn(currentNode); --- 81,98 ---- continue; if (currentNode instanceof Tag) ! { ! Tag possibleEndTag = (Tag)currentNode; ! if (scanner.isTagToBeEndedFor(possibleEndTag) || ! ( ! !(possibleEndTag.isEndTag ()) && ! !scanner.isAllowSelfChildren() && ! possibleEndTag.getTagName().equals(mTag.getTagName()) ! )) ! { ! createCorrectionEndTagBefore(possibleEndTag.elementBegin()); ! mLexer.setPosition (possibleEndTag.elementBegin ()); ! endTagFound = true; ! } ! } doEmptyXmlTagCheckOn(currentNode); *************** *** 95,99 **** createCorrectionEndTagBefore (mLexer.getCursor ().getPosition ()); - endingLineNumber = mLexer.getCurrentLineNumber (); return createTag(); } --- 105,108 ---- *************** *** 104,109 **** private Tag createEndTagAndRepositionReader() { ! createCorrectionEndTagBefore (tag.elementBegin ()); ! mLexer.setPosition (tag.elementBegin ()); return endTag; } --- 113,118 ---- private Tag createEndTagAndRepositionReader() { ! createCorrectionEndTagBefore (mTag.elementBegin ()); ! mLexer.setPosition (mTag.elementBegin ()); return endTag; } *************** *** 111,115 **** private void createCorrectionEndTagBefore(int position) { ! String endTagName = "/" + tag.getTagName(); Vector attributes = new Vector (); attributes.addElement (new Attribute (endTagName, (String)null, (char)0)); --- 120,124 ---- private void createCorrectionEndTagBefore(int position) { ! String endTagName = "/" + mTag.getRawTagName(); Vector attributes = new Vector (); attributes.addElement (new Attribute (endTagName, (String)null, (char)0)); *************** *** 123,142 **** } ! private void createCorrectionEndTagBefore(Tag possibleEndTagCauser) { ! String endTagName = "/" + tag.getTagName(); ! int endTagBegin = possibleEndTagCauser.elementBegin(); ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! possibleEndTagCauser.setTagBegin(endTagEnd+1); ! Vector attributes = new Vector (); ! attributes.addElement (new Attribute (endTagName, (String)null, (char)0)); ! TagData data = new TagData( ! endTagName, ! endTagBegin, ! attributes, ! mLexer.getPage ().getUrl (), ! false); ! ! endTag = new Tag(data); ! } private Tag createTag() throws ParserException --- 132,151 ---- } ! // private void createCorrectionEndTagBefore(Tag possibleEndTagCauser) { ! // String endTagName = "/" + tag.getTagName(); ! // int endTagBegin = possibleEndTagCauser.elementBegin(); ! // int endTagEnd = endTagBegin + endTagName.length() + 2; ! // possibleEndTagCauser.setTagBegin(endTagEnd+1); ! // Vector attributes = new Vector (); ! // attributes.addElement (new Attribute (endTagName, (String)null, (char)0)); ! // TagData data = new TagData( ! // endTagName, ! // endTagBegin, ! // attributes, ! // mLexer.getPage ().getUrl (), ! // false); ! // ! // endTag = new Tag(data); ! // } private Tag createTag() throws ParserException *************** *** 146,158 **** data = new TagData( mLexer.getPage (), ! tag.elementBegin(), endTag.elementEnd(), ! tag.getAttributesEx (), mLexer.getPage ().getUrl (), ! tag.isEmptyXmlTag ()); CompositeTag newTag = (CompositeTag)scanner.createTag (data, new CompositeTagData( ! tag,endTag,nodeList ) ); --- 155,167 ---- data = new TagData( mLexer.getPage (), ! mTag.elementBegin(), endTag.elementEnd(), ! mTag.getAttributesEx (), mLexer.getPage ().getUrl (), ! mTag.isEmptyXmlTag ()); CompositeTag newTag = (CompositeTag)scanner.createTag (data, new CompositeTagData( ! mTag,endTag,nodeList ) ); *************** *** 187,191 **** private boolean isExpectedEndTag (TagNode possibleEndTag) { ! return (possibleEndTag.getTagName().equals (tag.getTagName ())); } --- 196,200 ---- private boolean isExpectedEndTag (TagNode possibleEndTag) { ! return (possibleEndTag.getTagName().equals (mTag.getTagName ())); } *************** *** 193,197 **** if (currentNode instanceof Tag) { Tag possibleEndTag = (Tag)currentNode; ! if (isXmlEndTag(tag)) { endTag = possibleEndTag; endTagFound = true; --- 202,206 ---- if (currentNode instanceof Tag) { Tag possibleEndTag = (Tag)currentNode; ! if (mTag.isEmptyXmlTag ()) { endTag = possibleEndTag; endTagFound = true; *************** *** 200,229 **** } ! private void doForceCorrectionCheckOn(Tag possibleEndTagCauser) { ! if (isEndTagMissing(possibleEndTagCauser)) { ! createCorrectionEndTagBefore(possibleEndTagCauser); ! ! endTagFound = true; ! } ! } ! ! private boolean isEndTagMissing(Tag possibleEndTag) { ! return ! scanner.isTagToBeEndedFor(possibleEndTag) || ! isSelfChildTagRecievedIncorrectly(possibleEndTag); ! } ! private boolean isSelfChildTagRecievedIncorrectly(Tag possibleEndTag) { ! return ( ! !(possibleEndTag.isEndTag ()) && ! !scanner.isAllowSelfChildren() && ! possibleEndTag.getTagName().equals(tag.getTagName()) ! ); ! } ! public boolean isXmlEndTag(Tag tag) { ! String tagText = tag.getText(); ! int lastSlash = tagText.lastIndexOf("/"); ! return (lastSlash == tagText.length()-1 || tag.isEmptyXmlTag()) && tag.getText().indexOf("://")==-1; ! } } --- 209,229 ---- } ! // private void doForceCorrectionCheckOn(Tag possibleEndTag) { ! // } ! // private boolean isEndTagMissing(Tag possibleEndTag) { ! // return ! // scanner.isTagToBeEndedFor(possibleEndTag) || ! // ( ! // !(possibleEndTag.isEndTag ()) && ! // !scanner.isAllowSelfChildren() && ! // possibleEndTag.getTagName().equals(tag.getTagName()) ! // ); ! // } ! // public boolean isXmlEndTag(Tag tag) { ! // String tagText = tag.getText(); ! // int lastSlash = tagText.lastIndexOf("/"); ! // return (lastSlash == tagText.length()-1 || tag.isEmptyXmlTag()) && tag.getText().indexOf("://")==-1; ! // } } |
From: <der...@us...> - 2003-10-06 01:43:33
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1:/tmp/cvs-serv2828/tags Modified Files: JspTag.java Log Message: Updated tests to correspond to new behaviour. Mostly due to changes in order and case of tag contents. Of the forty odd remaining failing tests, the majority comprise altered functionality that needs to be resolved. Index: JspTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/JspTag.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** JspTag.java 28 Sep 2003 15:33:58 -0000 1.30 --- JspTag.java 6 Oct 2003 01:43:28 -0000 1.31 *************** *** 37,41 **** { /** ! * The HTMLJspTag is constructed by providing the beginning posn, ending posn * and the tag contents. * @param tagData The data for this tag. --- 37,41 ---- { /** ! * The JspTag is constructed by providing the beginning posn, ending posn * and the tag contents. * @param tagData The data for this tag. *************** *** 47,51 **** public String toHtml() { ! return "<%"+getTagContents()+"%>"; } --- 47,51 ---- public String toHtml() { ! return "<"+getTagContents()+">"; } |
From: <der...@us...> - 2003-10-05 13:50:27
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper In directory sc8-pr-cvs1:/tmp/cvs-serv9618/parserHelper Modified Files: CompositeTagScannerHelper.java Removed Files: ScriptScannerHelper.java Log Message: Add bean like accessors for positions on Node, AbstractNode and AbstractNodeDecorator. Handle null page in Cursor. Add smartquotes mode in Lexer and CompositeTagScannerHelper. Add simple name constructor in Attribute. Remove emptyxmltag member, replace with computing accessors in TagNode. Removed ScriptScannerHelper and moved scanning logic to ScriptScanner. Reworked extractImageLocn in ImageScanner Implement extractXMLData in TagScanner. Made virtual tags zero length in TagData. Added push() to IteratorImpl. Added single node constructor to NodeList. Numerous and various test adjustments. Still 133 failures. Index: CompositeTagScannerHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/CompositeTagScannerHelper.java,v retrieving revision 1.48 retrieving revision 1.49 diff -C2 -d -r1.48 -r1.49 *** CompositeTagScannerHelper.java 28 Sep 2003 15:33:58 -0000 1.48 --- CompositeTagScannerHelper.java 5 Oct 2003 13:49:50 -0000 1.49 *************** *** 80,84 **** if (!endTagFound) { do { ! currentNode = mLexer.nextNode (); // balance_quotes ? if (currentNode==null) continue; --- 80,84 ---- if (!endTagFound) { do { ! currentNode = mLexer.nextNode (balance_quotes); if (currentNode==null) continue; --- ScriptScannerHelper.java DELETED --- |
From: <der...@us...> - 2003-10-05 13:50:26
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators In directory sc8-pr-cvs1:/tmp/cvs-serv9618/nodeDecorators Modified Files: AbstractNodeDecorator.java Log Message: Add bean like accessors for positions on Node, AbstractNode and AbstractNodeDecorator. Handle null page in Cursor. Add smartquotes mode in Lexer and CompositeTagScannerHelper. Add simple name constructor in Attribute. Remove emptyxmltag member, replace with computing accessors in TagNode. Removed ScriptScannerHelper and moved scanning logic to ScriptScanner. Reworked extractImageLocn in ImageScanner Implement extractXMLData in TagScanner. Made virtual tags zero length in TagData. Added push() to IteratorImpl. Added single node constructor to NodeList. Numerous and various test adjustments. Still 133 failures. Index: AbstractNodeDecorator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/AbstractNodeDecorator.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** AbstractNodeDecorator.java 22 Sep 2003 02:39:59 -0000 1.11 --- AbstractNodeDecorator.java 5 Oct 2003 13:49:49 -0000 1.12 *************** *** 61,64 **** --- 61,100 ---- } + /** + * Gets the starting position of the node. + * @return The start position. + */ + public int getStartPosition () + { + return (delegate.getStartPosition ()); + } + + /** + * Sets the starting position of the node. + * @param position The new start position. + */ + public void setStartPosition (int position) + { + delegate.setStartPosition (position); + } + + /** + * Gets the ending position of the node. + * @return The end position. + */ + public int getEndPosition () + { + return (delegate.getEndPosition ()); + } + + /** + * Sets the ending position of the node. + * @param position The new end position. + */ + public void setEndPosition (int position) + { + delegate.setEndPosition (position); + } + public boolean equals(Object arg0) { return delegate.equals(arg0); |
From: <der...@us...> - 2003-10-05 13:50:24
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv9618/lexer/nodes Modified Files: Attribute.java TagNode.java Log Message: Add bean like accessors for positions on Node, AbstractNode and AbstractNodeDecorator. Handle null page in Cursor. Add smartquotes mode in Lexer and CompositeTagScannerHelper. Add simple name constructor in Attribute. Remove emptyxmltag member, replace with computing accessors in TagNode. Removed ScriptScannerHelper and moved scanning logic to ScriptScanner. Reworked extractImageLocn in ImageScanner Implement extractXMLData in TagScanner. Made virtual tags zero length in TagData. Added push() to IteratorImpl. Added single node constructor to NodeList. Numerous and various test adjustments. Still 133 failures. Index: Attribute.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/Attribute.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** Attribute.java 2 Oct 2003 23:48:53 -0000 1.9 --- Attribute.java 5 Oct 2003 13:49:42 -0000 1.10 *************** *** 135,141 **** --- 135,155 ---- public Attribute (String name, String value, char quote) { + mPage = null; + mNameStart = 0; + mNameEnd = 0; + mValueStart = 0; + mValueEnd = 0; mName = name; mValue = value; mQuote = quote; + } + + /** + * Create a standalone attribute with the name given. + * @param name The name of this attribute. + */ + public Attribute (String name) + { + this (name, (String)null, (char)0); } Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** TagNode.java 3 Oct 2003 00:20:44 -0000 1.14 --- TagNode.java 5 Oct 2003 13:49:43 -0000 1.15 *************** *** 49,54 **** AbstractNode { - private boolean emptyXmlTag; - /** * The tag attributes. --- 49,52 ---- *************** *** 116,120 **** super (page, start, end); mAttributes = attributes; - emptyXmlTag = false; } --- 114,117 ---- *************** *** 374,394 **** public String getTagName () { - Vector attributes; String ret; ! ret = null; ! ! attributes = getAttributesEx (); ! if (0 != attributes.size ()) { ! ret = getRawTagName (); ! if (null != ret) ! { ! ret = ret.toUpperCase (); ! if (ret.startsWith ("/")) ! ret = ret.substring (1); ! if (ret.endsWith ("/")) ! ret = ret.substring (0, ret.length () - 1); ! } } --- 371,384 ---- public String getTagName () { String ret; ! ret = getRawTagName (); ! if (null != ret) { ! ret = ret.toUpperCase (); ! if (ret.startsWith ("/")) ! ret = ret.substring (1); ! if (ret.endsWith ("/")) ! ret = ret.substring (0, ret.length () - 1); } *************** *** 607,612 **** } } - if (isEmptyXmlTag ()) - ret.append ("/"); ret.append (">"); --- 597,600 ---- *************** *** 668,683 **** /** ! * Is this an empty xml tag of the form<br> ! * <tag/> ! * @return boolean */ public boolean isEmptyXmlTag () { ! return emptyXmlTag; } public void setEmptyXmlTag (boolean emptyXmlTag) { ! this.emptyXmlTag = emptyXmlTag; } --- 656,772 ---- /** ! * Is this an empty xml tag of the form <tag/>. ! * @return true if the last character of the last attribute is a '/'. */ public boolean isEmptyXmlTag () { ! Vector attributes; ! int size; ! Attribute attribute; ! String name; ! int length; ! boolean ret; ! ! ret = false; ! ! attributes = getAttributesEx (); ! size = attributes.size (); ! if (0 < size) ! { ! attribute = (Attribute)attributes.elementAt (size - 1); ! name = attribute.getName (); ! if (null != name) ! { ! length = name.length (); ! ret = name.charAt (length - 1) == '/'; ! } ! } ! ! return (ret); } + /** + * Set this tag to be an empty xml node, or not. + * Adds or removes an ending slash on the tag. + * @param If true, ensures there is an ending slash in the node, + * i.e. <tag/>, otherwise removes it. + */ public void setEmptyXmlTag (boolean emptyXmlTag) { ! Vector attributes; ! int size; ! Attribute attribute; ! String name; ! String value; ! int length; ! ! attributes = getAttributesEx (); ! size = attributes.size (); ! if (0 < size) ! { ! attribute = (Attribute)attributes.elementAt (size - 1); ! name = attribute.getName (); ! if (null != name) ! { ! length = name.length (); ! value = attribute.getValue (); ! if (null == value) ! if (name.charAt (length - 1) == '/') ! { ! // already exists, remove if requested ! if (!emptyXmlTag) ! if (1 == length) ! attributes.removeElementAt (size - 1); ! else ! { ! // this shouldn't happen, but covers the case ! // where no whitespace separates the slash ! // from the previous attribute ! name = name.substring (0, length - 1); ! attribute = new Attribute (name); ! attributes.removeElementAt (size - 1); ! attributes.addElement (attribute); ! } ! } ! else ! { ! // ends with attribute, add whitespace + slash if requested ! if (emptyXmlTag) ! { ! attribute = new Attribute ((String)null, " ", (char)0); ! attributes.addElement (attribute); ! attribute = new Attribute ("/"); ! attributes.addElement (attribute); ! } ! } ! else ! { ! // some valued attribute, add whitespace + slash if requested ! if (emptyXmlTag) ! { ! attribute = new Attribute ((String)null, " ", (char)0); ! attributes.addElement (attribute); ! attribute = new Attribute ("/"); ! attributes.addElement (attribute); ! } ! } ! } ! else ! { ! // ends with whitespace, add if requested ! if (emptyXmlTag) ! { ! attribute = new Attribute ("/"); ! attributes.addElement (attribute); ! } ! } ! } ! else ! // nothing there, add if requested ! if (emptyXmlTag) ! { ! attribute = new Attribute ("/"); ! attributes.addElement (attribute); ! } } |
From: <der...@us...> - 2003-10-05 13:50:18
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1:/tmp/cvs-serv9618/lexer Modified Files: Cursor.java Lexer.java Log Message: Add bean like accessors for positions on Node, AbstractNode and AbstractNodeDecorator. Handle null page in Cursor. Add smartquotes mode in Lexer and CompositeTagScannerHelper. Add simple name constructor in Attribute. Remove emptyxmltag member, replace with computing accessors in TagNode. Removed ScriptScannerHelper and moved scanning logic to ScriptScanner. Reworked extractImageLocn in ImageScanner Implement extractXMLData in TagScanner. Made virtual tags zero length in TagData. Added push() to IteratorImpl. Added single node constructor to NodeList. Numerous and various test adjustments. Still 133 failures. Index: Cursor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Cursor.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** Cursor.java 29 Sep 2003 00:00:38 -0000 1.10 --- Cursor.java 5 Oct 2003 13:49:41 -0000 1.11 *************** *** 137,146 **** ret = new StringBuffer (9 * 3 + 3); // three ints and delimiters ret.append (getPosition ()); - row = mPage.row (this); - column = mPage.column (this); ret.append ("["); ! ret.append (row); ret.append (","); ! ret.append (column); ret.append ("]"); --- 137,150 ---- ret = new StringBuffer (9 * 3 + 3); // three ints and delimiters ret.append (getPosition ()); ret.append ("["); ! if (null != mPage) ! ret.append (mPage.row (this)); ! else ! ret.append ("?"); ret.append (","); ! if (null != mPage) ! ret.append (mPage.column (this)); ! else ! ret.append ("?"); ret.append ("]"); Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** Lexer.java 29 Sep 2003 00:00:38 -0000 1.11 --- Lexer.java 5 Oct 2003 13:49:42 -0000 1.12 *************** *** 229,232 **** --- 229,246 ---- ParserException { + return nextNode (false); + } + + /** + * Get the next node from the source. + * @param quotesmart If <code>true</code>, strings ignore quoted contents. + * @return A RemarkNode, StringNode or TagNode, or <code>null</code> if no + * more lexemes are present. + * @exception ParserException If there is a problem with the underlying page. + */ + public Node nextNode (boolean quotesmart) + throws + ParserException + { Cursor probe; char ch; *************** *** 258,262 **** probe.retreat (); // remark and tag need this character if ('-' == ch) ! ret = parseRemark (probe); else { --- 272,276 ---- probe.retreat (); // remark and tag need this character if ('-' == ch) ! ret = parseRemark (probe, quotesmart); else { *************** *** 267,274 **** } else ! ret = parseString (probe); break; default: ! ret = parseString (probe); break; } --- 281,288 ---- } else ! ret = parseString (probe, quotesmart); break; default: ! ret = parseString (probe, quotesmart); break; } *************** *** 282,287 **** * letter is encountered, or the input stream is exhausted, in which * case <code>null</code> is returned. */ ! protected Node parseString (Cursor cursor) throws ParserException --- 296,303 ---- * letter is encountered, or the input stream is exhausted, in which * case <code>null</code> is returned. + * @param cursor The position at which to start scanning. + * @param quotesmart If <code>true</code>, strings ignore quoted contents. */ ! protected Node parseString (Cursor cursor, boolean quotesmart) throws ParserException *************** *** 292,298 **** --- 308,316 ---- int begin; int end; + char quote; Node ret; done = false; + quote = 0; while (!done) { *************** *** 300,304 **** if (0 == ch) done = true; ! else if ('<' == ch) { ch = mPage.getCharacter (cursor); --- 318,326 ---- if (0 == ch) done = true; ! else if (quotesmart && (0 == quote) && (('\'' == ch) || ('"' == ch))) ! quote = ch; // enter quoted state ! else if (quotesmart && (ch == quote)) ! quote = 0; // exit quoted state ! else if ((0 == quote) && ('<' == ch)) { ch = mPage.getCharacter (cursor); *************** *** 314,319 **** else { ! // it's not a tag, so keep going, ! // the extra characters consumed are in this string } } --- 336,341 ---- else { ! // it's not a tag, so keep going, but check for quotes ! cursor.retreat (); } } *************** *** 443,446 **** --- 465,469 ---- * one slot for each whitespace or attribute/value pair. * The first slot is for attribute name (kind of like a standalone attribute). + * @param cursor The position at which to start scanning. */ protected Node parseTag (Cursor cursor) *************** *** 624,629 **** * in the remark text. * We allow terminators like --!> even though this isn't part of the spec. */ ! protected Node parseRemark (Cursor cursor) throws ParserException --- 647,654 ---- * in the remark text. * We allow terminators like --!> even though this isn't part of the spec. + * @param cursor The position at which to start scanning. + * @param quotesmart If <code>true</code>, strings ignore quoted contents. */ ! protected Node parseRemark (Cursor cursor, boolean quotesmart) throws ParserException *************** *** 644,648 **** state = 1; else ! return (parseString (cursor)); break; case 1: // prior to the second open delimiter --- 669,673 ---- state = 1; else ! return (parseString (cursor, quotesmart)); break; case 1: // prior to the second open delimiter *************** *** 650,654 **** state = 2; else ! return (parseString (cursor)); break; case 2: // prior to the first closing delimiter --- 675,679 ---- state = 2; else ! return (parseString (cursor, quotesmart)); break; case 2: // prior to the first closing delimiter *************** *** 656,660 **** state = 3; else if (0 == ch) ! return (parseString (cursor)); // no terminator break; case 3: // prior to the second closing delimiter --- 681,685 ---- state = 3; else if (0 == ch) ! return (parseString (cursor, quotesmart)); // no terminator break; case 3: // prior to the second closing delimiter |