htmlparser-cvs Mailing List for HTML Parser (Page 43)
Brought to you by:
derrickoswald
You can subscribe to this list here.
| 2003 | Jan | Feb | Mar | Apr | May (141) | Jun (108) | Jul (66) | Aug (127) | Sep (155) | Oct (149) | Nov (72) | Dec (72) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2004 | Jan (100) | Feb (36) | Mar (21) | Apr (3) | May (87) | Jun (28) | Jul (84) | Aug (5) | Sep (14) | Oct | Nov | Dec |
| 2005 | Jan (1) | Feb (39) | Mar (26) | Apr (38) | May (14) | Jun (10) | Jul | Aug | Sep (13) | Oct (8) | Nov (10) | Dec |
| 2006 | Jan | Feb (1) | Mar (17) | Apr (20) | May (28) | Jun (24) | Jul | Aug | Sep | Oct | Nov | Dec |
| 2015 | Jan | Feb | Mar (1) | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
From: <der...@us...> - 2003-09-03 23:36:52
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications In directory sc8-pr-cvs1:/tmp/cvs-serv31228/parserapplications Modified Files: MailRipper.java Robot.java Log Message: Change tabs to spaces in all source files. Index: MailRipper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/MailRipper.java,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** MailRipper.java 24 Aug 2003 21:59:42 -0000 1.42 --- MailRipper.java 3 Sep 2003 23:36:19 -0000 1.43 *************** *** 46,113 **** public class MailRipper { private org.htmlparser.Parser parser; ! /** ! * MailRipper c'tor takes the url to be ripped ! * @param resourceLocation url to be ripped ! */ ! public MailRipper(String resourceLocation) { ! try { ! parser = new Parser(resourceLocation,new DefaultParserFeedback()); ! parser.registerScanners(); ! } ! catch (ParserException e) { ! System.err.println("Could not create parser object"); ! e.printStackTrace(); ! } ! } ! public static void main(String[] args) { ! System.out.println("Mail Ripper v" + Parser.getVersion ()); ! if (args.length<1 || args[0].equals("-help")) ! { ! System.out.println(); ! System.out.println("Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.MailRipper <resourceLocn/website>"); ! System.out.println(); ! System.out.println(" <resourceLocn> the name of the file to be parsed (with complete path "); ! System.out.println(" if not in current directory)"); ! System.out.println(" -help This screen"); ! System.out.println(); ! System.out.println("HTML Parser home page : http://htmlparser.sourceforge.net"); ! System.out.println(); ! System.out.println("Example : java -classpath htmlparser.jar com.kizna.parserapplications.MailRipper http://htmlparser.sourceforge.net"); ! System.out.println(); ! 
System.out.println("If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. "); ! System.exit(-1); ! } ! String resourceLocation = "http://htmlparser.sourceforge.net"; ! if (args.length!=0) resourceLocation = args[0]; ! ! MailRipper ripper = new MailRipper(resourceLocation); ! System.out.println("Ripping Site "+resourceLocation); ! try { ! for (Enumeration e=ripper.rip();e.hasMoreElements();) { ! LinkTag tag = (LinkTag)e.nextElement(); ! System.out.println("Ripped mail address : "+tag.getLink()); ! } ! } ! catch (ParserException e) { ! e.printStackTrace(); ! } ! } ! /** ! * Rip all mail addresses from the given url, and return an enumeration of such mail addresses. ! * @return Enumeration of mail addresses (a vector of LinkTag) ! */ ! public Enumeration rip() throws ParserException { ! Node node; ! Vector mailAddresses = new Vector(); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node = e.nextNode(); ! if (node instanceof LinkTag) ! { ! LinkTag linkTag = (LinkTag)node; ! if (linkTag.isMailLink()) mailAddresses.addElement(linkTag); ! } ! } ! return mailAddresses.elements(); ! } } --- 46,113 ---- public class MailRipper { private org.htmlparser.Parser parser; ! /** ! * MailRipper c'tor takes the url to be ripped ! * @param resourceLocation url to be ripped ! */ ! public MailRipper(String resourceLocation) { ! try { ! parser = new Parser(resourceLocation,new DefaultParserFeedback()); ! parser.registerScanners(); ! } ! catch (ParserException e) { ! System.err.println("Could not create parser object"); ! e.printStackTrace(); ! } ! } ! public static void main(String[] args) { ! System.out.println("Mail Ripper v" + Parser.getVersion ()); ! if (args.length<1 || args[0].equals("-help")) ! { ! System.out.println(); ! 
System.out.println("Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.MailRipper <resourceLocn/website>"); ! System.out.println(); ! System.out.println(" <resourceLocn> the name of the file to be parsed (with complete path "); ! System.out.println(" if not in current directory)"); ! System.out.println(" -help This screen"); ! System.out.println(); ! System.out.println("HTML Parser home page : http://htmlparser.sourceforge.net"); ! System.out.println(); ! System.out.println("Example : java -classpath htmlparser.jar com.kizna.parserapplications.MailRipper http://htmlparser.sourceforge.net"); ! System.out.println(); ! System.out.println("If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. "); ! System.exit(-1); ! } ! String resourceLocation = "http://htmlparser.sourceforge.net"; ! if (args.length!=0) resourceLocation = args[0]; ! ! MailRipper ripper = new MailRipper(resourceLocation); ! System.out.println("Ripping Site "+resourceLocation); ! try { ! for (Enumeration e=ripper.rip();e.hasMoreElements();) { ! LinkTag tag = (LinkTag)e.nextElement(); ! System.out.println("Ripped mail address : "+tag.getLink()); ! } ! } ! catch (ParserException e) { ! e.printStackTrace(); ! } ! } ! /** ! * Rip all mail addresses from the given url, and return an enumeration of such mail addresses. ! * @return Enumeration of mail addresses (a vector of LinkTag) ! */ ! public Enumeration rip() throws ParserException { ! Node node; ! Vector mailAddresses = new Vector(); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node = e.nextNode(); ! if (node instanceof LinkTag) ! { ! LinkTag linkTag = (LinkTag)node; ! if (linkTag.isMailLink()) mailAddresses.addElement(linkTag); ! } ! } ! return mailAddresses.elements(); ! 
} } Index: Robot.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/Robot.java,v retrieving revision 1.44 retrieving revision 1.45 diff -C2 -d -r1.44 -r1.45 *** Robot.java 24 Aug 2003 21:59:42 -0000 1.44 --- Robot.java 3 Sep 2003 23:36:19 -0000 1.45 *************** *** 40,138 **** public class Robot { private org.htmlparser.Parser parser; ! /** ! * Robot crawler - Provide the starting url ! */ ! public Robot(String resourceLocation) { ! try { ! parser = new Parser(resourceLocation,new DefaultParserFeedback()); ! parser.registerScanners(); ! } ! catch (ParserException e) { ! System.err.println("Error, could not create parser object"); ! e.printStackTrace(); ! } ! } ! /** ! * Crawl using a given crawl depth. ! * @param crawlDepth Depth of crawling ! */ ! public void crawl(int crawlDepth) throws ParserException ! { ! try { ! crawl(parser,crawlDepth); ! } ! catch (ParserException e) { ! throw new ParserException("HTMLParserException at crawl("+crawlDepth+")",e); ! } ! } ! /** ! * Crawl using a given parser object, and a given crawl depth. ! * @param parser Parser object ! * @param crawlDepth Depth of crawling ! */ ! public void crawl(Parser parser,int crawlDepth) throws ParserException { ! System.out.println(" crawlDepth = "+crawlDepth); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! Node node = e.nextNode(); ! if (node instanceof LinkTag) ! { ! LinkTag linkTag = (LinkTag)node; ! { ! if (!linkTag.isMailLink()) ! { ! if (linkTag.getLink().toUpperCase().indexOf("HTM")!=-1 || ! linkTag.getLink().toUpperCase().indexOf("COM")!=-1 || ! linkTag.getLink().toUpperCase().indexOf("ORG")!=-1) ! { ! if (crawlDepth>0) ! { ! Parser newParser = new Parser(linkTag.getLink(),new DefaultParserFeedback()); ! newParser.registerScanners(); ! System.out.print("Crawling to "+linkTag.getLink()); ! crawl(newParser,crawlDepth-1); ! } ! 
else System.out.println(linkTag.getLink()); ! } ! } ! } ! } ! } ! } ! public static void main(String[] args) ! { ! System.out.println("Robot Crawler v" + Parser.getVersion ()); ! if (args.length<2 || args[0].equals("-help")) ! { ! System.out.println(); ! System.out.println("Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.Robot <resourceLocn/website> <depth>"); ! System.out.println(); ! System.out.println(" <resourceLocn> the name of the file to be parsed (with complete path "); ! System.out.println(" if not in current directory)"); ! System.out.println(" <depth> No of links to be followed from each link"); ! System.out.println(" -help This screen"); ! System.out.println(); ! System.out.println("HTML Parser home page : http://htmlparser.sourceforge.net"); ! System.out.println(); ! System.out.println("Example : java -classpath htmlparser.jar com.kizna.parserapplications.Robot http://www.google.com 3"); ! System.out.println(); ! System.out.println("If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. "); ! System.exit(-1); ! } ! String resourceLocation=""; ! int crawlDepth = 1; ! if (args.length!=0) resourceLocation = args[0]; ! if (args.length==2) crawlDepth=Integer.valueOf(args[1]).intValue(); ! ! ! Robot robot = new Robot(resourceLocation); ! System.out.println("Crawling Site "+resourceLocation); ! try { ! robot.crawl(crawlDepth); ! } ! catch (ParserException e) { ! e.printStackTrace(); ! } ! } } --- 40,138 ---- public class Robot { private org.htmlparser.Parser parser; ! /** ! * Robot crawler - Provide the starting url ! */ ! public Robot(String resourceLocation) { ! try { ! parser = new Parser(resourceLocation,new DefaultParserFeedback()); ! parser.registerScanners(); ! } ! catch (ParserException e) { ! System.err.println("Error, could not create parser object"); ! 
e.printStackTrace(); ! } ! } ! /** ! * Crawl using a given crawl depth. ! * @param crawlDepth Depth of crawling ! */ ! public void crawl(int crawlDepth) throws ParserException ! { ! try { ! crawl(parser,crawlDepth); ! } ! catch (ParserException e) { ! throw new ParserException("HTMLParserException at crawl("+crawlDepth+")",e); ! } ! } ! /** ! * Crawl using a given parser object, and a given crawl depth. ! * @param parser Parser object ! * @param crawlDepth Depth of crawling ! */ ! public void crawl(Parser parser,int crawlDepth) throws ParserException { ! System.out.println(" crawlDepth = "+crawlDepth); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! Node node = e.nextNode(); ! if (node instanceof LinkTag) ! { ! LinkTag linkTag = (LinkTag)node; ! { ! if (!linkTag.isMailLink()) ! { ! if (linkTag.getLink().toUpperCase().indexOf("HTM")!=-1 || ! linkTag.getLink().toUpperCase().indexOf("COM")!=-1 || ! linkTag.getLink().toUpperCase().indexOf("ORG")!=-1) ! { ! if (crawlDepth>0) ! { ! Parser newParser = new Parser(linkTag.getLink(),new DefaultParserFeedback()); ! newParser.registerScanners(); ! System.out.print("Crawling to "+linkTag.getLink()); ! crawl(newParser,crawlDepth-1); ! } ! else System.out.println(linkTag.getLink()); ! } ! } ! } ! } ! } ! } ! public static void main(String[] args) ! { ! System.out.println("Robot Crawler v" + Parser.getVersion ()); ! if (args.length<2 || args[0].equals("-help")) ! { ! System.out.println(); ! System.out.println("Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.Robot <resourceLocn/website> <depth>"); ! System.out.println(); ! System.out.println(" <resourceLocn> the name of the file to be parsed (with complete path "); ! System.out.println(" if not in current directory)"); ! System.out.println(" <depth> No of links to be followed from each link"); ! System.out.println(" -help This screen"); ! System.out.println(); ! 
System.out.println("HTML Parser home page : http://htmlparser.sourceforge.net"); ! System.out.println(); ! System.out.println("Example : java -classpath htmlparser.jar com.kizna.parserapplications.Robot http://www.google.com 3"); ! System.out.println(); ! System.out.println("If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. "); ! System.exit(-1); ! } ! String resourceLocation=""; ! int crawlDepth = 1; ! if (args.length!=0) resourceLocation = args[0]; ! if (args.length==2) crawlDepth=Integer.valueOf(args[1]).intValue(); ! ! ! Robot robot = new Robot(resourceLocation); ! System.out.println("Crawling Site "+resourceLocation); ! try { ! robot.crawl(crawlDepth); ! } ! catch (ParserException e) { ! e.printStackTrace(); ! } ! } } |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators In directory sc8-pr-cvs1:/tmp/cvs-serv31228/nodeDecorators Modified Files: AbstractNodeDecorator.java DecodingNode.java EscapeCharacterRemovingNode.java NonBreakingSpaceConvertingNode.java Log Message: Change tabs to spaces in all source files. Index: AbstractNodeDecorator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/AbstractNodeDecorator.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** AbstractNodeDecorator.java 24 Aug 2003 21:59:41 -0000 1.7 --- AbstractNodeDecorator.java 3 Sep 2003 23:36:19 -0000 1.8 *************** *** 35,79 **** public abstract class AbstractNodeDecorator implements Node { ! protected Node delegate; ! ! protected AbstractNodeDecorator(Node delegate) { ! this.delegate = delegate; ! } ! public void accept(Object visitor) { ! delegate.accept(visitor); ! } ! public void collectInto(NodeList collectionList, Class nodeType) { ! delegate.collectInto(collectionList, nodeType); ! } ! public void collectInto(NodeList collectionList, String filter) { ! delegate.collectInto(collectionList, filter); ! } ! public int elementBegin() { ! return delegate.elementBegin(); ! } ! public int elementEnd() { ! return delegate.elementEnd(); ! } ! public boolean equals(Object arg0) { ! return delegate.equals(arg0); ! } ! public Node getParent() { ! return delegate.getParent(); ! } ! public String getText() { ! return delegate.getText(); ! } ! public void setParent(Node node) { ! delegate.setParent(node); ! } /** --- 35,79 ---- public abstract class AbstractNodeDecorator implements Node { ! protected Node delegate; ! ! protected AbstractNodeDecorator(Node delegate) { ! this.delegate = delegate; ! } ! public void accept(Object visitor) { ! delegate.accept(visitor); ! } ! public void collectInto(NodeList collectionList, Class nodeType) { ! 
delegate.collectInto(collectionList, nodeType); ! } ! public void collectInto(NodeList collectionList, String filter) { ! delegate.collectInto(collectionList, filter); ! } ! public int elementBegin() { ! return delegate.elementBegin(); ! } ! public int elementEnd() { ! return delegate.elementEnd(); ! } ! public boolean equals(Object arg0) { ! return delegate.equals(arg0); ! } ! public Node getParent() { ! return delegate.getParent(); ! } ! public String getText() { ! return delegate.getText(); ! } ! public void setParent(Node node) { ! delegate.setParent(node); ! } /** *************** *** 81,85 **** * @return The list of children contained by this node, if it's been set, <code>null</code> otherwise. */ ! public NodeList getChildren () { return (delegate.getChildren ()); --- 81,85 ---- * @return The list of children contained by this node, if it's been set, <code>null</code> otherwise. */ ! public NodeList getChildren () { return (delegate.getChildren ()); *************** *** 90,113 **** * @param children The new list of children this node contains. */ ! public void setChildren (NodeList children) { delegate.setChildren (children); } ! public void setText(String text) { ! delegate.setText(text); ! } ! public String toHtml() { ! return delegate.toHtml(); ! } ! public String toPlainTextString() { ! return delegate.toPlainTextString(); ! } ! public String toString() { ! return delegate.toString(); ! } } --- 90,113 ---- * @param children The new list of children this node contains. */ ! public void setChildren (NodeList children) { delegate.setChildren (children); } ! public void setText(String text) { ! delegate.setText(text); ! } ! public String toHtml() { ! return delegate.toHtml(); ! } ! public String toPlainTextString() { ! return delegate.toPlainTextString(); ! } ! public String toString() { ! return delegate.toString(); ! 
} } Index: DecodingNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/DecodingNode.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** DecodingNode.java 24 Aug 2003 21:59:41 -0000 1.7 --- DecodingNode.java 3 Sep 2003 23:36:19 -0000 1.8 *************** *** 35,44 **** public class DecodingNode extends AbstractNodeDecorator { ! public DecodingNode(Node node) { ! super(node); ! } ! public String toPlainTextString() { ! return Translate.decode(delegate.toPlainTextString()); ! } } --- 35,44 ---- public class DecodingNode extends AbstractNodeDecorator { ! public DecodingNode(Node node) { ! super(node); ! } ! public String toPlainTextString() { ! return Translate.decode(delegate.toPlainTextString()); ! } } Index: EscapeCharacterRemovingNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/EscapeCharacterRemovingNode.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** EscapeCharacterRemovingNode.java 24 Aug 2003 21:59:41 -0000 1.5 --- EscapeCharacterRemovingNode.java 3 Sep 2003 23:36:19 -0000 1.6 *************** *** 35,44 **** public class EscapeCharacterRemovingNode extends AbstractNodeDecorator { ! public EscapeCharacterRemovingNode(Node newDelegate) { ! super(newDelegate); ! } ! public String toPlainTextString() { ! return ParserUtils.removeEscapeCharacters(delegate.toPlainTextString()); ! } } --- 35,44 ---- public class EscapeCharacterRemovingNode extends AbstractNodeDecorator { ! public EscapeCharacterRemovingNode(Node newDelegate) { ! super(newDelegate); ! } ! public String toPlainTextString() { ! return ParserUtils.removeEscapeCharacters(delegate.toPlainTextString()); ! 
} } Index: NonBreakingSpaceConvertingNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/NonBreakingSpaceConvertingNode.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** NonBreakingSpaceConvertingNode.java 24 Aug 2003 21:59:41 -0000 1.5 --- NonBreakingSpaceConvertingNode.java 3 Sep 2003 23:36:19 -0000 1.6 *************** *** 34,44 **** public class NonBreakingSpaceConvertingNode extends AbstractNodeDecorator { ! public NonBreakingSpaceConvertingNode(Node newDelegate) { ! super(newDelegate); ! } ! public String toPlainTextString() { ! String result = delegate.toPlainTextString(); ! return result.replace ('\u00a0',' '); ! } } --- 34,44 ---- public class NonBreakingSpaceConvertingNode extends AbstractNodeDecorator { ! public NonBreakingSpaceConvertingNode(Node newDelegate) { ! super(newDelegate); ! } ! public String toPlainTextString() { ! String result = delegate.toPlainTextString(); ! return result.replace ('\u00a0',' '); ! } } |
From: <der...@us...> - 2003-09-03 23:36:52
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1:/tmp/cvs-serv31228/lexer Modified Files: Page.java PageIndex.java Log Message: Change tabs to spaces in all source files. Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** Page.java 24 Aug 2003 21:59:41 -0000 1.10 --- Page.java 3 Sep 2003 23:36:18 -0000 1.11 *************** *** 311,315 **** if (null != content) { ! index = content.indexOf (CHARSET_STRING); if (index != -1) --- 311,315 ---- if (null != content) { ! index = content.indexOf (CHARSET_STRING); if (index != -1) *************** *** 323,327 **** content = content.substring (0, index); ! //remove any double quotes from around charset string if (content.startsWith ("\"") && content.endsWith ("\"") && (1 < content.length ())) content = content.substring (1, content.length () - 1); --- 323,327 ---- content = content.substring (0, index); ! //remove any double quotes from around charset string if (content.startsWith ("\"") && content.endsWith ("\"") && (1 < content.length ())) content = content.substring (1, content.length () - 1); *************** *** 336,340 **** // that is, case is always ignored when comparing charset names. if (!ret.equalsIgnoreCase (content)) ! { System.out.println ( "detected charset \"" --- 336,340 ---- // that is, case is always ignored when comparing charset names. if (!ret.equalsIgnoreCase (content)) ! { System.out.println ( "detected charset \"" *************** *** 343,347 **** + ret + "\""); ! } } } --- 343,347 ---- + ret + "\""); ! } } } *************** *** 351,400 **** } ! /** ! * Lookup a character set name. ! * <em>Vacuous for JVM's without <code>java.nio.charset</code>.</em> ! * This uses reflection so the code will still run under prior JDK's but ! * in that case the default is always returned. ! 
* @param name The name to look up. One of the aliases for a character set. ! * @param _default The name to return if the lookup fails. ! */ ! public String findCharset (String name, String _default) ! { ! String ret; ! try ! { ! Class cls; ! Method method; ! Object object; cls = Class.forName ("java.nio.charset.Charset"); ! method = cls.getMethod ("forName", new Class[] { String.class }); ! object = method.invoke (null, new Object[] { name }); ! method = cls.getMethod ("name", new Class[] { }); ! object = method.invoke (object, new Object[] { }); ! ret = (String)object; ! } ! catch (ClassNotFoundException cnfe) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (NoSuchMethodException nsme) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (IllegalAccessException ia) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (InvocationTargetException ita) ! { ! // java.nio.charset.IllegalCharsetNameException ! // and java.nio.charset.UnsupportedCharsetException ! // return the default ! ret = _default; System.out.println ( "unable to determine cannonical charset name for " --- 351,400 ---- } ! /** ! * Lookup a character set name. ! * <em>Vacuous for JVM's without <code>java.nio.charset</code>.</em> ! * This uses reflection so the code will still run under prior JDK's but ! * in that case the default is always returned. ! * @param name The name to look up. One of the aliases for a character set. ! * @param _default The name to return if the lookup fails. ! */ ! public String findCharset (String name, String _default) ! { ! String ret; ! try ! { ! Class cls; ! Method method; ! Object object; cls = Class.forName ("java.nio.charset.Charset"); ! method = cls.getMethod ("forName", new Class[] { String.class }); ! object = method.invoke (null, new Object[] { name }); ! method = cls.getMethod ("name", new Class[] { }); ! 
object = method.invoke (object, new Object[] { }); ! ret = (String)object; ! } ! catch (ClassNotFoundException cnfe) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (NoSuchMethodException nsme) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (IllegalAccessException ia) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (InvocationTargetException ita) ! { ! // java.nio.charset.IllegalCharsetNameException ! // and java.nio.charset.UnsupportedCharsetException ! // return the default ! ret = _default; System.out.println ( "unable to determine cannonical charset name for " *************** *** 402,409 **** + " - using " + _default); ! } ! return (ret); ! } /** --- 402,409 ---- + " - using " + _default); ! } ! return (ret); ! } /** Index: PageIndex.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageIndex.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** PageIndex.java 24 Aug 2003 21:59:41 -0000 1.5 --- PageIndex.java 3 Sep 2003 23:36:18 -0000 1.6 *************** *** 1,397 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! 
// License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! // For any questions or suggestions, you can write to me at : ! // Email :so...@in... ! // ! // Postal Address : ! // Somik Raha ! // Extreme Programmer & Coach ! // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, ! // CA 94708, USA ! // Website : http://www.industriallogic.com ! // ! // This class was contributed by ! // Derrick Oswald ! // ! ! package org.htmlparser.lexer; ! ! import org.htmlparser.util.sort.Ordered; ! import org.htmlparser.util.sort.Sort; ! import org.htmlparser.util.sort.Sortable; ! ! /** ! * A sorted array of integers which are the positions of end of line characters. ! * Maintains a list of integers which are (the positions of the first ! * characters of each line. ! * To facilitate processing the first element should be maintained at position 0. ! * Facilities to add, remove, search and determine row and column are provided. ! * This class provides similar functionality to a Vector but ! * does not incur the overhead of an <code>Integer</code> object per element. ! */ ! public class PageIndex implements Sortable ! { ! /** ! * Increment for allocations. ! */ ! protected static final int mIncrement = 100; ! ! /** ! * The number of valid elements. ! */ ! protected int mCount; ! ! /** ! * The elements. ! */ ! protected int[] mIndices; ! ! /** ! * The page associated with this index. ! */ ! protected Page mPage; ! ! /** ! * Create an empty index. ! * @param page The page associated with this index. ! */ ! public PageIndex (Page page) ! { ! mPage = page; ! mIndices = new int[mIncrement]; ! mCount = 0; ! } ! ! /** ! * Create an index with the one element given. ! * @param page The page associated with this index. ! * @param cursor The single element for the new index. ! */ ! public PageIndex (Page page, int cursor) ! { ! this (page); ! mIndices[0] = cursor; ! mCount = 1; ! } ! ! 
/** ! * Create an index with the elements given. ! * @param page The page associated with this index. ! * @param cursors The initial elements of the index. ! * NOTE: The list must be sorted in ascending order. ! */ ! public PageIndex (Page page, int[] cursors) ! { ! mPage = page; ! mIndices = cursors; ! mCount = cursors.length; ! } ! ! /** ! * Get this index's page. ! * @return The page associated with this index. ! */ ! public Page getPage () ! { ! return (mPage); ! } ! ! /** ! * Get the count of elements. ! * @return The number of valid elements. ! */ ! public int size () ! { ! return (mCount); ! } ! ! /** ! * Get the capacity for elements without reallocation. ! * @return The number of spaces for elements. ! */ ! public int capacity () ! { ! return (mIndices.length); ! } ! ! /** ! * Add an element to the list ! * @param cursor The element to add. ! * @return The position at which the element was inserted or ! * the index of the existing element if it is a duplicate. ! */ ! public int add (Cursor cursor) ! { ! int position; ! int ret; ! ! // find where it goes ! ret = Sort.bsearch (this, cursor); ! ! // insert, but not twice ! position = cursor.getPosition (); ! if (!((ret < size ()) && (position == mIndices[ret]))) ! insertElementAt (position, ret); ! ! return (ret); ! } ! ! /** ! * Add an element to the list ! * @param cursor The element to add. ! * @return The position at which the element was inserted or ! * the index of the existing element if it is a duplicate. ! */ ! public int add (int cursor) ! { ! return (add (new Cursor (getPage (), cursor))); ! } ! ! /** ! * Remove an element from the list ! * @param cursor The element to remove. ! */ ! public void remove (Cursor cursor) ! { ! int i; ! ! // find it ! i = Sort.bsearch (this, cursor); ! ! // remove ! if ((i < size ()) && (cursor.getPosition () == mIndices[i])) ! removeElementAt (i); ! } ! ! /** ! * Remove an element from the list ! * @param cursor The element to remove. ! */ ! 
public void remove (int cursor) ! { ! remove (new Cursor (getPage (), cursor)); ! } ! ! /** ! * Get an element from the list. ! * @param index The index of the element to get. ! * @return The element. ! */ ! public int elementAt (int index) ! { ! return (mIndices[index]); ! } ! ! /** ! * Get the line number for a cursor. ! * @param cursor The character offset into the page. ! * @return The line number the character is in. ! */ ! public int row (Cursor cursor) ! { ! int ret; ! ! ret = Sort.bsearch (this, cursor); ! // handle line transition, the search returns the index if it matches ! // exactly one of the line end positions, so we advance one line if ! // it's equal to the offset at the row index, since that position is ! // actually the beginning of the next line ! if ((ret < mCount) && (cursor.getPosition () == mIndices[ret])) ! ret++; ! ! return (ret); ! } ! ! /** ! * Get the line number for a position. ! * @param cursor The character offset into the page. ! * @return The line number the character is in. ! */ ! public int row (int cursor) ! { ! return (row (new Cursor (getPage (), cursor))); ! } ! ! /** ! * Get the column number for a cursor. ! * @param cursor The character offset into the page. ! * @return The character offset into the line this cursor is on. ! */ ! public int column (Cursor cursor) ! { ! int row; ! int previous; ! ! row = row (cursor); ! if (0 != row) ! previous = this.elementAt (row - 1); ! else ! previous = 0; ! ! return (cursor.getPosition () - previous); ! } ! ! /** ! * Get the column number for a position. ! * @param cursor The character offset into the page. ! * @return The character offset into the line this cursor is on. ! */ ! public int column (int cursor) ! { ! return (column (new Cursor (getPage (), cursor))); ! } ! ! /** ! * Get the elements as an array of int. ! * @return A new array containing the elements, ! * i.e. a snapshot of the index. ! */ ! public int[] get () ! { ! int[] ret = new int[size ()]; ! 
System.arraycopy (mIndices, 0, ret, 0, size ()); ! ! return (ret); ! } ! ! /** ! * Binary search for the element. ! * @param cursor The element to search for. ! * @return The index at which the element was found or is to be inserted. ! */ ! protected int bsearch (int cursor) ! { ! return (Sort.bsearch (this, new Cursor (getPage (), cursor))); ! } ! ! /** ! * Binary search for the element. ! * @param cursor The element to search for. ! * @param first The index to start at. ! * @param last The index to stop at. ! * @return The index at which the element was found or is to be inserted. ! */ ! protected int bsearch (int cursor, int first, int last) ! { ! return (Sort.bsearch (this, new Cursor (getPage (), cursor), first, last)); ! } ! ! /** ! * Inserts an element into the list. ! * The index must be a value greater than or equal to 0 and less than ! * or equal to the current size of the array. ! * @param cursor The element to insert. ! * @param index The index in the list to insert it at. ! */ ! protected void insertElementAt (int cursor, int index) ! { ! if ((index >= capacity ()) || (size () == capacity ())) ! { // allocate more space ! int new_values[] = new int[Math.max (capacity () + mIncrement, index + 1)]; ! if (index < capacity ()) ! { ! // copy and shift up in two pieces ! System.arraycopy (mIndices, 0, new_values, 0, index); ! System.arraycopy (mIndices, index, new_values, index + 1, capacity () - index); ! } ! else ! System.arraycopy (mIndices, 0, new_values, 0, capacity ()); ! mIndices = new_values; ! } ! else if (index < size ()) ! // shift up ! System.arraycopy (mIndices, index, mIndices, index + 1, capacity () - (index + 1)); ! mIndices[index] = cursor; ! mCount++; ! } ! ! /** ! * Remove an element from the list. ! * @param index The index of the item to remove. ! */ ! protected void removeElementAt (int index) ! { ! // shift ! System.arraycopy (mIndices, index + 1, mIndices, index, capacity () - (index + 1)); ! mIndices[capacity() - 1] = 0; ! 
mCount--; ! } ! ! // ! // Sortable interface ! // ! ! /** ! * Returns the first index of the Sortable. ! * @return The index of the first element. ! */ ! public int first () ! { ! return (0); ! } ! ! /** ! * Returns the last index of the Sortable. ! * @return The index of the last element. ! * If this were an array object this would be (object.length - 1). ! */ ! public int last () ! { ! return (mCount - 1); ! } ! ! /** ! * Fetch the object at the given index. ! * @param index The item number to get. ! * @param reuse If this argument is not null, it is an object ! * acquired from a previous fetch that is no longer needed and ! * may be returned as the result if it makes mores sense to alter ! * and return it than to fetch or create a new element. That is, the ! * reuse object is garbage and may be used to avoid allocating a new ! * object if that would normally be the strategy. ! * @return The Ordered object at that index. ! */ ! public Ordered fetch (int index, Ordered reuse) ! { ! Cursor ret; ! ! if (null != reuse) ! { ! ret = (Cursor)reuse; ! ret.mPosition = mIndices[index]; ! ret.mPage = getPage (); // redundant ! } ! else ! ret = new Cursor (getPage (), mIndices[index]); ! ! return (ret); ! } ! ! /** ! * Swaps the elements at the given indicies. ! * @param i One index. ! * @param j The other index. ! */ ! public void swap (int i, int j) ! { ! int temp = mIndices[i]; ! mIndices[i] = mIndices[j]; ! mIndices[j] = temp; ! } ! } --- 1,397 ---- ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! // For any questions or suggestions, you can write to me at : ! // Email :so...@in... ! // ! // Postal Address : ! // Somik Raha ! // Extreme Programmer & Coach ! // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, ! // CA 94708, USA ! // Website : http://www.industriallogic.com ! // ! // This class was contributed by ! // Derrick Oswald ! // ! ! package org.htmlparser.lexer; ! ! import org.htmlparser.util.sort.Ordered; ! import org.htmlparser.util.sort.Sort; ! import org.htmlparser.util.sort.Sortable; ! ! /** ! * A sorted array of integers which are the positions of end of line characters. ! * Maintains a list of integers which are (the positions of the first ! * characters of each line. ! * To facilitate processing the first element should be maintained at position 0. ! * Facilities to add, remove, search and determine row and column are provided. ! * This class provides similar functionality to a Vector but ! * does not incur the overhead of an <code>Integer</code> object per element. ! */ ! public class PageIndex implements Sortable ! { ! /** ! * Increment for allocations. ! */ ! protected static final int mIncrement = 100; ! ! /** ! * The number of valid elements. ! */ ! protected int mCount; ! ! /** ! * The elements. ! */ ! protected int[] mIndices; ! ! /** ! * The page associated with this index. ! */ ! protected Page mPage; ! ! /** ! * Create an empty index. ! * @param page The page associated with this index. ! */ ! public PageIndex (Page page) ! { ! mPage = page; ! mIndices = new int[mIncrement]; ! mCount = 0; ! } ! ! /** ! * Create an index with the one element given. ! 
* @param page The page associated with this index. ! * @param cursor The single element for the new index. ! */ ! public PageIndex (Page page, int cursor) ! { ! this (page); ! mIndices[0] = cursor; ! mCount = 1; ! } ! ! /** ! * Create an index with the elements given. ! * @param page The page associated with this index. ! * @param cursors The initial elements of the index. ! * NOTE: The list must be sorted in ascending order. ! */ ! public PageIndex (Page page, int[] cursors) ! { ! mPage = page; ! mIndices = cursors; ! mCount = cursors.length; ! } ! ! /** ! * Get this index's page. ! * @return The page associated with this index. ! */ ! public Page getPage () ! { ! return (mPage); ! } ! ! /** ! * Get the count of elements. ! * @return The number of valid elements. ! */ ! public int size () ! { ! return (mCount); ! } ! ! /** ! * Get the capacity for elements without reallocation. ! * @return The number of spaces for elements. ! */ ! public int capacity () ! { ! return (mIndices.length); ! } ! ! /** ! * Add an element to the list ! * @param cursor The element to add. ! * @return The position at which the element was inserted or ! * the index of the existing element if it is a duplicate. ! */ ! public int add (Cursor cursor) ! { ! int position; ! int ret; ! ! // find where it goes ! ret = Sort.bsearch (this, cursor); ! ! // insert, but not twice ! position = cursor.getPosition (); ! if (!((ret < size ()) && (position == mIndices[ret]))) ! insertElementAt (position, ret); ! ! return (ret); ! } ! ! /** ! * Add an element to the list ! * @param cursor The element to add. ! * @return The position at which the element was inserted or ! * the index of the existing element if it is a duplicate. ! */ ! public int add (int cursor) ! { ! return (add (new Cursor (getPage (), cursor))); ! } ! ! /** ! * Remove an element from the list ! * @param cursor The element to remove. ! */ ! public void remove (Cursor cursor) ! { ! int i; ! ! // find it ! i = Sort.bsearch (this, cursor); ! 
! // remove ! if ((i < size ()) && (cursor.getPosition () == mIndices[i])) ! removeElementAt (i); ! } ! ! /** ! * Remove an element from the list ! * @param cursor The element to remove. ! */ ! public void remove (int cursor) ! { ! remove (new Cursor (getPage (), cursor)); ! } ! ! /** ! * Get an element from the list. ! * @param index The index of the element to get. ! * @return The element. ! */ ! public int elementAt (int index) ! { ! return (mIndices[index]); ! } ! ! /** ! * Get the line number for a cursor. ! * @param cursor The character offset into the page. ! * @return The line number the character is in. ! */ ! public int row (Cursor cursor) ! { ! int ret; ! ! ret = Sort.bsearch (this, cursor); ! // handle line transition, the search returns the index if it matches ! // exactly one of the line end positions, so we advance one line if ! // it's equal to the offset at the row index, since that position is ! // actually the beginning of the next line ! if ((ret < mCount) && (cursor.getPosition () == mIndices[ret])) ! ret++; ! ! return (ret); ! } ! ! /** ! * Get the line number for a position. ! * @param cursor The character offset into the page. ! * @return The line number the character is in. ! */ ! public int row (int cursor) ! { ! return (row (new Cursor (getPage (), cursor))); ! } ! ! /** ! * Get the column number for a cursor. ! * @param cursor The character offset into the page. ! * @return The character offset into the line this cursor is on. ! */ ! public int column (Cursor cursor) ! { ! int row; ! int previous; ! ! row = row (cursor); ! if (0 != row) ! previous = this.elementAt (row - 1); ! else ! previous = 0; ! ! return (cursor.getPosition () - previous); ! } ! ! /** ! * Get the column number for a position. ! * @param cursor The character offset into the page. ! * @return The character offset into the line this cursor is on. ! */ ! public int column (int cursor) ! { ! return (column (new Cursor (getPage (), cursor))); ! } ! ! /** ! 
* Get the elements as an array of int. ! * @return A new array containing the elements, ! * i.e. a snapshot of the index. ! */ ! public int[] get () ! { ! int[] ret = new int[size ()]; ! System.arraycopy (mIndices, 0, ret, 0, size ()); ! ! return (ret); ! } ! ! /** ! * Binary search for the element. ! * @param cursor The element to search for. ! * @return The index at which the element was found or is to be inserted. ! */ ! protected int bsearch (int cursor) ! { ! return (Sort.bsearch (this, new Cursor (getPage (), cursor))); ! } ! ! /** ! * Binary search for the element. ! * @param cursor The element to search for. ! * @param first The index to start at. ! * @param last The index to stop at. ! * @return The index at which the element was found or is to be inserted. ! */ ! protected int bsearch (int cursor, int first, int last) ! { ! return (Sort.bsearch (this, new Cursor (getPage (), cursor), first, last)); ! } ! ! /** ! * Inserts an element into the list. ! * The index must be a value greater than or equal to 0 and less than ! * or equal to the current size of the array. ! * @param cursor The element to insert. ! * @param index The index in the list to insert it at. ! */ ! protected void insertElementAt (int cursor, int index) ! { ! if ((index >= capacity ()) || (size () == capacity ())) ! { // allocate more space ! int new_values[] = new int[Math.max (capacity () + mIncrement, index + 1)]; ! if (index < capacity ()) ! { ! // copy and shift up in two pieces ! System.arraycopy (mIndices, 0, new_values, 0, index); ! System.arraycopy (mIndices, index, new_values, index + 1, capacity () - index); ! } ! else ! System.arraycopy (mIndices, 0, new_values, 0, capacity ()); ! mIndices = new_values; ! } ! else if (index < size ()) ! // shift up ! System.arraycopy (mIndices, index, mIndices, index + 1, capacity () - (index + 1)); ! mIndices[index] = cursor; ! mCount++; ! } ! ! /** ! * Remove an element from the list. ! * @param index The index of the item to remove. ! */ ! 
protected void removeElementAt (int index) ! { ! // shift ! System.arraycopy (mIndices, index + 1, mIndices, index, capacity () - (index + 1)); ! mIndices[capacity() - 1] = 0; ! mCount--; ! } ! ! // ! // Sortable interface ! // ! ! /** ! * Returns the first index of the Sortable. ! * @return The index of the first element. ! */ ! public int first () ! { ! return (0); ! } ! ! /** ! * Returns the last index of the Sortable. ! * @return The index of the last element. ! * If this were an array object this would be (object.length - 1). ! */ ! public int last () ! { ! return (mCount - 1); ! } ! ! /** ! * Fetch the object at the given index. ! * @param index The item number to get. ! * @param reuse If this argument is not null, it is an object ! * acquired from a previous fetch that is no longer needed and ! * may be returned as the result if it makes mores sense to alter ! * and return it than to fetch or create a new element. That is, the ! * reuse object is garbage and may be used to avoid allocating a new ! * object if that would normally be the strategy. ! * @return The Ordered object at that index. ! */ ! public Ordered fetch (int index, Ordered reuse) ! { ! Cursor ret; ! ! if (null != reuse) ! { ! ret = (Cursor)reuse; ! ret.mPosition = mIndices[index]; ! ret.mPage = getPage (); // redundant ! } ! else ! ret = new Cursor (getPage (), mIndices[index]); ! ! return (ret); ! } ! ! /** ! * Swaps the elements at the given indicies. ! * @param i One index. ! * @param j The other index. ! */ ! public void swap (int i, int j) ! { ! int temp = mIndices[i]; ! mIndices[i] = mIndices[j]; ! mIndices[j] = temp; ! } ! } |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests/scannersTests Modified Files: AllTests.java BaseHREFScannerTest.java BodyScannerTest.java BulletListScannerTest.java BulletScannerTest.java CompositeTagScannerTest.java DivScannerTest.java FormScannerTest.java FrameScannerTest.java FrameSetScannerTest.java HeadScannerTest.java HtmlTest.java ImageScannerTest.java InputTagScannerTest.java JspScannerTest.java LabelScannerTest.java LinkScannerTest.java MetaTagScannerTest.java OptionTagScannerTest.java ScriptScannerTest.java SelectTagScannerTest.java SpanScannerTest.java StyleScannerTest.java TableScannerTest.java TagScannerTest.java TextareaTagScannerTest.java TitleScannerTest.java XmlEndTagScanningTest.java Log Message: Change tabs to spaces in all source files. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/AllTests.java,v retrieving revision 1.45 retrieving revision 1.46 diff -C2 -d -r1.45 -r1.46 *** AllTests.java 24 Aug 2003 21:59:43 -0000 1.45 --- AllTests.java 3 Sep 2003 23:36:21 -0000 1.46 *************** *** 54,93 **** { ! public AllTests(String name) { ! super(name); ! } ! ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("Scanner Tests"); ! suite.addTestSuite(TagScannerTest.class); ! suite.addTestSuite(AppletScannerTest.class); ! suite.addTestSuite(ScriptScannerTest.class); ! suite.addTestSuite(ImageScannerTest.class); ! suite.addTestSuite(LinkScannerTest.class); ! suite.addTestSuite(StyleScannerTest.class); ! suite.addTestSuite(MetaTagScannerTest.class); ! suite.addTestSuite(TitleScannerTest.class); ! suite.addTestSuite(FormScannerTest.class); ! suite.addTestSuite(FrameScannerTest.class); ! suite.addTestSuite(FrameSetScannerTest.class); ! suite.addTestSuite(InputTagScannerTest.class); ! suite.addTestSuite(OptionTagScannerTest.class); ! 
suite.addTestSuite(SelectTagScannerTest.class); ! suite.addTestSuite(TextareaTagScannerTest.class); ! suite.addTestSuite(BaseHREFScannerTest.class); ! suite.addTestSuite(JspScannerTest.class); ! suite.addTestSuite(TableScannerTest.class); ! suite.addTestSuite(SpanScannerTest.class); ! suite.addTestSuite(DivScannerTest.class); ! suite.addTestSuite(LabelScannerTest.class); ! suite.addTestSuite(BodyScannerTest.class); ! suite.addTestSuite(CompositeTagScannerTest.class); ! suite.addTestSuite(HeadScannerTest.class); ! suite.addTestSuite(BulletListScannerTest.class); ! suite.addTestSuite(BulletScannerTest.class); ! suite.addTestSuite(HtmlTest.class); ! suite.addTestSuite(XmlEndTagScanningTest.class); ! return suite; ! } /** --- 54,93 ---- { ! public AllTests(String name) { ! super(name); ! } ! ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("Scanner Tests"); ! suite.addTestSuite(TagScannerTest.class); ! suite.addTestSuite(AppletScannerTest.class); ! suite.addTestSuite(ScriptScannerTest.class); ! suite.addTestSuite(ImageScannerTest.class); ! suite.addTestSuite(LinkScannerTest.class); ! suite.addTestSuite(StyleScannerTest.class); ! suite.addTestSuite(MetaTagScannerTest.class); ! suite.addTestSuite(TitleScannerTest.class); ! suite.addTestSuite(FormScannerTest.class); ! suite.addTestSuite(FrameScannerTest.class); ! suite.addTestSuite(FrameSetScannerTest.class); ! suite.addTestSuite(InputTagScannerTest.class); ! suite.addTestSuite(OptionTagScannerTest.class); ! suite.addTestSuite(SelectTagScannerTest.class); ! suite.addTestSuite(TextareaTagScannerTest.class); ! suite.addTestSuite(BaseHREFScannerTest.class); ! suite.addTestSuite(JspScannerTest.class); ! suite.addTestSuite(TableScannerTest.class); ! suite.addTestSuite(SpanScannerTest.class); ! suite.addTestSuite(DivScannerTest.class); ! suite.addTestSuite(LabelScannerTest.class); ! suite.addTestSuite(BodyScannerTest.class); ! suite.addTestSuite(CompositeTagScannerTest.class); ! 
suite.addTestSuite(HeadScannerTest.class); ! suite.addTestSuite(BulletListScannerTest.class); ! suite.addTestSuite(BulletScannerTest.class); ! suite.addTestSuite(HtmlTest.class); ! suite.addTestSuite(XmlEndTagScanningTest.class); ! return suite; ! } /** *************** *** 102,106 **** * All other options are passed on to the junit framework. */ ! public static void main(String[] args) { String runner; --- 102,106 ---- * All other options are passed on to the junit framework. */ ! public static void main(String[] args) { String runner; *************** *** 152,156 **** + ")"); } ! } } --- 152,156 ---- + ")"); } ! } } Index: BaseHREFScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/BaseHREFScannerTest.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** BaseHREFScannerTest.java 24 Aug 2003 21:59:43 -0000 1.23 --- BaseHREFScannerTest.java 3 Sep 2003 23:36:21 -0000 1.24 *************** *** 39,80 **** public class BaseHREFScannerTest extends ParserTestCase { ! private BaseHrefScanner scanner; ! public BaseHREFScannerTest(String arg0) { ! super(arg0); ! } ! ! protected void setUp() { ! scanner=new BaseHrefScanner(); ! } ! public void testRemoveLastSlash() { ! String url1 = "http://www.yahoo.com/"; ! String url2 = "http://www.google.com"; ! String modifiedUrl1 = LinkProcessor.removeLastSlash(url1); ! String modifiedUrl2 = LinkProcessor.removeLastSlash(url2); ! assertEquals("Url1","http://www.yahoo.com",modifiedUrl1); ! assertEquals("Url2","http://www.google.com",modifiedUrl2); ! } ! ! public void testEvaluate() { ! String testData1 = "BASE HREF=\"http://www.abc.com/\""; ! assertTrue("Data 1 Should have evaluated true",scanner.evaluate(testData1,null)); ! String testData2 = "Base href=\"http://www.abc.com/\""; ! assertTrue("Data 2 Should have evaluated true",scanner.evaluate(testData2,null)); ! } ! ! 
public void testScan() throws ParserException{ ! createParser("<html><head><TITLE>test page</TITLE><BASE HREF=\"http://www.abc.com/\"><a href=\"home.cfm\">Home</a>...</html>","http://www.google.com/test/index.html"); ! LinkScanner linkScanner = new LinkScanner("-l"); ! parser.addScanner(linkScanner); ! parser.addScanner(new TitleScanner("-t")); ! parser.addScanner(linkScanner.createBaseHREFScanner("-b")); ! parseAndAssertNodeCount(7); ! //Base href tag should be the 4th tag ! assertTrue(node[3] instanceof BaseHrefTag); ! BaseHrefTag baseRefTag = (BaseHrefTag)node[3]; ! assertEquals("Base HREF Url","http://www.abc.com",baseRefTag.getBaseUrl()); ! } ! } --- 39,80 ---- public class BaseHREFScannerTest extends ParserTestCase { ! private BaseHrefScanner scanner; ! public BaseHREFScannerTest(String arg0) { ! super(arg0); ! } ! ! protected void setUp() { ! scanner=new BaseHrefScanner(); ! } ! public void testRemoveLastSlash() { ! String url1 = "http://www.yahoo.com/"; ! String url2 = "http://www.google.com"; ! String modifiedUrl1 = LinkProcessor.removeLastSlash(url1); ! String modifiedUrl2 = LinkProcessor.removeLastSlash(url2); ! assertEquals("Url1","http://www.yahoo.com",modifiedUrl1); ! assertEquals("Url2","http://www.google.com",modifiedUrl2); ! } ! ! public void testEvaluate() { ! String testData1 = "BASE HREF=\"http://www.abc.com/\""; ! assertTrue("Data 1 Should have evaluated true",scanner.evaluate(testData1,null)); ! String testData2 = "Base href=\"http://www.abc.com/\""; ! assertTrue("Data 2 Should have evaluated true",scanner.evaluate(testData2,null)); ! } ! ! public void testScan() throws ParserException{ ! createParser("<html><head><TITLE>test page</TITLE><BASE HREF=\"http://www.abc.com/\"><a href=\"home.cfm\">Home</a>...</html>","http://www.google.com/test/index.html"); ! LinkScanner linkScanner = new LinkScanner("-l"); ! parser.addScanner(linkScanner); ! parser.addScanner(new TitleScanner("-t")); ! parser.addScanner(linkScanner.createBaseHREFScanner("-b")); ! 
parseAndAssertNodeCount(7); ! //Base href tag should be the 4th tag ! assertTrue(node[3] instanceof BaseHrefTag); ! BaseHrefTag baseRefTag = (BaseHrefTag)node[3]; ! assertEquals("Base HREF Url","http://www.abc.com",baseRefTag.getBaseUrl()); ! } ! } Index: BodyScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/BodyScannerTest.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** BodyScannerTest.java 24 Aug 2003 21:59:43 -0000 1.9 --- BodyScannerTest.java 3 Sep 2003 23:36:21 -0000 1.10 *************** *** 39,109 **** public class BodyScannerTest extends ParserTestCase { ! public BodyScannerTest(String name) { ! super(name); ! } ! public void testSimpleBody() throws ParserException { ! createParser("<html><head><title>Test 1</title></head><body>This is a body tag</body></html>"); ! parser.registerScanners(); ! BodyScanner bodyScanner = new BodyScanner("-b"); ! parser.addScanner(bodyScanner); ! parseAndAssertNodeCount(6); ! assertTrue(node[4] instanceof BodyTag); ! // check the body node ! BodyTag bodyTag = (BodyTag) node[4]; ! assertEquals("Body","This is a body tag",bodyTag.getBody()); ! assertEquals("Body","<BODY>This is a body tag</BODY>",bodyTag.toHtml()); ! assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); ! } ! ! public void testBodywithJsp() throws ParserException { ! createParser("<html><head><title>Test 1</title></head><body><%=BodyValue%></body></html>"); ! parser.registerScanners(); ! BodyScanner bodyScanner = new BodyScanner("-b"); ! parser.addScanner(bodyScanner); ! parseAndAssertNodeCount(6); ! assertTrue(node[4] instanceof BodyTag); ! // check the body node ! BodyTag bodyTag = (BodyTag) node[4]; ! assertStringEquals("Body","<BODY><%=BodyValue%></BODY>",bodyTag.toHtml()); ! assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); ! } ! ! 
public void testBodyMixed() throws ParserException { ! createParser("<html><head><title>Test 1</title></head><body>before jsp<%=BodyValue%>after jsp</body></html>"); ! parser.registerScanners(); ! BodyScanner bodyScanner = new BodyScanner("-b"); ! parser.addScanner(bodyScanner); ! parseAndAssertNodeCount(6); ! assertTrue(node[4] instanceof BodyTag); ! // check the body node ! BodyTag bodyTag = (BodyTag) node[4]; ! assertEquals("Body","<BODY>before jsp<%=BodyValue%>after jsp</BODY>",bodyTag.toHtml()); ! assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); ! } ! ! public void testBodyEnding() throws ParserException { ! createParser("<html><body>before jsp<%=BodyValue%>after jsp</html>"); ! parser.registerScanners(); ! BodyScanner bodyScanner = new BodyScanner("-b"); ! parser.addScanner(bodyScanner); ! parseAndAssertNodeCount(3); ! assertTrue(node[1] instanceof BodyTag); ! // check the body node ! BodyTag bodyTag = (BodyTag) node[1]; ! assertEquals("Body","<BODY>before jsp<%=BodyValue%>after jsp</BODY>",bodyTag.toHtml()); ! assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); ! } ! ! public static TestSuite suite() ! { ! return new TestSuite(BodyScannerTest.class); ! } ! ! public static void main(String[] args) ! { ! new junit.awtui.TestRunner().start(new String[] {BodyScannerTest.class.getName()}); ! } ! } --- 39,109 ---- public class BodyScannerTest extends ParserTestCase { ! public BodyScannerTest(String name) { ! super(name); ! } ! public void testSimpleBody() throws ParserException { ! createParser("<html><head><title>Test 1</title></head><body>This is a body tag</body></html>"); ! parser.registerScanners(); ! BodyScanner bodyScanner = new BodyScanner("-b"); ! parser.addScanner(bodyScanner); ! parseAndAssertNodeCount(6); ! assertTrue(node[4] instanceof BodyTag); ! // check the body node ! BodyTag bodyTag = (BodyTag) node[4]; ! assertEquals("Body","This is a body tag",bodyTag.getBody()); ! 
assertEquals("Body","<BODY>This is a body tag</BODY>",bodyTag.toHtml()); ! assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); ! } ! ! public void testBodywithJsp() throws ParserException { ! createParser("<html><head><title>Test 1</title></head><body><%=BodyValue%></body></html>"); ! parser.registerScanners(); ! BodyScanner bodyScanner = new BodyScanner("-b"); ! parser.addScanner(bodyScanner); ! parseAndAssertNodeCount(6); ! assertTrue(node[4] instanceof BodyTag); ! // check the body node ! BodyTag bodyTag = (BodyTag) node[4]; ! assertStringEquals("Body","<BODY><%=BodyValue%></BODY>",bodyTag.toHtml()); ! assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); ! } ! ! public void testBodyMixed() throws ParserException { ! createParser("<html><head><title>Test 1</title></head><body>before jsp<%=BodyValue%>after jsp</body></html>"); ! parser.registerScanners(); ! BodyScanner bodyScanner = new BodyScanner("-b"); ! parser.addScanner(bodyScanner); ! parseAndAssertNodeCount(6); ! assertTrue(node[4] instanceof BodyTag); ! // check the body node ! BodyTag bodyTag = (BodyTag) node[4]; ! assertEquals("Body","<BODY>before jsp<%=BodyValue%>after jsp</BODY>",bodyTag.toHtml()); ! assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); ! } ! ! public void testBodyEnding() throws ParserException { ! createParser("<html><body>before jsp<%=BodyValue%>after jsp</html>"); ! parser.registerScanners(); ! BodyScanner bodyScanner = new BodyScanner("-b"); ! parser.addScanner(bodyScanner); ! parseAndAssertNodeCount(3); ! assertTrue(node[1] instanceof BodyTag); ! // check the body node ! BodyTag bodyTag = (BodyTag) node[1]; ! assertEquals("Body","<BODY>before jsp<%=BodyValue%>after jsp</BODY>",bodyTag.toHtml()); ! assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); ! } ! ! public static TestSuite suite() ! { ! return new TestSuite(BodyScannerTest.class); ! } ! ! public static void main(String[] args) ! { ! 
new junit.awtui.TestRunner().start(new String[] {BodyScannerTest.class.getName()}); ! } ! } Index: BulletListScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/BulletListScannerTest.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** BulletListScannerTest.java 24 Aug 2003 21:59:43 -0000 1.5 --- BulletListScannerTest.java 3 Sep 2003 23:36:21 -0000 1.6 *************** *** 46,102 **** public class BulletListScannerTest extends ParserTestCase { ! public BulletListScannerTest(String name) { ! super(name); ! } ! public void testScan() throws ParserException { ! createParser( ! "<ul TYPE=DISC>" + ! "<ul TYPE=\"DISC\"><li>Energy supply\n"+ ! " (Campbell) <A HREF=\"/hansard/37th3rd/h20307p.htm#1646\">1646</A>\n"+ ! " (MacPhail) <A HREF=\"/hansard/37th3rd/h20307p.htm#1646\">1646</A>\n"+ ! "</ul><A NAME=\"calpinecorp\"></A><B>Calpine Corp.</B>\n"+ ! "<ul TYPE=\"DISC\"><li>Power plant projects\n"+ ! " (Neufeld) <A HREF=\"/hansard/37th3rd/h20314p.htm#1985\">1985</A>\n"+ ! "</ul>" + ! "</ul>" ! ); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! ! NodeList nestedBulletLists = ! ((CompositeTag)node[0]).searchFor( ! BulletList.class ! ); ! assertEquals( ! "bullets in first list", ! 2, ! nestedBulletLists.size() ! ); ! BulletList firstList = ! (BulletList)nestedBulletLists.elementAt(0); ! Bullet firstBullet = ! (Bullet)firstList.childAt(0); ! Node firstNodeInFirstBullet = ! firstBullet.childAt(0); ! assertType( ! "first child in bullet", ! StringNode.class, ! firstNodeInFirstBullet ! ); ! assertStringEquals( ! "expected text", ! "Energy supply\r\n" + ! " (Campbell) ", ! firstNodeInFirstBullet.toPlainTextString() ! ); ! } ! public void testMissingendtag () throws ParserException { ! createParser ("<li>item 1<li>item 2"); ! parser.registerScanners (); ! 
parseAndAssertNodeCount (2); assertStringEquals ("item 1 not correct", "item 1", ((Bullet)node[0]).childAt (0).toHtml ()); assertStringEquals ("item 2 not correct", "item 2", ((Bullet)node[1]).childAt (0).toHtml ()); --- 46,102 ---- public class BulletListScannerTest extends ParserTestCase { ! public BulletListScannerTest(String name) { ! super(name); ! } ! public void testScan() throws ParserException { ! createParser( ! "<ul TYPE=DISC>" + ! "<ul TYPE=\"DISC\"><li>Energy supply\n"+ ! " (Campbell) <A HREF=\"/hansard/37th3rd/h20307p.htm#1646\">1646</A>\n"+ ! " (MacPhail) <A HREF=\"/hansard/37th3rd/h20307p.htm#1646\">1646</A>\n"+ ! "</ul><A NAME=\"calpinecorp\"></A><B>Calpine Corp.</B>\n"+ ! "<ul TYPE=\"DISC\"><li>Power plant projects\n"+ ! " (Neufeld) <A HREF=\"/hansard/37th3rd/h20314p.htm#1985\">1985</A>\n"+ ! "</ul>" + ! "</ul>" ! ); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! ! NodeList nestedBulletLists = ! ((CompositeTag)node[0]).searchFor( ! BulletList.class ! ); ! assertEquals( ! "bullets in first list", ! 2, ! nestedBulletLists.size() ! ); ! BulletList firstList = ! (BulletList)nestedBulletLists.elementAt(0); ! Bullet firstBullet = ! (Bullet)firstList.childAt(0); ! Node firstNodeInFirstBullet = ! firstBullet.childAt(0); ! assertType( ! "first child in bullet", ! StringNode.class, ! firstNodeInFirstBullet ! ); ! assertStringEquals( ! "expected text", ! "Energy supply\r\n" + ! " (Campbell) ", ! firstNodeInFirstBullet.toPlainTextString() ! ); ! } ! public void testMissingendtag () throws ParserException { ! createParser ("<li>item 1<li>item 2"); ! parser.registerScanners (); ! 
parseAndAssertNodeCount (2); assertStringEquals ("item 1 not correct", "item 1", ((Bullet)node[0]).childAt (0).toHtml ()); assertStringEquals ("item 2 not correct", "item 2", ((Bullet)node[1]).childAt (0).toHtml ()); Index: BulletScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/BulletScannerTest.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** BulletScannerTest.java 24 Aug 2003 21:59:43 -0000 1.6 --- BulletScannerTest.java 3 Sep 2003 23:36:21 -0000 1.7 *************** *** 38,100 **** public class BulletScannerTest extends ParserTestCase { ! public BulletScannerTest(String name) { ! super(name); ! } ! public void testBulletFound() throws Exception { ! createParser( ! "<LI><A HREF=\"collapseHierarchy.html\">Collapse Hierarchy</A>\n"+ ! "</LI>" ! ); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertType("should be a bullet",Bullet.class,node[0]); ! } ! ! ! public void testOutOfMemoryBug() throws ParserException { ! createParser( ! "<html>" + ! "<head>" + ! "<title>Foo</title>" + ! "</head>" + ! "<body>" + ! " <ul>" + ! " <li>" + ! " <a href=\"http://foo.com/c.html\">bibliographies on:" + ! " <ul>" + ! " <li>chironomidae</li>" + ! " </ul>" + ! " </a>" + ! " </li>" + ! " </ul>" + ! "" + ! "</body>" + ! "</html>" ! ); ! parser.registerScanners(); ! for (NodeIterator i = parser.elements();i.hasMoreNodes();) { ! Node node = i.nextNode(); ! System.out.println(node.toHtml()); ! } ! } ! ! public void testNonEndedBullets() throws ParserException { ! createParser( ! "<li>forest practices legislation penalties for non-compliance\n"+ ! " (Kwan) <A HREF=\"/hansard/37th3rd/h21107a.htm#4384\">4384-5</A>\n"+ ! "<li>passenger rail service\n"+ ! " (MacPhail) <A HREF=\"/hansard/37th3rd/h21021p.htm#3904\">3904</A>\n"+ ! "<li>referendum on principles for treaty negotiations\n"+ ! 
" (MacPhail) <A HREF=\"/hansard/37th3rd/h20313p.htm#1894\">1894</A>\n"+ ! "<li>transportation infrastructure projects\n"+ ! " (MacPhail) <A HREF=\"/hansard/37th3rd/h21022a.htm#3945\">3945-7</A>\n"+ ! "<li>tuition fee freeze" ! ); ! parser.registerScanners(); ! parseAndAssertNodeCount(5); ! for (int i=0;i<nodeCount;i++) { ! assertType("node "+i,Bullet.class,node[i]); ! } ! } } --- 38,100 ---- public class BulletScannerTest extends ParserTestCase { ! public BulletScannerTest(String name) { ! super(name); ! } ! public void testBulletFound() throws Exception { ! createParser( ! "<LI><A HREF=\"collapseHierarchy.html\">Collapse Hierarchy</A>\n"+ ! "</LI>" ! ); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertType("should be a bullet",Bullet.class,node[0]); ! } ! ! ! public void testOutOfMemoryBug() throws ParserException { ! createParser( ! "<html>" + ! "<head>" + ! "<title>Foo</title>" + ! "</head>" + ! "<body>" + ! " <ul>" + ! " <li>" + ! " <a href=\"http://foo.com/c.html\">bibliographies on:" + ! " <ul>" + ! " <li>chironomidae</li>" + ! " </ul>" + ! " </a>" + ! " </li>" + ! " </ul>" + ! "" + ! "</body>" + ! "</html>" ! ); ! parser.registerScanners(); ! for (NodeIterator i = parser.elements();i.hasMoreNodes();) { ! Node node = i.nextNode(); ! System.out.println(node.toHtml()); ! } ! } ! ! public void testNonEndedBullets() throws ParserException { ! createParser( ! "<li>forest practices legislation penalties for non-compliance\n"+ ! " (Kwan) <A HREF=\"/hansard/37th3rd/h21107a.htm#4384\">4384-5</A>\n"+ ! "<li>passenger rail service\n"+ ! " (MacPhail) <A HREF=\"/hansard/37th3rd/h21021p.htm#3904\">3904</A>\n"+ ! "<li>referendum on principles for treaty negotiations\n"+ ! " (MacPhail) <A HREF=\"/hansard/37th3rd/h20313p.htm#1894\">1894</A>\n"+ ! "<li>transportation infrastructure projects\n"+ ! " (MacPhail) <A HREF=\"/hansard/37th3rd/h21022a.htm#3945\">3945-7</A>\n"+ ! "<li>tuition fee freeze" ! ); ! parser.registerScanners(); ! 
parseAndAssertNodeCount(5); ! for (int i=0;i<nodeCount;i++) { ! assertType("node "+i,Bullet.class,node[i]); ! } ! } } Index: CompositeTagScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java,v retrieving revision 1.35 retrieving revision 1.36 diff -C2 -d -r1.35 -r1.36 *** CompositeTagScannerTest.java 24 Aug 2003 21:59:44 -0000 1.35 --- CompositeTagScannerTest.java 3 Sep 2003 23:36:21 -0000 1.36 *************** *** 41,627 **** public class CompositeTagScannerTest extends ParserTestCase { ! private CompositeTagScanner scanner; ! private String url; ! ! public CompositeTagScannerTest(String name) { ! super(name); ! } ! protected void setUp() { [...1145 lines suppressed...] ! return false; ! } ! ! } ! public static class CustomTag extends CompositeTag { ! public TagData tagData; ! public CustomTag(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData,compositeTagData); ! this.tagData = tagData; ! } ! } ! ! public static class AnotherTag extends CompositeTag { ! public AnotherTag(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData,compositeTagData); ! } ! } } Index: DivScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/DivScannerTest.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** DivScannerTest.java 24 Aug 2003 21:59:44 -0000 1.29 --- DivScannerTest.java 3 Sep 2003 23:36:21 -0000 1.30 *************** *** 40,71 **** public class DivScannerTest extends ParserTestCase { ! public DivScannerTest(String name) { ! super(name); ! } ! public void testScan() throws ParserException { ! createParser("<table><div align=\"left\">some text</div></table>"); ! parser.registerScanners(); ! parser.addScanner(new TableScanner(parser)); ! 
parser.addScanner(new DivScanner()); ! parseAndAssertNodeCount(1); ! assertType("node should be table",TableTag.class,node[0]); ! TableTag tableTag = (TableTag)node[0]; ! Div div = (Div)tableTag.searchFor(Div.class).toNodeArray()[0]; ! assertEquals("div contents","some text",div.toPlainTextString()); ! } /** * Test case for bug #735193 Explicit tag type recognition for CompositTags not working. */ ! public void testInputInDiv() throws ParserException ! { ! createParser("<div><INPUT type=\"text\" name=\"X\">Hello</INPUT></div>"); ! parser.addScanner(new DivScanner()); ! parser.addScanner(new InputTagScanner()); ! parseAndAssertNodeCount(1); ! assertType("node should be div",Div.class,node[0]); ! Div div = (Div)node[0]; ! assertType("child not input",InputTag.class,div.getChild (0)); ! } } --- 40,71 ---- public class DivScannerTest extends ParserTestCase { ! public DivScannerTest(String name) { ! super(name); ! } ! public void testScan() throws ParserException { ! createParser("<table><div align=\"left\">some text</div></table>"); ! parser.registerScanners(); ! parser.addScanner(new TableScanner(parser)); ! parser.addScanner(new DivScanner()); ! parseAndAssertNodeCount(1); ! assertType("node should be table",TableTag.class,node[0]); ! TableTag tableTag = (TableTag)node[0]; ! Div div = (Div)tableTag.searchFor(Div.class).toNodeArray()[0]; ! assertEquals("div contents","some text",div.toPlainTextString()); ! } /** * Test case for bug #735193 Explicit tag type recognition for CompositTags not working. */ ! public void testInputInDiv() throws ParserException ! { ! createParser("<div><INPUT type=\"text\" name=\"X\">Hello</INPUT></div>"); ! parser.addScanner(new DivScanner()); ! parser.addScanner(new InputTagScanner()); ! parseAndAssertNodeCount(1); ! assertType("node should be div",Div.class,node[0]); ! Div div = (Div)node[0]; ! assertType("child not input",InputTag.class,div.getChild (0)); ! 
} } Index: FormScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/FormScannerTest.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** FormScannerTest.java 24 Aug 2003 21:59:44 -0000 1.29 --- FormScannerTest.java 3 Sep 2003 23:36:21 -0000 1.30 *************** *** 45,299 **** public class FormScannerTest extends ParserTestCase { ! public static final String FORM_HTML = ! "<FORM METHOD=\""+FormTag.POST+"\" ACTION=\"do_login.php\" NAME=\"login_form\" onSubmit=\"return CheckData()\">\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>User Name</b></font></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"text\" NAME=\"name\" SIZE=\"20\"></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>Password</b></font></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"password\" NAME=\"passwd\" SIZE=\"20\"></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"submit\" NAME=\"submit\" VALUE=\"Login\"></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ ! "<TEXTAREA name=\"Description\" rows=\"15\" cols=\"55\" wrap=\"virtual\" class=\"composef\" tabindex=\"5\">Contents of TextArea</TEXTAREA>\n"+ ! // "<TEXTAREA name=\"AnotherDescription\" rows=\"15\" cols=\"55\" wrap=\"virtual\" class=\"composef\" tabindex=\"5\">\n"+ ! "<INPUT TYPE=\"hidden\" NAME=\"password\" SIZE=\"20\">\n"+ ! "<INPUT TYPE=\"submit\">\n"+ ! "</FORM>"; ! ! public static final String EXPECTED_FORM_HTML_FORMLINE="<FORM ACTION=\"http://www.google.com/test/do_login.php\" NAME=\"login_form\" ONSUBMIT=\"return CheckData()\" METHOD=\""+FormTag.POST+"\">\r\n"; ! public static final String EXPECTED_FORM_HTML_REST_OF_FORM= ! "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ ! 
"<TR><TD ALIGN=\"center\"><FONT FACE=\"Arial, verdana\" SIZE=\"2\"><B>User Name</B></FONT></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT NAME=\"name\" SIZE=\"20\" TYPE=\"text\"></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><FONT FACE=\"Arial, verdana\" SIZE=\"2\"><B>Password</B></FONT></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT NAME=\"passwd\" SIZE=\"20\" TYPE=\"password\"></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT VALUE=\"Login\" NAME=\"submit\" TYPE=\"submit\"></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ ! "<TEXTAREA TABINDEX=\"5\" ROWS=\"15\" COLS=\"55\" CLASS=\"composef\" NAME=\"Description\" WRAP=\"virtual\">Contents of TextArea</TEXTAREA>\r\n"+ ! // "<TEXTAREA TABINDEX=\"5\" ROWS=\"15\" COLS=\"55\" CLASS=\"composef\" NAME=\"AnotherDescription\" WRAP=\"virtual\">\r\n"+ ! "<INPUT NAME=\"password\" SIZE=\"20\" TYPE=\"hidden\">\r\n"+ ! "<INPUT TYPE=\"submit\">\r\n"+ ! "</FORM>"; ! public static final String EXPECTED_FORM_HTML = EXPECTED_FORM_HTML_FORMLINE+EXPECTED_FORM_HTML_REST_OF_FORM; ! ! public FormScannerTest(String name) { ! super(name); ! } ! ! public void testEvaluate() { ! String line1="form method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; ! String line2="FORM method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; ! String line3="Form method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; ! FormScanner formScanner = new FormScanner("",Parser.createParser("")); ! assertTrue("Line 1",formScanner.evaluate(line1,null)); ! assertTrue("Line 2",formScanner.evaluate(line2,null)); ! assertTrue("Line 3",formScanner.evaluate(line3,null)); ! } ! ! public void assertTypeNameSize(String description,String type,String name,String size,InputTag inputTag) { ! assertEquals(description+" type",type,inputTag.getAttribute("TYPE")); ! assertEquals(description+" name",name,inputTag.getAttribute("NAME")); ! 
assertEquals(description+" size",size,inputTag.getAttribute("SIZE")); ! } ! public void assertTypeNameValue(String description,String type,String name,String value,InputTag inputTag) { ! assertEquals(description+" type",type,inputTag.getAttribute("TYPE")); ! assertEquals(description+" name",name,inputTag.getAttribute("NAME")); ! assertEquals(description+" value",value,inputTag.getAttribute("VALUE")); ! } ! public void testScan() throws ParserException { ! createParser(FORM_HTML,"http://www.google.com/test/index.html"); ! parser.addScanner(new FormScanner("",parser)); ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! assertStringEquals("Method",FormTag.POST,formTag.getFormMethod()); ! assertStringEquals("Location","http://www.google.com/test/do_login.php",formTag.getFormLocation()); ! assertStringEquals("Name","login_form",formTag.getFormName()); ! InputTag nameTag = formTag.getInputTag("name"); ! InputTag passwdTag = formTag.getInputTag("passwd"); ! InputTag submitTag = formTag.getInputTag("submit"); ! InputTag dummyTag = formTag.getInputTag("dummy"); ! assertNotNull("Input Name Tag should not be null",nameTag); ! assertNotNull("Input Password Tag should not be null",passwdTag); ! assertNotNull("Input Submit Tag should not be null",submitTag); ! assertNull("Input dummy tag should be null",dummyTag); ! ! assertTypeNameSize("Input Name Tag","text","name","20",nameTag); ! assertTypeNameSize("Input Password Tag","password","passwd","20",passwdTag); ! assertTypeNameValue("Input Submit Tag","submit","submit","Login",submitTag); ! ! TextareaTag textAreaTag = formTag.getTextAreaTag("Description"); ! assertNotNull("Text Area Tag should have been found",textAreaTag); ! assertEquals("Text Area Tag Contents","Contents of TextArea",textAreaTag.getValue()); ! assertNull("Should have been null",formTag.getTextAreaTag("junk")); ! ! 
assertStringEquals("toHTML",EXPECTED_FORM_HTML,formTag.toHtml()); ! } ! ! public void testScanFormWithNoEnding() throws Exception { ! createParser( ! "<TABLE>\n"+ ! "<FORM METHOD=\"post\" ACTION=\"do_login.php\" NAME=\"login_form\" onSubmit=\"return CheckData()\">\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>User Name</b></font></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"text\" NAME=\"name\" SIZE=\"20\"></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>Password</b></font></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"password\" NAME=\"passwd\" SIZE=\"20\"></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"submit\" NAME=\"submit\" VALUE=\"Login\"></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ ! "<INPUT TYPE=\"hidden\" NAME=\"password\" SIZE=\"20\">\n"+ ! "</TABLE>","http://www.google.com/test/index.html"); ! ! parser.addScanner(new FormScanner("",parser)); ! ! parseAndAssertNodeCount(2); ! } ! /** ! * Bug reported by Pavan Podila - forms with links are not being parsed ! * Sample html is from google ! */ ! public void testScanFormWithLinks() throws ParserException { ! createParser( ! "<form action=\"/search\" name=f><table cellspacing=0 cellpadding=0><tr><td width=75> "+ ! "</td><td align=center><input type=hidden name=hl value=en><input type=hidden name=ie "+ ! "value=\"UTF-8\"><input type=hidden name=oe value=\"UTF-8\"><input maxLength=256 size=55"+ ! " name=q value=\"\"><br><input type=submit value=\"Google Search\" name=btnG><input type="+ ! "submit value=\"I'm Feeling Lucky\" name=btnI></td><td valign=top nowrap><font size=-2>"+ ! " • <a href=/advanced_search?hl=en>Advanced Search</a><br> •"+ ! " <a href=/preferences?hl=en>Preferences</a><br> • <a href=/"+ ! "language_tools?hl=en>Language Tools</a></font></td></tr></table></form>" ! ); ! ! 
parser.addScanner(new FormScanner("",parser)); ! parser.addScanner(new LinkScanner()); ! parseAndAssertNodeCount(1); ! assertTrue("Should be a HTMLFormTag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! LinkTag [] linkTag = new LinkTag[10]; ! int i = 0; ! for (SimpleNodeIterator e=formTag.children();e.hasMoreNodes();) { ! Node formNode = e.nextNode(); ! if (formNode instanceof LinkTag) { ! linkTag[i++] = (LinkTag)formNode; ! } ! } ! assertEquals("Link Tag Count",3,i); ! assertEquals("First Link Tag Text","Advanced Search",linkTag[0].getLinkText()); ! assertEquals("Second Link Tag Text","Preferences",linkTag[1].getLinkText()); ! assertEquals("Third Link Tag Text","Language Tools",linkTag[2].getLinkText()); ! } ! /** ! * Bug 652674 - forms with comments are not being parsed ! */ ! public void testScanFormWithComments() throws ParserException { ! createParser( ! "<form action=\"/search\" name=f><table cellspacing=0 cellpadding=0><tr><td width=75> "+ ! "</td><td align=center><input type=hidden name=hl value=en><input type=hidden name=ie "+ ! "value=\"UTF-8\"><input type=hidden name=oe value=\"UTF-8\"><!-- Hello World -->"+ ! "<input maxLength=256 size=55"+ ! " name=q value=\"\"><br><input type=submit value=\"Google Search\" name=btnG><input type="+ ! "submit value=\"I'm Feeling Lucky\" name=btnI></td><td valign=top nowrap><font size=-2>"+ ! " • <a href=/advanced_search?hl=en>Advanced Search</a><br> •"+ ! " <a href=/preferences?hl=en>Preferences</a><br> • <a href=/"+ ! "language_tools?hl=en>Language Tools</a></font></td></tr></table></form>" ! ); ! ! parser.addScanner(new FormScanner("",parser)); ! parseAndAssertNodeCount(1); ! assertTrue("Should be a HTMLFormTag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! RemarkNode [] remarkNode = new RemarkNode[10]; ! int i = 0; ! for (SimpleNodeIterator e=formTag.children();e.hasMoreNodes();) { ! Node formNode = (Node)e.nextNode(); ! if (formNode instanceof RemarkNode) { ! 
remarkNode[i++] = (RemarkNode)formNode; ! } ! } ! assertEquals("Remark Node Count",1,i); ! assertEquals("First Remark Node"," Hello World ",remarkNode[0].toPlainTextString()); ! } ! /** ! * Bug 652674 - forms with comments are not being parsed ! */ ! public void testScanFormWithComments2() throws ParserException { ! createParser( ! "<FORM id=\"id\" name=\"name\" action=\"http://some.site/aPage.asp?id=97\" method=\"post\">\n"+ ! " <!--\n"+ ! " Just a Comment\n"+ ! " -->\n"+ ! "</FORM>"); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertTrue("Should be a HTMLFormTag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! RemarkNode [] remarkNode = new RemarkNode[10]; ! int i = 0; ! for (SimpleNodeIterator e=formTag.children();e.hasMoreNodes();) { ! Node formNode = (Node)e.nextNode(); ! if (formNode instanceof RemarkNode) { ! remarkNode[i++] = (RemarkNode)formNode; ! } ! } ! assertEquals("Remark Node Count",1,i); ! } ! ! /** ! * Bug 656870 - a form tag with a previously open link causes infinite loop ! * on encounter ! */ ! public void testScanFormWithPreviousOpenLink() throws ParserException { ! createParser( ! "<A HREF=\"http://www.oygevalt.org/\">Home</A>\n"+ ! "<P>\n"+ ! "And now, the good stuff:\n"+ ! "<P>\n"+ ! "<A HREF=\"http://www.yahoo.com\">Yahoo!\n"+ ! "<FORM ACTION=\".\" METHOD=\"GET\">\n"+ ! "<INPUT TYPE=\"TEXT\">\n"+ ! "<BR>\n"+ ! "<A HREF=\"http://www.helpme.com\">Help</A> " + ! "<INPUT TYPE=\"checkbox\">\n"+ ! "<P>\n"+ ! "<INPUT TYPE=\"SUBMIT\">\n"+ ! "</FORM>" ! ); ! parser.addScanner(new FormScanner("",parser)); ! parser.addScanner(new LinkScanner()); ! parseAndAssertNodeCount(6); ! assertTrue("Fifth Node is a link",node[4] instanceof LinkTag); ! LinkTag linkTag = (LinkTag)node[4]; ! assertEquals("Link Text","Yahoo!\r\n",linkTag.getLinkText()); ! assertEquals("Link URL","http://www.yahoo.com",linkTag.getLink()); ! assertType("Sixth Node",FormTag.class,node[5]); ! } ! /** ! 
* Bug 713907 reported by Dhaval Udani, erroneous ! * parsing of form tag (even when form scanner is not ! * registered) ! */ ! public void testFormScanningShouldNotHappen() throws Exception { ! String testHTML = ! "<HTML><HEAD><TITLE>Test Form Tag</TITLE></HEAD>" + ! "<BODY><FORM name=\"form0\"><INPUT type=\"text\" name=\"text0\"></FORM>" + ! "</BODY></HTML>"; ! createParser( ! testHTML ! ); ! parser.registerScanners(); ! parser.removeScanner(new FormScanner("",parser)); ! Node [] nodes = ! parser.extractAllNodesThatAre( ! FormTag.class ! ); ! assertEquals( ! "shouldnt have found form tag", ! 0, ! nodes.length ! ); ! } /** --- 45,299 ---- public class FormScannerTest extends ParserTestCase { ! public static final String FORM_HTML = ! "<FORM METHOD=\""+FormTag.POST+"\" ACTION=\"do_login.php\" NAME=\"login_form\" onSubmit=\"return CheckData()\">\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>User Name</b></font></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"text\" NAME=\"name\" SIZE=\"20\"></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>Password</b></font></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"password\" NAME=\"passwd\" SIZE=\"20\"></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"submit\" NAME=\"submit\" VALUE=\"Login\"></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ ! "<TEXTAREA name=\"Description\" rows=\"15\" cols=\"55\" wrap=\"virtual\" class=\"composef\" tabindex=\"5\">Contents of TextArea</TEXTAREA>\n"+ ! // "<TEXTAREA name=\"AnotherDescription\" rows=\"15\" cols=\"55\" wrap=\"virtual\" class=\"composef\" tabindex=\"5\">\n"+ ! "<INPUT TYPE=\"hidden\" NAME=\"password\" SIZE=\"20\">\n"+ ! "<INPUT TYPE=\"submit\">\n"+ ! "</FORM>"; ! ! 
public static final String EXPECTED_FORM_HTML_FORMLINE="<FORM ACTION=\"http://www.google.com/test/do_login.php\" NAME=\"login_form\" ONSUBMIT=\"return CheckData()\" METHOD=\""+FormTag.POST+"\">\r\n"; ! public static final String EXPECTED_FORM_HTML_REST_OF_FORM= ! "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><FONT FACE=\"Arial, verdana\" SIZE=\"2\"><B>User Name</B></FONT></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT NAME=\"name\" SIZE=\"20\" TYPE=\"text\"></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><FONT FACE=\"Arial, verdana\" SIZE=\"2\"><B>Password</B></FONT></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT NAME=\"passwd\" SIZE=\"20\" TYPE=\"password\"></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT VALUE=\"Login\" NAME=\"submit\" TYPE=\"submit\"></TD></TR>\r\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ ! "<TEXTAREA TABINDEX=\"5\" ROWS=\"15\" COLS=\"55\" CLASS=\"composef\" NAME=\"Description\" WRAP=\"virtual\">Contents of TextArea</TEXTAREA>\r\n"+ ! // "<TEXTAREA TABINDEX=\"5\" ROWS=\"15\" COLS=\"55\" CLASS=\"composef\" NAME=\"AnotherDescription\" WRAP=\"virtual\">\r\n"+ ! "<INPUT NAME=\"password\" SIZE=\"20\" TYPE=\"hidden\">\r\n"+ ! "<INPUT TYPE=\"submit\">\r\n"+ ! "</FORM>"; ! public static final String EXPECTED_FORM_HTML = EXPECTED_FORM_HTML_FORMLINE+EXPECTED_FORM_HTML_REST_OF_FORM; ! ! public FormScannerTest(String name) { ! super(name); ! } ! ! public void testEvaluate() { ! String line1="form method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; ! String line2="FORM method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; ! String line3="Form method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; ! FormScanner formScanner = new FormScanner("",Parser.createParser("")); ! assertTrue("Line 1",formScanner.evaluate(line1,null)); ! assertTrue("Line 2",formScanner.evaluate(line2,null)); ! 
assertTrue("Line 3",formScanner.evaluate(line3,null)); ! } ! ! public void assertTypeNameSize(String description,String type,String name,String size,InputTag inputTag) { ! assertEquals(description+" type",type,inputTag.getAttribute("TYPE")); ! assertEquals(description+" name",name,inputTag.getAttribute("NAME")); ! assertEquals(description+" size",size,inputTag.getAttribute("SIZE")); ! } ! public void assertTypeNameValue(String description,String type,String name,String value,InputTag inputTag) { ! assertEquals(description+" type",type,inputTag.getAttribute("TYPE")); ! assertEquals(description+" name",name,inputTag.getAttribute("NAME")); ! assertEquals(description+" value",value,inputTag.getAttribute("VALUE")); ! } ! public void testScan() throws ParserException { ! createParser(FORM_HTML,"http://www.google.com/test/index.html"); ! parser.addScanner(new FormScanner("",parser)); ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! assertStringEquals("Method",FormTag.POST,formTag.getFormMethod()); ! assertStringEquals("Location","http://www.google.com/test/do_login.php",formTag.getFormLocation()); ! assertStringEquals("Name","login_form",formTag.getFormName()); ! InputTag nameTag = formTag.getInputTag("name"); ! InputTag passwdTag = formTag.getInputTag("passwd"); ! InputTag submitTag = formTag.getInputTag("submit"); ! InputTag dummyTag = formTag.getInputTag("dummy"); ! assertNotNull("Input Name Tag should not be null",nameTag); ! assertNotNull("Input Password Tag should not be null",passwdTag); ! assertNotNull("Input Submit Tag should not be null",submitTag); ! assertNull("Input dummy tag should be null",dummyTag); ! ! assertTypeNameSize("Input Name Tag","text","name","20",nameTag); ! assertTypeNameSize("Input Password Tag","password","passwd","20",passwdTag); ! assertTypeNameValue("Input Submit Tag","submit","submit","Login",submitTag); ! ! 
TextareaTag textAreaTag = formTag.getTextAreaTag("Description"); ! assertNotNull("Text Area Tag should have been found",textAreaTag); ! assertEquals("Text Area Tag Contents","Contents of TextArea",textAreaTag.getValue()); ! assertNull("Should have been null",formTag.getTextAreaTag("junk")); ! ! assertStringEquals("toHTML",EXPECTED_FORM_HTML,formTag.toHtml()); ! } ! ! public void testScanFormWithNoEnding() throws Exception { ! createParser( ! "<TABLE>\n"+ ! "<FORM METHOD=\"post\" ACTION=\"do_login.php\" NAME=\"login_form\" onSubmit=\"return CheckData()\">\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>User Name</b></font></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"text\" NAME=\"name\" SIZE=\"20\"></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><FONT face=\"Arial, verdana\" size=2><b>Password</b></font></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"password\" NAME=\"passwd\" SIZE=\"20\"></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"><INPUT TYPE=\"submit\" NAME=\"submit\" VALUE=\"Login\"></TD></TR>\n"+ ! "<TR><TD ALIGN=\"center\"> </TD></TR>\n"+ ! "<INPUT TYPE=\"hidden\" NAME=\"password\" SIZE=\"20\">\n"+ ! "</TABLE>","http://www.google.com/test/index.html"); ! ! parser.addScanner(new FormScanner("",parser)); ! ! parseAndAssertNodeCount(2); ! } ! /** ! * Bug reported by Pavan Podila - forms with links are not being parsed ! * Sample html is from google ! */ ! public void testScanFormWithLinks() throws ParserException { ! createParser( ! "<form action=\"/search\" name=f><table cellspacing=0 cellpadding=0><tr><td width=75> "+ ! "</td><td align=center><input type=hidden name=hl value=en><input type=hidden name=ie "+ ! "value=\"UTF-8\"><input type=hidden name=oe value=\"UTF-8\"><input maxLength=256 size=55"+ ! " name=q value=\"\"><br><input type=submit value=\"Google Search\" name=btnG><input type="+ ! 
"submit value=\"I'm Feeling Lucky\" name=btnI></td><td valign=top nowrap><font size=-2>"+ ! " • <a href=/advanced_search?hl=en>Advanced Search</a><br> •"+ ! " <a href=/preferences?hl=en>Preferences</a><br> • <a href=/"+ ! "language_tools?hl=en>Language Tools</a></font></td></tr></table></form>" ! ); ! ! parser.addScanner(new FormScanner("",parser)); ! parser.addScanner(new LinkScanner()); ! parseAndAssertNodeCount(1); ! assertTrue("Should be a HTMLFormTag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! LinkTag [] linkTag = new LinkTag[10]; ! int i = 0; ! for (SimpleNodeIterator e=formTag.children();e.hasMoreNodes();) { ! Node formNode = e.nextNode(); ! if (formNode instanceof LinkTag) { ! linkTag[i++] = (LinkTag)formNode; ! } ! } ! assertEquals("Link Tag Count",3,i); ! assertEquals("First Link Tag Text","Advanced Search",linkTag[0].getLinkText()); ! assertEquals("Second Link Tag Text","Preferences",linkTag[1].getLinkText()); ! assertEquals("Third Link Tag Text","Language Tools",linkTag[2].getLinkText()); ! } ! /** ! * Bug 652674 - forms with comments are not being parsed ! */ ! public void testScanFormWithComments() throws ParserException { ! createParser( ! "<form action=\"/search\" name=f><table cellspacing=0 cellpadding=0><tr><td width=75> "+ ! "</td><td align=center><input type=hidden name=hl value=en><input type=hidden name=ie "+ ! "value=\"UTF-8\"><input type=hidden name=oe value=\"UTF-8\"><!-- Hello World -->"+ ! "<input maxLength=256 size=55"+ ! " name=q value=\"\"><br><input type=submit value=\"Google Search\" name=btnG><input type="+ ! "submit value=\"I'm Feeling Lucky\" name=btnI></td><td valign=top nowrap><font size=-2>"+ ! " • <a href=/advanced_search?hl=en>Advanced Search</a><br> •"+ ! " <a href=/preferences?hl=en>Preferences</a><br> • <a href=/"+ ! "language_tools?hl=en>Language Tools</a></font></td></tr></table></form>" ! ); ! ! parser.addScanner(new FormScanner("",parser)); ! parseAndAssertNodeCount(1); ! 
assertTrue("Should be a HTMLFormTag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! RemarkNode [] remarkNode = new RemarkNode[10]; ! int i = 0; ! for (SimpleNodeIterator e=formTag.children();e.hasMoreNodes();) { ! Node formNode = (Node)e.nextNode(); ! if (formNode instanceof RemarkNode) { ! remarkNode[i++] = (RemarkNode)formNode; ! } ! } ! assertEquals("Remark Node Count",1,i); ! assertEquals("First Remark Node"," Hello World ",remarkNode[0].toPlainTextString()); ! } ! /** ! * Bug 652674 - forms with comments are not being parsed ! */ ! public void testScanFormWithComments2() throws ParserException { ! createParser( ! "<FORM id=\"id\" name=\"name\" action=\"http://some.site/aPage.asp?id=97\" method=\"post\">\n"+ ! " <!--\n"+ ! " Just a Comment\n"+ ! " -->\n"+ ! "</FORM>"); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertTrue("Should be a HTMLFormTag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! RemarkNode [] remarkNode = new RemarkNode[10]; ! int i = 0; ! for (SimpleNodeIterator e=formTag.children();e.hasMoreNodes();) { ! Node formNode = (Node)e.nextNode(); ! if (formNode instanceof RemarkNode) { ! remarkNode[i++] = (RemarkNode)formNode; ! } ! } ! assertEquals("Remark Node Count",1,i); ! } ! ! /** ! * Bug 656870 - a form tag with a previously open link causes infinite loop ! * on encounter ! */ ! public void testScanFormWithPreviousOpenLink() throws ParserException { ! createParser( ! "<A HREF=\"http://www.oygevalt.org/\">Home</A>\n"+ ! "<P>\n"+ ! "And now, the good stuff:\n"+ ! "<P>\n"+ ! "<A HREF=\"http://www.yahoo.com\">Yahoo!\n"+ ! "<FORM ACTION=\".\" METHOD=\"GET\">\n"+ ! "<INPUT TYPE=\"TEXT\">\n"+ ! "<BR>\n"+ ! "<A HREF=\"http://www.helpme.com\">Help</A> " + ! "<INPUT TYPE=\"checkbox\">\n"+ ! "<P>\n"+ ! "<INPUT TYPE=\"SUBMIT\">\n"+ ! "</FORM>" ! ); ! parser.addScanner(new FormScanner("",parser)); ! parser.addScanner(new LinkScanner()); ! parseAndAssertNodeCount(6); ! 
assertTrue("Fifth Node is a link",node[4] instanceof LinkTag); ! LinkTag linkTag = (LinkTag)node[4]; ! assertEquals("Link Text","Yahoo!\r\n",linkTag.getLinkText()); ! assertEquals("Link URL","http://www.yahoo.com",linkTag.getLink()); ! assertType("Sixth Node",FormTag.class,node[5]); ! } ! /** ! * Bug 713907 reported by Dhaval Udani, erroneous ! * parsing of form tag (even when form scanner is not ! * registered) ! */ ! public void testFormScanningShouldNotHappen() throws Exception { ! String testHTML = ! "<HTML><HEAD><TITLE>Test Form Tag</TITLE></HEAD>" + ! "<BODY><FORM name=\"form0\"><INPUT type=\"text\" name=\"text0\"></FORM>" + ! "</BODY></HTML>"; ! createParser( ! testHTML ! ); ! parser.registerScanners(); ! parser.removeScanner(new FormScanner("",parser)); ! Node [] nodes = ! parser.extractAllNodesThatAre( ! FormTag.class ! ); ! assertEquals( ! "shouldnt have found form tag", ! 0, ! nodes.length ! ); ! } /** *************** *** 301,316 **** * Under Windows this throws a stack overflow exception. */ ! public void testUnclosedOptions () throws ParserException { ! String url = "http://htmlparser.sourceforge.net/test/overflowpage.html"; int i; Node[] nodes; ! ! parser = new Parser(url); parser.registerScanners (); i = 0; nodes = new AbstractNode[50]; ! for (NodeIterator e = parser.elements(); e.hasMoreNodes();) ! nodes[i++] = e.nextNode(); assertEquals ("Expected nodes", 39, i); } --- 301,316 ---- * Under Windows this throws a stack overflow exception. */ ! public void testUnclosedOptions () throws ParserException { ! String url = "http://htmlparser.sourceforge.net/test/overflowpage.html"; int i; Node[] nodes; ! ! parser = new Parser(url); parser.registerScanners (); i = 0; nodes = new AbstractNode[50]; ! for (NodeIterator e = parser.elements(); e.hasMoreNodes();) ! 
nodes[i++] = e.nextNode(); assertEquals ("Expected nodes", 39, i); } Index: FrameScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/FrameScannerTest.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** FrameScannerTest.java 24 Aug 2003 21:59:44 -0000 1.23 --- FrameScannerTest.java 3 Sep 2003 23:36:21 -0000 1.24 *************** *** 36,65 **** public class FrameScannerTest extends ParserTestCase { ! public FrameScannerTest(String name) { ! super(name); ! } ! ! public void testScan() throws ParserException { ! createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ ! "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ ! "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ ! "</frameset>","http://www.google.com/test/index.html"); ! ! parser.addScanner(new FrameScanner("")); ! ! parseAndAssertNodeCount(4); ! ! assertTrue("Node 1 should be Frame Tag",node[1] instanceof FrameTag); ! assertTrue("Node 2 should be Frame Tag",node[2] instanceof FrameTag); ! FrameTag frameTag1 = (FrameTag)node[1]; ! FrameTag frameTag2 = (FrameTag)node[2]; ! assertEquals("Frame 1 Locn","http://www.google.com/test/demo_bc_top.html",frameTag1.getFrameLocation()); ! assertEquals("Frame 1 Name","topFrame",frameTag1.getFrameName()); ! assertEquals("Frame 2 Locn","http://www.kizna.com/web_e/",frameTag2.getFrameLocation()); ! assertEquals("Frame 2 Name","mainFrame",frameTag2.getFrameName()); ! assertEquals("Frame 1 Scrolling","NO",frameTag1.getAttribute("scrolling")); ! assertEquals("Frame 1 Border","NO",frameTag1.getAttribute("frameborder")); ! } } --- 36,65 ---- public class FrameScannerTest extends ParserTestCase { ! public FrameScannerTest(String name) { ! super(name); ! } ! ! public void testScan() throws ParserException { ! 
createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ ! "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ ! "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ ! "</frameset>","http://www.google.com/test/index.html"); ! ! parser.addScanner(new FrameScanner("")); ! ! parseAndAssertNodeCount(4); ! ! assertTrue("Node 1 should be Frame Tag",node[1] instanceof FrameTag); ! assertTrue("Node 2 should be Frame Tag",node[2] instanceof FrameTag); ! FrameTag frameTag1 = (FrameTag)node[1]; ! FrameTag frameTag2 = (FrameTag)node[2]; ! assertEquals("Frame 1 Locn","http://www.google.com/test/demo_bc_top.html",frameTag1.getFrameLocation()); ! assertEquals("Frame 1 Name","topFrame",frameTag1.getFrameName()); ! assertEquals("Frame 2 Locn","http://www.kizna.com/web_e/",frameTag2.getFrameLocation()); ! assertEquals("Frame 2 Name","mainFrame",frameTag2.getFrameName()); ! assertEquals("Frame 1 Scrolling","NO",frameTag1.getAttribute("scrolling")); ! assertEquals("Frame 1 Border","NO",frameTag1.... [truncated message content] |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests/tagTests Modified Files: AllTests.java BaseHrefTagTest.java BodyTagTest.java CompositeTagTest.java DoctypeTagTest.java EndTagTest.java FormTagTest.java FrameSetTagTest.java FrameTagTest.java InputTagTest.java MetaTagTest.java ObjectCollectionTest.java OptionTagTest.java ScriptTagTest.java SelectTagTest.java StyleTagTest.java TagTest.java TextareaTagTest.java TitleTagTest.java Log Message: Change tabs to spaces in all source files. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/AllTests.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** AllTests.java 24 Aug 2003 21:59:44 -0000 1.40 --- AllTests.java 3 Sep 2003 23:36:21 -0000 1.41 *************** *** 33,66 **** public class AllTests extends junit.framework.TestCase { ! public AllTests(String name) { ! super(name); ! } ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("Tag Tests"); ! suite.addTestSuite(JspTagTest.class); ! suite.addTestSuite(ScriptTagTest.class); ! suite.addTestSuite(ImageTagTest.class); ! suite.addTestSuite(LinkTagTest.class); ! suite.addTestSuite(TagTest.class); ! suite.addTestSuite(TitleTagTest.class); ! suite.addTestSuite(DoctypeTagTest.class); ! suite.addTestSuite(EndTagTest.class); ! suite.addTestSuite(MetaTagTest.class); ! suite.addTestSuite(StyleTagTest.class); ! suite.addTestSuite(AppletTagTest.class); ! suite.addTestSuite(FrameTagTest.class); ! suite.addTestSuite(FrameSetTagTest.class); ! suite.addTestSuite(InputTagTest.class); ! suite.addTestSuite(OptionTagTest.class); ! suite.addTestSuite(SelectTagTest.class); ! suite.addTestSuite(TextareaTagTest.class); ! suite.addTestSuite(FormTagTest.class); ! suite.addTestSuite(BaseHrefTagTest.class); ! suite.addTestSuite(ObjectCollectionTest.class); ! 
suite.addTestSuite(BodyTagTest.class); ! suite.addTestSuite(CompositeTagTest.class); ! return suite; ! } /** --- 33,66 ---- public class AllTests extends junit.framework.TestCase { ! public AllTests(String name) { ! super(name); ! } ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("Tag Tests"); ! suite.addTestSuite(JspTagTest.class); ! suite.addTestSuite(ScriptTagTest.class); ! suite.addTestSuite(ImageTagTest.class); ! suite.addTestSuite(LinkTagTest.class); ! suite.addTestSuite(TagTest.class); ! suite.addTestSuite(TitleTagTest.class); ! suite.addTestSuite(DoctypeTagTest.class); ! suite.addTestSuite(EndTagTest.class); ! suite.addTestSuite(MetaTagTest.class); ! suite.addTestSuite(StyleTagTest.class); ! suite.addTestSuite(AppletTagTest.class); ! suite.addTestSuite(FrameTagTest.class); ! suite.addTestSuite(FrameSetTagTest.class); ! suite.addTestSuite(InputTagTest.class); ! suite.addTestSuite(OptionTagTest.class); ! suite.addTestSuite(SelectTagTest.class); ! suite.addTestSuite(TextareaTagTest.class); ! suite.addTestSuite(FormTagTest.class); ! suite.addTestSuite(BaseHrefTagTest.class); ! suite.addTestSuite(ObjectCollectionTest.class); ! suite.addTestSuite(BodyTagTest.class); ! suite.addTestSuite(CompositeTagTest.class); ! return suite; ! } /** Index: BaseHrefTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/BaseHrefTagTest.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** BaseHrefTagTest.java 24 Aug 2003 21:59:44 -0000 1.23 --- BaseHrefTagTest.java 3 Sep 2003 23:36:21 -0000 1.24 *************** *** 36,56 **** public class BaseHrefTagTest extends ParserTestCase { ! public BaseHrefTagTest(String name) { ! super(name); ! } ! ! public void testConstruction() { ! BaseHrefTag baseRefTag = new BaseHrefTag(new TagData(0,0,"",""),"http://www.abc.com"); ! 
assertEquals("Expected Base URL","http://www.abc.com",baseRefTag.getBaseUrl()); ! } ! ! public void testNotHREFBaseTag() throws ParserException { ! createParser("<base target=\"_top\">"); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertTrue("Should be a base tag but was "+node[0].getClass().getName(),node[0] instanceof BaseHrefTag); ! BaseHrefTag baseTag = (BaseHrefTag)node[0]; ! assertStringEquals("Base Tag HTML","<BASE TARGET=\"_top\">",baseTag.toHtml()); ! } } --- 36,56 ---- public class BaseHrefTagTest extends ParserTestCase { ! public BaseHrefTagTest(String name) { ! super(name); ! } ! ! public void testConstruction() { ! BaseHrefTag baseRefTag = new BaseHrefTag(new TagData(0,0,"",""),"http://www.abc.com"); ! assertEquals("Expected Base URL","http://www.abc.com",baseRefTag.getBaseUrl()); ! } ! ! public void testNotHREFBaseTag() throws ParserException { ! createParser("<base target=\"_top\">"); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertTrue("Should be a base tag but was "+node[0].getClass().getName(),node[0] instanceof BaseHrefTag); ! BaseHrefTag baseTag = (BaseHrefTag)node[0]; ! assertStringEquals("Base Tag HTML","<BASE TARGET=\"_top\">",baseTag.toHtml()); ! } } Index: BodyTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/BodyTagTest.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** BodyTagTest.java 24 Aug 2003 21:59:44 -0000 1.9 --- BodyTagTest.java 3 Sep 2003 23:36:21 -0000 1.10 *************** *** 40,71 **** public class BodyTagTest extends ParserTestCase { ! private BodyTag bodyTag; ! public BodyTagTest(String name) { ! super(name); ! } ! ! protected void setUp() throws Exception { ! super.setUp(); ! createParser("<html><head><title>body tag test</title></head><body>Yahoo!</body></html>"); ! parser.registerScanners(); ! parser.addScanner(new BodyScanner("-b")); ! 
parseAndAssertNodeCount(6); ! assertTrue(node[4] instanceof BodyTag); ! bodyTag = (BodyTag) node[4]; ! } ! ! public void testToPlainTextString() throws ParserException { ! // check the label node ! assertEquals("Body","Yahoo!",bodyTag.toPlainTextString()); ! } ! public void testToHTML() throws ParserException { ! assertStringEquals("Raw String","<BODY>Yahoo!</BODY>",bodyTag.toHtml()); ! } ! public void testToString() throws ParserException { ! assertEquals("Body","BODY: Yahoo!",bodyTag.toString()); ! } public void testAttributes () --- 40,71 ---- public class BodyTagTest extends ParserTestCase { ! private BodyTag bodyTag; ! public BodyTagTest(String name) { ! super(name); ! } ! ! protected void setUp() throws Exception { ! super.setUp(); ! createParser("<html><head><title>body tag test</title></head><body>Yahoo!</body></html>"); ! parser.registerScanners(); ! parser.addScanner(new BodyScanner("-b")); ! parseAndAssertNodeCount(6); ! assertTrue(node[4] instanceof BodyTag); ! bodyTag = (BodyTag) node[4]; ! } ! ! public void testToPlainTextString() throws ParserException { ! // check the label node ! assertEquals("Body","Yahoo!",bodyTag.toPlainTextString()); ! } ! public void testToHTML() throws ParserException { ! assertStringEquals("Raw String","<BODY>Yahoo!</BODY>",bodyTag.toHtml()); ! } ! public void testToString() throws ParserException { ! assertEquals("Body","BODY: Yahoo!",bodyTag.toString()); ! } public void testAttributes () *************** *** 101,113 **** } } ! ! public static TestSuite suite() ! { ! return new TestSuite(BodyTagTest.class); ! } ! ! public static void main(String[] args) ! { ! new junit.awtui.TestRunner().start(new String[] {BodyTagTest.class.getName()}); ! } } --- 101,113 ---- } } ! ! public static TestSuite suite() ! { ! return new TestSuite(BodyTagTest.class); ! } ! ! public static void main(String[] args) ! { ! new junit.awtui.TestRunner().start(new String[] {BodyTagTest.class.getName()}); ! 
} } Index: CompositeTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/CompositeTagTest.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** CompositeTagTest.java 24 Aug 2003 21:59:44 -0000 1.5 --- CompositeTagTest.java 3 Sep 2003 23:36:21 -0000 1.6 *************** *** 37,99 **** public class CompositeTagTest extends ParserTestCase { ! public CompositeTagTest(String name) { ! super(name); ! } ! public void testDigupStringNode() throws ParserException { ! createParser( ! "<table>" + ! "<table>" + ! "<tr>" + ! "<td>" + ! "Hello World" + ! "</td>" + ! "</tr>" + ! "</table>" + ! "</table>" ! ); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! TableTag tableTag = (TableTag)node[0]; ! StringNode [] stringNode = ! tableTag.digupStringNode("Hello World"); ! ! assertEquals("number of string nodes",1,stringNode.length); ! assertNotNull("should have found string node",stringNode); ! Node parent = stringNode[0].getParent(); ! assertType("should be column",TableColumn.class,parent); ! parent = parent.getParent(); ! assertType("should be row",TableRow.class,parent); ! parent = parent.getParent(); ! assertType("should be table",TableTag.class,parent); ! parent = parent.getParent(); ! assertType("should be table again",TableTag.class,parent); ! assertSame("should be original table",tableTag,parent); ! } ! ! public void testFindPositionOf() throws ParserException { ! createParser( ! "<table>" + ! "<table>" + ! "<tr>" + ! "<td>" + ! "Hi There<a><b>sdsd</b>" + ! "Hello World" + ! "</td>" + ! "</tr>" + ! "</table>" + ! "</table>" ! ); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! TableTag tableTag = (TableTag)node[0]; ! StringNode [] stringNode = ! tableTag.digupStringNode("Hello World"); ! ! assertEquals("number of string nodes",1,stringNode.length); ! assertNotNull("should have found string node",stringNode); ! 
CompositeTag parent = (CompositeTag)stringNode[0].getParent(); ! int pos = parent.findPositionOf(stringNode[0]); ! assertEquals("position",5,pos); ! } } --- 37,99 ---- public class CompositeTagTest extends ParserTestCase { ! public CompositeTagTest(String name) { ! super(name); ! } ! public void testDigupStringNode() throws ParserException { ! createParser( ! "<table>" + ! "<table>" + ! "<tr>" + ! "<td>" + ! "Hello World" + ! "</td>" + ! "</tr>" + ! "</table>" + ! "</table>" ! ); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! TableTag tableTag = (TableTag)node[0]; ! StringNode [] stringNode = ! tableTag.digupStringNode("Hello World"); ! ! assertEquals("number of string nodes",1,stringNode.length); ! assertNotNull("should have found string node",stringNode); ! Node parent = stringNode[0].getParent(); ! assertType("should be column",TableColumn.class,parent); ! parent = parent.getParent(); ! assertType("should be row",TableRow.class,parent); ! parent = parent.getParent(); ! assertType("should be table",TableTag.class,parent); ! parent = parent.getParent(); ! assertType("should be table again",TableTag.class,parent); ! assertSame("should be original table",tableTag,parent); ! } ! ! public void testFindPositionOf() throws ParserException { ! createParser( ! "<table>" + ! "<table>" + ! "<tr>" + ! "<td>" + ! "Hi There<a><b>sdsd</b>" + ! "Hello World" + ! "</td>" + ! "</tr>" + ! "</table>" + ! "</table>" ! ); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! TableTag tableTag = (TableTag)node[0]; ! StringNode [] stringNode = ! tableTag.digupStringNode("Hello World"); ! ! assertEquals("number of string nodes",1,stringNode.length); ! assertNotNull("should have found string node",stringNode); ! CompositeTag parent = (CompositeTag)stringNode[0].getParent(); ! int pos = parent.findPositionOf(stringNode[0]); ! assertEquals("position",5,pos); ! 
} } Index: DoctypeTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/DoctypeTagTest.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** DoctypeTagTest.java 24 Aug 2003 21:59:44 -0000 1.24 --- DoctypeTagTest.java 3 Sep 2003 23:36:21 -0000 1.25 *************** *** 35,60 **** public class DoctypeTagTest extends ParserTestCase { ! public DoctypeTagTest(String name) { ! super(name); ! } ! public void testToHTML() throws ParserException { ! String testHTML = new String( ! "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n"+ ! "<HTML>\n"+ ! "<HEAD>\n"+ ! "<TITLE>Cogs of Chicago</TITLE>\n"+ ! "</HEAD>\n"+ ! "<BODY>\n"+ ! "...\n"+ ! "</BODY>\n"+ ! "</HTML>\n"); ! createParser(testHTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(9); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLDoctypeTag",node[0] instanceof DoctypeTag); ! DoctypeTag docTypeTag = (DoctypeTag)node[0]; ! assertStringEquals("toHTML()","<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">",docTypeTag.toHtml()); ! } } --- 35,60 ---- public class DoctypeTagTest extends ParserTestCase { ! public DoctypeTagTest(String name) { ! super(name); ! } ! public void testToHTML() throws ParserException { ! String testHTML = new String( ! "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n"+ ! "<HTML>\n"+ ! "<HEAD>\n"+ ! "<TITLE>Cogs of Chicago</TITLE>\n"+ ! "</HEAD>\n"+ ! "<BODY>\n"+ ! "...\n"+ ! "</BODY>\n"+ ! "</HTML>\n"); ! createParser(testHTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(9); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLDoctypeTag",node[0] instanceof DoctypeTag); ! DoctypeTag docTypeTag = (DoctypeTag)node[0]; ! assertStringEquals("toHTML()","<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">",docTypeTag.toHtml()); ! 
} } Index: EndTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/EndTagTest.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** EndTagTest.java 24 Aug 2003 21:59:44 -0000 1.25 --- EndTagTest.java 3 Sep 2003 23:36:21 -0000 1.26 *************** *** 36,61 **** public class EndTagTest extends ParserTestCase { ! public EndTagTest(String name) { ! super(name); ! } ! public void testToHTML() throws ParserException { ! createParser("<HTML></HTML>"); ! // Register the image scanner ! parser.registerScanners(); ! parseAndAssertNodeCount(2); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLEndTag",node[1] instanceof EndTag); ! EndTag endTag = (EndTag)node[1]; ! assertEquals("Raw String","</HTML>",endTag.toHtml()); ! } ! ! public void testEndTagFind() { ! String testHtml = ! "<SCRIPT>document.write(d+\".com\")</SCRIPT>"; ! int pos = testHtml.indexOf("</SCRIPT>"); ! EndTag endTag = (EndTag)EndTag.find(testHtml,pos); ! assertEquals("endtag element begin",32,endTag.elementBegin()); ! assertEquals("endtag element end",40,endTag.elementEnd()); ! } } --- 36,61 ---- public class EndTagTest extends ParserTestCase { ! public EndTagTest(String name) { ! super(name); ! } ! public void testToHTML() throws ParserException { ! createParser("<HTML></HTML>"); ! // Register the image scanner ! parser.registerScanners(); ! parseAndAssertNodeCount(2); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLEndTag",node[1] instanceof EndTag); ! EndTag endTag = (EndTag)node[1]; ! assertEquals("Raw String","</HTML>",endTag.toHtml()); ! } ! ! public void testEndTagFind() { ! String testHtml = ! "<SCRIPT>document.write(d+\".com\")</SCRIPT>"; ! int pos = testHtml.indexOf("</SCRIPT>"); ! EndTag endTag = (EndTag)EndTag.find(testHtml,pos); ! assertEquals("endtag element begin",32,endTag.elementBegin()); ! 
assertEquals("endtag element end",40,endTag.elementEnd()); ! } } Index: FormTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/FormTagTest.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** FormTagTest.java 24 Aug 2003 21:59:44 -0000 1.28 --- FormTagTest.java 3 Sep 2003 23:36:21 -0000 1.29 *************** *** 42,145 **** public class FormTagTest extends ParserTestCase { ! public FormTagTest(String name) { ! super(name); ! } ! public void testSetFormLocation() throws ParserException{ ! createParser(FormScannerTest.FORM_HTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! formTag.setFormLocation("http://www.yahoo.com/yahoo/do_not_login.jsp"); ! String expectedHTML = "<FORM ACTION=\"http://www.yahoo.com/yahoo/do_not_login.jsp\" NAME=\"login_form\" ONSUBMIT=\"return CheckData()\" METHOD=\""+FormTag.POST+"\">\r\n"+ ! FormScannerTest.EXPECTED_FORM_HTML_REST_OF_FORM; ! assertStringEquals("Raw String",expectedHTML,formTag.toHtml()); ! } ! ! public void testToPlainTextString() throws ParserException { ! createParser(FormScannerTest.FORM_HTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! assertStringEquals("Form Tag string representation"," User NamePassword Contents of TextArea",formTag.toPlainTextString()); ! } ! ! public void testSearchFor() throws ParserException { ! createParser(FormScannerTest.FORM_HTML); ! parser.addScanner(new FormScanner(parser)); ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! NodeList nodeList = formTag.searchFor("USER NAME"); ! 
assertEquals("Should have found nodes",1,nodeList.size()); ! ! Node[] nodes = nodeList.toNodeArray(); ! ! assertEquals("Number of nodes found",1,nodes.length); ! assertType("search result node",StringNode.class,nodes[0]); ! StringNode stringNode = (StringNode)nodes[0]; ! assertEquals("Expected contents of string node","User Name",stringNode.getText()); ! } ! public void testSearchForCaseSensitive() throws ParserException { ! createParser(FormScannerTest.FORM_HTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! NodeList nodeList = formTag.searchFor("USER NAME",true); ! assertEquals("Should have not found nodes",0,nodeList.size()); ! ! nodeList = formTag.searchFor("User Name",true); ! assertNotNull("Should have not found nodes",nodeList); ! } ! ! public void testSearchByName() throws ParserException { ! createParser(FormScannerTest.FORM_HTML); ! parser.addScanner(new FormScanner(parser)); ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! Tag tag= formTag.searchByName("passwd"); ! assertNotNull("Should have found the password node",tag); ! assertType("tag found",InputTag.class,tag); ! } ! ! /** ! * Bug 713907 reported by Dhaval Udani, erroneous ! * attributes being reported. ! */ ! public void testFormRendering() throws Exception { ! String testHTML = ! "<HTML><HEAD><TITLE>Test Form Tag</TITLE></HEAD>" + ! "<BODY><FORM name=\"form0\"><INPUT type=\"text\" name=\"text0\"></FORM>" + ! "</BODY></HTML>"; ! createParser( ! testHTML ! ); ! parser.registerScanners(); ! FormTag formTag = ! (FormTag)(parser.extractAllNodesThatAre( ! FormTag.class ! )[0]); ! assertNotNull("Should have found a form tag",formTag); ! assertStringEquals("name","form0",formTag.getFormName()); ! assertNull("action",formTag.getAttribute("ACTION")); ! assertXmlEquals( ! "html", ! 
"<FORM NAME=\"form0\">" + "<INPUT TYPE=\"text\" NAME=\"text0\">" + "</FORM>", ! formTag.toHtml() ! ); ! } } --- 42,147 ---- public class FormTagTest extends ParserTestCase { ! public FormTagTest(String name) { ! super(name); ! } ! public void testSetFormLocation() throws ParserException{ ! createParser(FormScannerTest.FORM_HTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! formTag.setFormLocation("http://www.yahoo.com/yahoo/do_not_login.jsp"); ! String expectedHTML = "<FORM ACTION=\"http://www.yahoo.com/yahoo/do_not_login.jsp\" NAME=\"login_form\" ONSUBMIT=\"return CheckData()\" METHOD=\""+FormTag.POST+"\">\r\n"+ ! FormScannerTest.EXPECTED_FORM_HTML_REST_OF_FORM; ! assertStringEquals("Raw String",expectedHTML,formTag.toHtml()); ! } ! ! public void testToPlainTextString() throws ParserException { ! createParser(FormScannerTest.FORM_HTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! assertStringEquals("Form Tag string representation"," User NamePassword Contents of TextArea",formTag.toPlainTextString()); ! } ! ! public void testSearchFor() throws ParserException { ! createParser(FormScannerTest.FORM_HTML); ! parser.addScanner(new FormScanner(parser)); ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! NodeList nodeList = formTag.searchFor("USER NAME"); ! assertEquals("Should have found nodes",1,nodeList.size()); ! ! Node[] nodes = nodeList.toNodeArray(); ! ! assertEquals("Number of nodes found",1,nodes.length); ! assertType("search result node",StringNode.class,nodes[0]); ! StringNode stringNode = (StringNode)nodes[0]; ! assertEquals("Expected contents of string node","User Name",stringNode.getText()); ! } ! 
public void testSearchForCaseSensitive() throws ParserException { ! createParser(FormScannerTest.FORM_HTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! NodeList nodeList = formTag.searchFor("USER NAME",true); ! assertEquals("Should have not found nodes",0,nodeList.size()); ! ! nodeList = formTag.searchFor("User Name",true); ! assertNotNull("Should have not found nodes",nodeList); ! } ! ! public void testSearchByName() throws ParserException { ! createParser(FormScannerTest.FORM_HTML); ! parser.addScanner(new FormScanner(parser)); ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be Form Tag",node[0] instanceof FormTag); ! FormTag formTag = (FormTag)node[0]; ! Tag tag= formTag.searchByName("passwd"); ! assertNotNull("Should have found the password node",tag); ! assertType("tag found",InputTag.class,tag); ! } ! ! /** ! * Bug 713907 reported by Dhaval Udani, erroneous ! * attributes being reported. ! */ ! public void testFormRendering() throws Exception { ! String testHTML = ! "<HTML><HEAD><TITLE>Test Form Tag</TITLE></HEAD>" + ! "<BODY><FORM name=\"form0\"><INPUT type=\"text\" name=\"text0\"></FORM>" + ! "</BODY></HTML>"; ! createParser( ! testHTML ! ); ! parser.registerScanners(); ! FormTag formTag = ! (FormTag)(parser.extractAllNodesThatAre( ! FormTag.class ! )[0]); ! assertNotNull("Should have found a form tag",formTag); ! assertStringEquals("name","form0",formTag.getFormName()); ! assertNull("action",formTag.getAttribute("ACTION")); ! assertXmlEquals( ! "html", ! "<FORM NAME=\"form0\">" + ! "<INPUT TYPE=\"text\" NAME=\"text0\">" + ! "</FORM>", ! formTag.toHtml() ! ); ! 
} } Index: FrameSetTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/FrameSetTagTest.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** FrameSetTagTest.java 24 Aug 2003 21:59:44 -0000 1.25 --- FrameSetTagTest.java 3 Sep 2003 23:36:21 -0000 1.26 *************** *** 37,64 **** public class FrameSetTagTest extends ParserTestCase { ! public FrameSetTagTest(String name) { ! super(name); ! } ! public void testToHTML() throws ParserException{ ! createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ ! "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ ! "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ ! "</frameset>"); ! parser.addScanner(new FrameSetScanner("")); ! parser.addScanner(new FrameScanner("")); ! ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be End Tag",node[0] instanceof FrameSetTag); ! FrameSetTag frameSetTag = (FrameSetTag)node[0]; ! assertStringEquals("HTML Contents", ! "<FRAMESET BORDER=\"0\" ROWS=\"115,*\" FRAMESPACING=\"0\" FRAMEBORDER=\"NO\">\r\n"+ ! "<FRAME SCROLLING=\"NO\" FRAMEBORDER=\"NO\" SRC=\"demo_bc_top.html\" NAME=\"topFrame\" NORESIZE>\r\n"+ ! "<FRAME SCROLLING=\"AUTO\" SRC=\"http://www.kizna.com/web_e/\" NAME=\"mainFrame\">\r\n"+ ! "</FRAMESET>", ! frameSetTag.toHtml()); ! } } --- 37,64 ---- public class FrameSetTagTest extends ParserTestCase { ! public FrameSetTagTest(String name) { ! super(name); ! } ! public void testToHTML() throws ParserException{ ! createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ ! "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ ! "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ ! "</frameset>"); ! 
parser.addScanner(new FrameSetScanner("")); ! parser.addScanner(new FrameScanner("")); ! ! parseAndAssertNodeCount(1); ! assertTrue("Node 0 should be End Tag",node[0] instanceof FrameSetTag); ! FrameSetTag frameSetTag = (FrameSetTag)node[0]; ! assertStringEquals("HTML Contents", ! "<FRAMESET BORDER=\"0\" ROWS=\"115,*\" FRAMESPACING=\"0\" FRAMEBORDER=\"NO\">\r\n"+ ! "<FRAME SCROLLING=\"NO\" FRAMEBORDER=\"NO\" SRC=\"demo_bc_top.html\" NAME=\"topFrame\" NORESIZE>\r\n"+ ! "<FRAME SCROLLING=\"AUTO\" SRC=\"http://www.kizna.com/web_e/\" NAME=\"mainFrame\">\r\n"+ ! "</FRAMESET>", ! frameSetTag.toHtml()); ! } } Index: FrameTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/FrameTagTest.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** FrameTagTest.java 24 Aug 2003 21:59:44 -0000 1.25 --- FrameTagTest.java 3 Sep 2003 23:36:22 -0000 1.26 *************** *** 36,61 **** public class FrameTagTest extends ParserTestCase { ! public FrameTagTest(String name) { ! super(name); ! } ! public void testToHTML() throws ParserException { ! createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ ! "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ ! "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ ! "</frameset>"); ! parser.addScanner(new FrameScanner("")); ! ! parseAndAssertNodeCount(4); ! assertTrue("Node 1 should be Frame Tag",node[1] instanceof FrameTag); ! assertTrue("Node 2 should be Frame Tag",node[2] instanceof FrameTag); ! FrameTag frameTag1 = (FrameTag)node[1]; ! FrameTag frameTag2 = (FrameTag)node[2]; ! ! assertStringEquals("Frame 1 toHTML()","<FRAME SCROLLING=\"NO\" FRAMEBORDER=\"NO\" SRC=\"demo_bc_top.html\" NAME=\"topFrame\" NORESIZE>",frameTag1.toHtml()); ! 
assertStringEquals("Frame 2 toHTML()","<FRAME SCROLLING=\"AUTO\" SRC=\"http://www.kizna.com/web_e/\" NAME=\"mainFrame\">",frameTag2.toHtml()); ! } } --- 36,61 ---- public class FrameTagTest extends ParserTestCase { ! public FrameTagTest(String name) { ! super(name); ! } ! public void testToHTML() throws ParserException { ! createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ ! "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ ! "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ ! "</frameset>"); ! parser.addScanner(new FrameScanner("")); ! ! parseAndAssertNodeCount(4); ! assertTrue("Node 1 should be Frame Tag",node[1] instanceof FrameTag); ! assertTrue("Node 2 should be Frame Tag",node[2] instanceof FrameTag); ! FrameTag frameTag1 = (FrameTag)node[1]; ! FrameTag frameTag2 = (FrameTag)node[2]; ! ! assertStringEquals("Frame 1 toHTML()","<FRAME SCROLLING=\"NO\" FRAMEBORDER=\"NO\" SRC=\"demo_bc_top.html\" NAME=\"topFrame\" NORESIZE>",frameTag1.toHtml()); ! assertStringEquals("Frame 2 toHTML()","<FRAME SCROLLING=\"AUTO\" SRC=\"http://www.kizna.com/web_e/\" NAME=\"mainFrame\">",frameTag2.toHtml()); ! } } Index: InputTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/InputTagTest.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** InputTagTest.java 24 Aug 2003 21:59:44 -0000 1.27 --- InputTagTest.java 3 Sep 2003 23:36:22 -0000 1.28 *************** *** 38,93 **** public class InputTagTest extends ParserTestCase { ! private String testHTML = new String("<INPUT type=\"text\" name=\"Google\">"); ! public InputTagTest(String name) ! { ! super(name); ! } ! ! protected void setUp() throws Exception ! { ! super.setUp(); ! createParser(testHTML,"http://www.google.com/test/index.html"); ! 
parser.addScanner(new InputTagScanner("-i")); ! } ! ! public void testToHTML() throws ParserException ! { ! parseAndAssertNodeCount(1); ! assertTrue("Node 1 should be INPUT Tag",node[0] instanceof InputTag); ! InputTag InputTag; ! InputTag = (InputTag) node[0]; ! assertStringEquals ("HTML String","<INPUT NAME=\"Google\" TYPE=\"text\">",InputTag.toHtml()); ! } ! ! public void testToString() throws ParserException ! { ! parseAndAssertNodeCount(1); ! assertTrue("Node 1 should be INPUT Tag",node[0] instanceof InputTag); ! InputTag InputTag; ! InputTag = (InputTag) node[0]; ! assertEquals("HTML Raw String","INPUT TAG\n--------\nNAME : Google\nTYPE : text\n",InputTag.toString()); ! } ! ! /** ! * Reproduction of bug report 663038 ! * @throws ParserException ! */ ! public void testToHTML2() throws ParserException ! { ! String testHTML = new String("<INPUT type=\"checkbox\" " ! +"name=\"cbCheck\" checked>"); ! createParser(testHTML); ! parser.addScanner(new InputTagScanner("-i")); ! parseAndAssertNodeCount(1); ! assertTrue("Node 1 should be INPUT Tag", ! node[0] instanceof InputTag); ! InputTag InputTag; ! InputTag = (InputTag) node[0]; ! assertStringEquals("HTML String", ! "<INPUT CHECKED NAME=\"cbCheck\" TYPE=\"checkbox\">", ! InputTag.toHtml()); ! } ! } --- 38,93 ---- public class InputTagTest extends ParserTestCase { ! private String testHTML = new String("<INPUT type=\"text\" name=\"Google\">"); ! public InputTagTest(String name) ! { ! super(name); ! } ! ! protected void setUp() throws Exception ! { ! super.setUp(); ! createParser(testHTML,"http://www.google.com/test/index.html"); ! parser.addScanner(new InputTagScanner("-i")); ! } ! ! public void testToHTML() throws ParserException ! { ! parseAndAssertNodeCount(1); ! assertTrue("Node 1 should be INPUT Tag",node[0] instanceof InputTag); ! InputTag InputTag; ! InputTag = (InputTag) node[0]; ! assertStringEquals ("HTML String","<INPUT NAME=\"Google\" TYPE=\"text\">",InputTag.toHtml()); ! } ! ! 
public void testToString() throws ParserException ! { ! parseAndAssertNodeCount(1); ! assertTrue("Node 1 should be INPUT Tag",node[0] instanceof InputTag); ! InputTag InputTag; ! InputTag = (InputTag) node[0]; ! assertEquals("HTML Raw String","INPUT TAG\n--------\nNAME : Google\nTYPE : text\n",InputTag.toString()); ! } ! ! /** ! * Reproduction of bug report 663038 ! * @throws ParserException ! */ ! public void testToHTML2() throws ParserException ! { ! String testHTML = new String("<INPUT type=\"checkbox\" " ! +"name=\"cbCheck\" checked>"); ! createParser(testHTML); ! parser.addScanner(new InputTagScanner("-i")); ! parseAndAssertNodeCount(1); ! assertTrue("Node 1 should be INPUT Tag", ! node[0] instanceof InputTag); ! InputTag InputTag; ! InputTag = (InputTag) node[0]; ! assertStringEquals("HTML String", ! "<INPUT CHECKED NAME=\"cbCheck\" TYPE=\"checkbox\">", ! InputTag.toHtml()); ! } ! } Index: MetaTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/MetaTagTest.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** MetaTagTest.java 24 Aug 2003 21:59:44 -0000 1.25 --- MetaTagTest.java 3 Sep 2003 23:36:22 -0000 1.26 *************** *** 35,63 **** public class MetaTagTest extends ParserTestCase { ! public MetaTagTest(String name) { ! super(name); ! } ! public void testToHTML() throws ParserException { ! createParser( ! "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n"+ ! "<html>\n"+ ! "<head><title>SpamCop - Welcome to SpamCop\n"+ ! "</title>\n"+ ! "<META name=\"description\" content=\"Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\">\n"+ ! 
"<META name=\"keywords\" content=\"SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns\">\n"+ ! "<META name=\"language\" content=\"en\">\n"+ ! "<META name=\"owner\" content=\"se...@ad...\">\n"+ ! "<META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html; charset=ISO-8859-1\">"); ! parser.registerScanners(); ! ! parseAndAssertNodeCount(9); ! assertTrue("Node 5 should be META Tag",node[4] instanceof MetaTag); ! MetaTag metaTag; ! metaTag = (MetaTag) node[4]; ! assertStringEquals("Meta Tag 4 Name","description",metaTag.getMetaTagName()); ! assertStringEquals("Meta Tag 4 Contents","Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.",metaTag.getMetaContent()); ! assertStringEquals("toHTML()","<META CONTENT=\"Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\" NAME=\"description\">",metaTag.toHtml()); ! } } --- 35,63 ---- public class MetaTagTest extends ParserTestCase { ! public MetaTagTest(String name) { ! super(name); ! } ! public void testToHTML() throws ParserException { ! createParser( ! "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n"+ ! "<html>\n"+ ! "<head><title>SpamCop - Welcome to SpamCop\n"+ ! "</title>\n"+ ! "<META name=\"description\" content=\"Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. 
Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\">\n"+ ! "<META name=\"keywords\" content=\"SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns\">\n"+ ! "<META name=\"language\" content=\"en\">\n"+ ! "<META name=\"owner\" content=\"se...@ad...\">\n"+ ! "<META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html; charset=ISO-8859-1\">"); ! parser.registerScanners(); ! ! parseAndAssertNodeCount(9); ! assertTrue("Node 5 should be META Tag",node[4] instanceof MetaTag); ! MetaTag metaTag; ! metaTag = (MetaTag) node[4]; ! assertStringEquals("Meta Tag 4 Name","description",metaTag.getMetaTagName()); ! assertStringEquals("Meta Tag 4 Contents","Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.",metaTag.getMetaContent()); ! assertStringEquals("toHTML()","<META CONTENT=\"Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\" NAME=\"description\">",metaTag.toHtml()); ! } } Index: ObjectCollectionTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/ObjectCollectionTest.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** ObjectCollectionTest.java 24 Aug 2003 21:59:44 -0000 1.9 --- ObjectCollectionTest.java 3 Sep 2003 23:36:22 -0000 1.10 *************** *** 42,112 **** public class ObjectCollectionTest extends ParserTestCase { ! 
public ObjectCollectionTest(String name) { ! super(name); ! } ! private void assertSpanContent(Node[] spans) { ! assertEquals("number of span objects expected",2,spans.length); ! assertType("span",Span.class,spans[0]); ! assertType("span",Span.class,spans[1]); ! assertStringEquals( ! "span[0] text", ! "The Refactoring Challenge", ! spans[0].toPlainTextString() ! ); ! assertStringEquals( ! "span[1] text", ! "
id: 6", ! spans[1].toPlainTextString() ! ); ! } ! ! public void testSimpleSearch() throws ParserException { ! createParser( ! "<SPAN>The Refactoring Challenge</SPAN>" + ! "<SPAN>
id: 6</SPAN>" ! ); ! parser.registerScanners(); ! parser.addScanner(new SpanScanner()); ! assertSpanContent(parser.extractAllNodesThatAre(Span.class)); ! } ! ! public void testOneLevelNesting() throws ParserException { ! createParser( ! "<DIV>" + ! " <SPAN>The Refactoring Challenge</SPAN>" + ! " <SPAN>
id: 6</SPAN>" + ! "</DIV>" ! ); ! parser.registerScanners(); ! parser.addScanner(new DivScanner()); ! parser.addScanner(new SpanScanner()); ! parseAndAssertNodeCount(1); ! Div div = (Div)node[0]; ! NodeList nodeList = new NodeList(); ! div.collectInto(nodeList,Span.class); ! Node[] spans = nodeList.toNodeArray(); ! assertSpanContent(spans); ! } ! public void testTwoLevelNesting() throws ParserException { ! createParser( ! "<table>" + ! " <DIV>" + ! " <SPAN>The Refactoring Challenge</SPAN>" + ! " <SPAN>
id: 6</SPAN>" + ! " </DIV>" + ! "</table>" ! ); ! parser.registerScanners(); ! parser.addScanner(new DivScanner()); ! parser.addScanner(new SpanScanner()); ! parser.addScanner(new TableScanner(parser)); ! parseAndAssertNodeCount(1); ! TableTag tableTag = (TableTag)node[0]; ! NodeList nodeList = new NodeList(); ! tableTag.collectInto(nodeList,Span.class); ! Node [] spans = nodeList.toNodeArray(); ! assertSpanContent(spans); ! } } --- 42,112 ---- public class ObjectCollectionTest extends ParserTestCase { ! public ObjectCollectionTest(String name) { ! super(name); ! } ! private void assertSpanContent(Node[] spans) { ! assertEquals("number of span objects expected",2,spans.length); ! assertType("span",Span.class,spans[0]); ! assertType("span",Span.class,spans[1]); ! assertStringEquals( ! "span[0] text", ! "The Refactoring Challenge", ! spans[0].toPlainTextString() ! ); ! assertStringEquals( ! "span[1] text", ! "
id: 6", ! spans[1].toPlainTextString() ! ); ! } ! ! public void testSimpleSearch() throws ParserException { ! createParser( ! "<SPAN>The Refactoring Challenge</SPAN>" + ! "<SPAN>
id: 6</SPAN>" ! ); ! parser.registerScanners(); ! parser.addScanner(new SpanScanner()); ! assertSpanContent(parser.extractAllNodesThatAre(Span.class)); ! } ! ! public void testOneLevelNesting() throws ParserException { ! createParser( ! "<DIV>" + ! " <SPAN>The Refactoring Challenge</SPAN>" + ! " <SPAN>
id: 6</SPAN>" + ! "</DIV>" ! ); ! parser.registerScanners(); ! parser.addScanner(new DivScanner()); ! parser.addScanner(new SpanScanner()); ! parseAndAssertNodeCount(1); ! Div div = (Div)node[0]; ! NodeList nodeList = new NodeList(); ! div.collectInto(nodeList,Span.class); ! Node[] spans = nodeList.toNodeArray(); ! assertSpanContent(spans); ! } ! public void testTwoLevelNesting() throws ParserException { ! createParser( ! "<table>" + ! " <DIV>" + ! " <SPAN>The Refactoring Challenge</SPAN>" + ! " <SPAN>
id: 6</SPAN>" + ! " </DIV>" + ! "</table>" ! ); ! parser.registerScanners(); ! parser.addScanner(new DivScanner()); ! parser.addScanner(new SpanScanner()); ! parser.addScanner(new TableScanner(parser)); ! parseAndAssertNodeCount(1); ! TableTag tableTag = (TableTag)node[0]; ! NodeList nodeList = new NodeList(); ! tableTag.collectInto(nodeList,Span.class); ! Node [] spans = nodeList.toNodeArray(); ! assertSpanContent(spans); ! } } Index: OptionTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/OptionTagTest.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** OptionTagTest.java 24 Aug 2003 21:59:44 -0000 1.26 --- OptionTagTest.java 3 Sep 2003 23:36:22 -0000 1.27 *************** *** 41,148 **** public class OptionTagTest extends ParserTestCase { ! private String testHTML = new String( ! "<OPTION value=\"Google Search\">Google</OPTION>" + ! "<OPTION value=\"AltaVista Search\">AltaVista" + ! "<OPTION value=\"Lycos Search\"></OPTION>" + ! "<OPTION>Yahoo!</OPTION>" + ! "<OPTION>\nHotmail</OPTION>" + ! "<OPTION value=\"ICQ Messenger\">" + ! "<OPTION>Mailcity\n</OPTION>"+ ! "<OPTION>\nIndiatimes\n</OPTION>"+ ! "<OPTION>\nRediff\n</OPTION>\n"+ ! "<OPTION>Cricinfo" + ! "<OPTION value=\"Microsoft Passport\">" ! // "<OPTION value=\"AOL\"><SPAN>AOL</SPAN></OPTION>" + ! // "<OPTION value=\"Time Warner\">Time <LABEL>Warner <SPAN>AOL </SPAN>Inc.</LABEL>" ! ); ! ! public OptionTagTest(String name) ! { ! super(name); ! } ! ! protected void setUp() throws Exception { ! super.setUp(); ! createParser(testHTML); ! parser.addScanner(new OptionTagScanner("-option", new Stack ())); ! parseAndAssertNodeCount(11); ! } ! ! public void testToHTML() throws ParserException ! { ! for(int j=0;j<nodeCount;j++) ! { ! //assertTrue("Node " + j + " should be Option Tag",node[j] instanceof OptionTag); ! System.out.println(node[j].getClass().getName()); ! 
System.out.println(node[j].toHtml()); ! } ! OptionTag OptionTag; ! OptionTag = (OptionTag) node[0]; ! assertStringEquals("HTML String","<OPTION VALUE=\"Google Search\">Google</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[1]; ! assertStringEquals("HTML String","<OPTION VALUE=\"AltaVista Search\">AltaVista</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[2]; ! assertStringEquals("HTML String","<OPTION VALUE=\"Lycos Search\"></OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[3]; ! assertStringEquals("HTML String","<OPTION>Yahoo!</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[4]; ! assertStringEquals("HTML String","<OPTION>\r\nHotmail</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[5]; ! assertStringEquals("HTML String","<OPTION VALUE=\"ICQ Messenger\"></OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[6]; ! assertStringEquals("HTML String","<OPTION>Mailcity\r\n</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[7]; ! assertStringEquals("HTML String","<OPTION>\r\nIndiatimes\r\n</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[8]; ! assertStringEquals("HTML String","<OPTION>\r\nRediff\r\n</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[9]; ! assertStringEquals("HTML String","<OPTION>Cricinfo</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[10]; ! assertStringEquals("HTML String","<OPTION VALUE=\"Microsoft Passport\"></OPTION>",OptionTag.toHtml()); ! /* OptionTag = (OptionTag) node[11]; ! assertStringEquals("HTML String","<OPTION VALUE=\"AOL\"><SPAN>AOL</SPAN></OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[12]; ! assertStringEquals("HTML String","<OPTION value=\"Time Warner\">Time <LABEL>Warner <SPAN>AOL </SPAN>Inc.</LABEL></OPTION>",OptionTag.toHtml()); ! */ } ! ! public void testToString() throws ParserException ! { ! for(int j=0;j<11;j++) ! { ! 
assertTrue("Node " + j + " should be Option Tag",node[j] instanceof OptionTag); ! } ! OptionTag OptionTag; ! OptionTag = (OptionTag) node[0]; ! assertEquals("HTML Raw String","OPTION VALUE: Google Search TEXT: Google\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[1]; ! assertEquals("HTML Raw String","OPTION VALUE: AltaVista Search TEXT: AltaVista\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[2]; ! assertEquals("HTML Raw String","OPTION VALUE: Lycos Search TEXT: \n",OptionTag.toString()); ! OptionTag = (OptionTag) node[3]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Yahoo!\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[4]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Hotmail\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[5]; ! assertEquals("HTML Raw String","OPTION VALUE: ICQ Messenger TEXT: \n",OptionTag.toString()); ! OptionTag = (OptionTag) node[6]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Mailcity\r\n\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[7]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Indiatimes\r\n\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[8]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Rediff\r\n\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[9]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Cricinfo\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[10]; ! assertEquals("HTML Raw String","OPTION VALUE: Microsoft Passport TEXT: \n",OptionTag.toString()); ! /* OptionTag = (OptionTag) node[11]; ! assertEquals("HTML Raw String","OPTION VALUE: AOL TEXT: AOL\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[12]; ! assertEquals("HTML Raw String","OPTION VALUE: Time Warner TEXT: Time Warner AOL Inc.\n",OptionTag.toString()); ! */ } ! ! public static void main(String[] args) ! { ! new junit.awtui.TestRunner().start(new String[] {OptionTagTest.class.getName()}); ! } ! 
} --- 41,148 ---- public class OptionTagTest extends ParserTestCase { ! private String testHTML = new String( ! "<OPTION value=\"Google Search\">Google</OPTION>" + ! "<OPTION value=\"AltaVista Search\">AltaVista" + ! "<OPTION value=\"Lycos Search\"></OPTION>" + ! "<OPTION>Yahoo!</OPTION>" + ! "<OPTION>\nHotmail</OPTION>" + ! "<OPTION value=\"ICQ Messenger\">" + ! "<OPTION>Mailcity\n</OPTION>"+ ! "<OPTION>\nIndiatimes\n</OPTION>"+ ! "<OPTION>\nRediff\n</OPTION>\n"+ ! "<OPTION>Cricinfo" + ! "<OPTION value=\"Microsoft Passport\">" ! // "<OPTION value=\"AOL\"><SPAN>AOL</SPAN></OPTION>" + ! // "<OPTION value=\"Time Warner\">Time <LABEL>Warner <SPAN>AOL </SPAN>Inc.</LABEL>" ! ); ! ! public OptionTagTest(String name) ! { ! super(name); ! } ! ! protected void setUp() throws Exception { ! super.setUp(); ! createParser(testHTML); ! parser.addScanner(new OptionTagScanner("-option", new Stack ())); ! parseAndAssertNodeCount(11); ! } ! ! public void testToHTML() throws ParserException ! { ! for(int j=0;j<nodeCount;j++) ! { ! //assertTrue("Node " + j + " should be Option Tag",node[j] instanceof OptionTag); ! System.out.println(node[j].getClass().getName()); ! System.out.println(node[j].toHtml()); ! } ! OptionTag OptionTag; ! OptionTag = (OptionTag) node[0]; ! assertStringEquals("HTML String","<OPTION VALUE=\"Google Search\">Google</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[1]; ! assertStringEquals("HTML String","<OPTION VALUE=\"AltaVista Search\">AltaVista</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[2]; ! assertStringEquals("HTML String","<OPTION VALUE=\"Lycos Search\"></OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[3]; ! assertStringEquals("HTML String","<OPTION>Yahoo!</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[4]; ! assertStringEquals("HTML String","<OPTION>\r\nHotmail</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[5]; ! 
assertStringEquals("HTML String","<OPTION VALUE=\"ICQ Messenger\"></OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[6]; ! assertStringEquals("HTML String","<OPTION>Mailcity\r\n</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[7]; ! assertStringEquals("HTML String","<OPTION>\r\nIndiatimes\r\n</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[8]; ! assertStringEquals("HTML String","<OPTION>\r\nRediff\r\n</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[9]; ! assertStringEquals("HTML String","<OPTION>Cricinfo</OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[10]; ! assertStringEquals("HTML String","<OPTION VALUE=\"Microsoft Passport\"></OPTION>",OptionTag.toHtml()); ! /* OptionTag = (OptionTag) node[11]; ! assertStringEquals("HTML String","<OPTION VALUE=\"AOL\"><SPAN>AOL</SPAN></OPTION>",OptionTag.toHtml()); ! OptionTag = (OptionTag) node[12]; ! assertStringEquals("HTML String","<OPTION value=\"Time Warner\">Time <LABEL>Warner <SPAN>AOL </SPAN>Inc.</LABEL></OPTION>",OptionTag.toHtml()); ! */ } ! ! public void testToString() throws ParserException ! { ! for(int j=0;j<11;j++) ! { ! assertTrue("Node " + j + " should be Option Tag",node[j] instanceof OptionTag); ! } ! OptionTag OptionTag; ! OptionTag = (OptionTag) node[0]; ! assertEquals("HTML Raw String","OPTION VALUE: Google Search TEXT: Google\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[1]; ! assertEquals("HTML Raw String","OPTION VALUE: AltaVista Search TEXT: AltaVista\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[2]; ! assertEquals("HTML Raw String","OPTION VALUE: Lycos Search TEXT: \n",OptionTag.toString()); ! OptionTag = (OptionTag) node[3]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Yahoo!\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[4]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Hotmail\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[5]; ! 
assertEquals("HTML Raw String","OPTION VALUE: ICQ Messenger TEXT: \n",OptionTag.toString()); ! OptionTag = (OptionTag) node[6]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Mailcity\r\n\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[7]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Indiatimes\r\n\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[8]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Rediff\r\n\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[9]; ! assertEquals("HTML Raw String","OPTION VALUE: null TEXT: Cricinfo\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[10]; ! assertEquals("HTML Raw String","OPTION VALUE: Microsoft Passport TEXT: \n",OptionTag.toString()); ! /* OptionTag = (OptionTag) node[11]; ! assertEquals("HTML Raw String","OPTION VALUE: AOL TEXT: AOL\n",OptionTag.toString()); ! OptionTag = (OptionTag) node[12]; ! assertEquals("HTML Raw String","OPTION VALUE: Time Warner TEXT: Time Warner AOL Inc.\n",OptionTag.toString()); ! */ } ! ! public static void main(String[] args) ! { ! new junit.awtui.TestRunner().start(new String[] {OptionTagTest.class.getName()}); ! } ! } Index: ScriptTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/ScriptTagTest.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** ScriptTagTest.java 24 Aug 2003 21:59:44 -0000 1.25 --- ScriptTagTest.java 3 Sep 2003 23:36:22 -0000 1.26 *************** *** 40,181 **** public class ScriptTagTest extends ParserTestCase{ ! private ScriptScanner scriptScanner; ! public ScriptTagTest(String name) ! { ! super(name); ! } ! protected void setUp() throws Exception ! { ! super.setUp(); ! scriptScanner = new ScriptScanner(); ! } ! public void testCreation() { ! StringNode stringNode = ! new StringNode(new StringBuffer("Script Code"),0,0); ! NodeList childVector = new NodeList(); ! 
childVector.add(stringNode); ! ScriptTag scriptTag = ! new ScriptTag( ! new TagData(0,10,"Tag Contents","tagline"), ! new CompositeTagData(null,null,childVector) ! ); ! ! assertNotNull("Script Tag object creation",scriptTag); ! assertEquals("Script Tag Begin",0,scriptTag.elementBegin()); ! assertEquals("Script Tag End",10,scriptTag.elementEnd()); ! assertEquals("Script Tag Contents","Tag Contents",scriptTag.getText()); ! assertEquals("Script Tag Code","Script Code",scriptTag.getScriptCode()); ! assertEquals("Script Tag Line","tagline",scriptTag.getTagLine()); ! } ! public void testToHTML() throws ParserException { ! createParser("<SCRIPT>document.write(d+\".com\")</SCRIPT>"); ! // Register the image scanner ! parser.addScanner(new ScriptScanner("-s")); ! ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); ! // Check the data in the applet tag ! ScriptTag scriptTag = (ScriptTag)node[0]; ! assertEquals("Expected Raw String","<SCRIPT>document.write(d+\".com\")</SCRIPT>",scriptTag.toHtml()); ! } ! ! /** ! * Bug check by Wolfgang Germund 2002-06-02 ! * Upon parsing : ! * <script language="javascript"> ! * if(navigator.appName.indexOf("Netscape") != -1) ! * document.write ('xxx'); ! * else ! * document.write ('yyy'); ! * </script> ! * check toRawString(). ! */ ! public void testToHTMLWG() throws ParserException ! { ! StringBuffer sb1 = new StringBuffer(); ! sb1.append("<body><script language=\"javascript\">\r\n"); ! sb1.append("if(navigator.appName.indexOf(\"Netscape\") != -1)\r\n"); ! sb1.append(" document.write ('xxx');\r\n"); ! sb1.append("else\r\n"); ! sb1.append(" document.write ('yyy');\r\n"); ! sb1.a... [truncated message content] |
From: <der...@us...> - 2003-09-03 23:36:26
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests/visitorsTests Modified Files: AllTests.java CompositeTagFindingVisitorTest.java HtmlPageTest.java LinkFindingVisitorTest.java NodeVisitorTest.java StringFindingVisitorTest.java TagFindingVisitorTest.java TextExtractingVisitorTest.java UrlModifyingVisitorTest.java Log Message: Change tabs to spaces in all source files. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/AllTests.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** AllTests.java 24 Aug 2003 21:59:44 -0000 1.32 --- AllTests.java 3 Sep 2003 23:36:22 -0000 1.33 *************** *** 33,54 **** public class AllTests extends junit.framework.TestCase { ! public AllTests(String name) { ! super(name); ! } ! ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("Visitor Tests"); suite.addTestSuite(CompositeTagFindingVisitorTest.class); ! suite.addTestSuite(HtmlPageTest.class); ! suite.addTestSuite(LinkFindingVisitorTest.class); ! suite.addTestSuite(NodeVisitorTest.class); ! suite.addTestSuite(StringFindingVisitorTest.class); ! suite.addTestSuite(TagFindingVisitorTest.class); ! suite.addTestSuite(TextExtractingVisitorTest.class); ! suite.addTestSuite(UrlModifyingVisitorTest.class); ! return suite; ! } /** --- 33,54 ---- public class AllTests extends junit.framework.TestCase { ! public AllTests(String name) { ! super(name); ! } ! ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("Visitor Tests"); suite.addTestSuite(CompositeTagFindingVisitorTest.class); ! suite.addTestSuite(HtmlPageTest.class); ! suite.addTestSuite(LinkFindingVisitorTest.class); ! suite.addTestSuite(NodeVisitorTest.class); ! suite.addTestSuite(StringFindingVisitorTest.class); ! suite.addTestSuite(TagFindingVisitorTest.class); ! 
suite.addTestSuite(TextExtractingVisitorTest.class); ! suite.addTestSuite(UrlModifyingVisitorTest.class); ! return suite; ! } /** Index: CompositeTagFindingVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/CompositeTagFindingVisitorTest.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** CompositeTagFindingVisitorTest.java 24 Aug 2003 21:59:44 -0000 1.5 --- CompositeTagFindingVisitorTest.java 3 Sep 2003 23:36:22 -0000 1.6 *************** *** 35,44 **** public class CompositeTagFindingVisitorTest extends ParserTestCase { ! public CompositeTagFindingVisitorTest(String name) { ! super(name); ! } ! public void testFindTagsWithinGivenTag() { ! createParser("<html></html>"); ! } } --- 35,44 ---- public class CompositeTagFindingVisitorTest extends ParserTestCase { ! public CompositeTagFindingVisitorTest(String name) { ! super(name); ! } ! public void testFindTagsWithinGivenTag() { ! createParser("<html></html>"); ! } } Index: HtmlPageTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/HtmlPageTest.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** HtmlPageTest.java 24 Aug 2003 21:59:44 -0000 1.8 --- HtmlPageTest.java 3 Sep 2003 23:36:22 -0000 1.9 *************** *** 40,124 **** public class HtmlPageTest extends ParserTestCase { ! private static final String SIMPLE_PAGE = ! "<html>" + ! "<head>" + ! "<title>Welcome to the HTMLParser website</title>" + ! "</head>" + ! "<body>" + ! "Welcome to HTMLParser" + ! "</body>" + ! "</html>"; ! private static final String PAGE_WITH_TABLE = ! "<html>" + ! "<head>" + ! "<title>Welcome to the HTMLParser website</title>" + ! "</head>" + ! "<body>" + ! "Welcome to HTMLParser" + ! "<table>" + ! "<tr>" + ! "<td>cell 1</td>" + ! "<td>cell 2</td>" + ! "</tr>" + ! 
"</table>" + ! "</body>" + ! "</html>"; ! ! public HtmlPageTest(String name) { ! super(name); ! } ! public void testCreateSimplePage() throws Exception { ! createParser( ! SIMPLE_PAGE ! ); ! HtmlPage page = new HtmlPage(parser); ! parser.visitAllNodesWith(page); ! assertStringEquals( ! "title", ! "Welcome to the HTMLParser website", ! page.getTitle() ! ); ! NodeList bodyNodes = page.getBody(); ! assertEquals("number of nodes in body",1,bodyNodes.size()); ! Node node = bodyNodes.elementAt(0); ! assertTrue("expected stringNode but was "+node.getClass().getName(), ! node instanceof StringNode ! ); ! assertStringEquals( ! "body contents", ! "Welcome to HTMLParser", ! page.getBody().asString() ! ); ! } ! ! public void testCreatePageWithTables() throws Exception { ! createParser( ! PAGE_WITH_TABLE ! ); ! HtmlPage page = new HtmlPage(parser); ! parser.visitAllNodesWith(page); ! NodeList bodyNodes = page.getBody(); ! assertEquals("number of nodes in body",2,bodyNodes.size()); ! assertXmlEquals( ! "body html", ! "Welcome to HTMLParser" + ! "<table>" + ! "<tr>" + ! " <td>cell 1</td>" + ! " <td>cell 2</td>" + ! "</tr>" + ! "</table>", ! bodyNodes.asHtml() ! ); ! TableTag tables [] = page.getTables(); ! assertEquals("number of tables",1,tables.length); ! assertEquals("number of rows",1,tables[0].getRowCount()); ! TableRow row = tables[0].getRow(0); ! assertEquals("number of columns",2,row.getColumnCount()); ! TableColumn [] col = row.getColumns(); ! assertEquals("column contents","cell 1",col[0].toPlainTextString()); ! assertEquals("column contents","cell 2",col[1].toPlainTextString()); ! } } --- 40,124 ---- public class HtmlPageTest extends ParserTestCase { ! private static final String SIMPLE_PAGE = ! "<html>" + ! "<head>" + ! "<title>Welcome to the HTMLParser website</title>" + ! "</head>" + ! "<body>" + ! "Welcome to HTMLParser" + ! "</body>" + ! "</html>"; ! private static final String PAGE_WITH_TABLE = ! "<html>" + ! "<head>" + ! 
"<title>Welcome to the HTMLParser website</title>" + ! "</head>" + ! "<body>" + ! "Welcome to HTMLParser" + ! "<table>" + ! "<tr>" + ! "<td>cell 1</td>" + ! "<td>cell 2</td>" + ! "</tr>" + ! "</table>" + ! "</body>" + ! "</html>"; ! ! public HtmlPageTest(String name) { ! super(name); ! } ! public void testCreateSimplePage() throws Exception { ! createParser( ! SIMPLE_PAGE ! ); ! HtmlPage page = new HtmlPage(parser); ! parser.visitAllNodesWith(page); ! assertStringEquals( ! "title", ! "Welcome to the HTMLParser website", ! page.getTitle() ! ); ! NodeList bodyNodes = page.getBody(); ! assertEquals("number of nodes in body",1,bodyNodes.size()); ! Node node = bodyNodes.elementAt(0); ! assertTrue("expected stringNode but was "+node.getClass().getName(), ! node instanceof StringNode ! ); ! assertStringEquals( ! "body contents", ! "Welcome to HTMLParser", ! page.getBody().asString() ! ); ! } ! ! public void testCreatePageWithTables() throws Exception { ! createParser( ! PAGE_WITH_TABLE ! ); ! HtmlPage page = new HtmlPage(parser); ! parser.visitAllNodesWith(page); ! NodeList bodyNodes = page.getBody(); ! assertEquals("number of nodes in body",2,bodyNodes.size()); ! assertXmlEquals( ! "body html", ! "Welcome to HTMLParser" + ! "<table>" + ! "<tr>" + ! " <td>cell 1</td>" + ! " <td>cell 2</td>" + ! "</tr>" + ! "</table>", ! bodyNodes.asHtml() ! ); ! TableTag tables [] = page.getTables(); ! assertEquals("number of tables",1,tables.length); ! assertEquals("number of rows",1,tables[0].getRowCount()); ! TableRow row = tables[0].getRow(0); ! assertEquals("number of columns",2,row.getColumnCount()); ! TableColumn [] col = row.getColumns(); ! assertEquals("column contents","cell 1",col[0].toPlainTextString()); ! assertEquals("column contents","cell 2",col[1].toPlainTextString()); ! 
} } Index: LinkFindingVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/LinkFindingVisitorTest.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** LinkFindingVisitorTest.java 24 Aug 2003 21:59:44 -0000 1.5 --- LinkFindingVisitorTest.java 3 Sep 2003 23:36:22 -0000 1.6 *************** *** 33,51 **** public class LinkFindingVisitorTest extends ParserTestCase { ! private String html = ! "<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, <A href=\"http://www.industriallogic.com\">Industrial Logic</a></BODY></HTML>"; ! public LinkFindingVisitorTest(String name) { ! super(name); ! } ! public void testLinkFoundCorrectly() throws Exception { ! createParser(html); ! parser.registerScanners(); ! LinkFindingVisitor visitor = new LinkFindingVisitor("Industrial Logic"); ! parser.visitAllNodesWith(visitor); ! assertTrue("Found Industrial Logic Link",visitor.linkTextFound()); ! assertEquals("Link Count",1,visitor.getCount()); ! } ! } --- 33,51 ---- public class LinkFindingVisitorTest extends ParserTestCase { ! private String html = ! "<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, <A href=\"http://www.industriallogic.com\">Industrial Logic</a></BODY></HTML>"; ! public LinkFindingVisitorTest(String name) { ! super(name); ! } ! public void testLinkFoundCorrectly() throws Exception { ! createParser(html); ! parser.registerScanners(); ! LinkFindingVisitor visitor = new LinkFindingVisitor("Industrial Logic"); ! parser.visitAllNodesWith(visitor); ! assertTrue("Found Industrial Logic Link",visitor.linkTextFound()); ! assertEquals("Link Count",1,visitor.getCount()); ! } ! 
} Index: NodeVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/NodeVisitorTest.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** NodeVisitorTest.java 24 Aug 2003 21:59:44 -0000 1.5 --- NodeVisitorTest.java 3 Sep 2003 23:36:22 -0000 1.6 *************** *** 39,76 **** public class NodeVisitorTest extends ParserTestCase { ! public NodeVisitorTest(String name) { ! super(name); ! } ! ! public void testVisitTag() throws Exception { ! ParameterVisitor visitor = new ParameterVisitor(); ! createParser( ! "<input>" + ! "<param name='key1'>value1</param>"+ ! "<param name='key2'>value2</param>"+ ! "</input>" ! ); ! parser.visitAllNodesWith(visitor); ! assertEquals("value of key1","value1",visitor.getValue("key1")); ! assertEquals("value of key2","value2",visitor.getValue("key2")); ! } ! ! class ParameterVisitor extends NodeVisitor { ! Map paramsMap = new HashMap(); ! String lastKeyVisited; ! ! public String getValue(String key) { ! return (String)paramsMap.get(key); ! } ! ! public void visitStringNode(StringNode stringNode) { ! paramsMap.put(lastKeyVisited,stringNode.getText()); ! } ! ! public void visitTag(Tag tag) { ! if (tag.getTagName().equals("PARAM")) { ! lastKeyVisited = tag.getAttribute("NAME"); ! } ! } ! } } --- 39,76 ---- public class NodeVisitorTest extends ParserTestCase { ! public NodeVisitorTest(String name) { ! super(name); ! } ! ! public void testVisitTag() throws Exception { ! ParameterVisitor visitor = new ParameterVisitor(); ! createParser( ! "<input>" + ! "<param name='key1'>value1</param>"+ ! "<param name='key2'>value2</param>"+ ! "</input>" ! ); ! parser.visitAllNodesWith(visitor); ! assertEquals("value of key1","value1",visitor.getValue("key1")); ! assertEquals("value of key2","value2",visitor.getValue("key2")); ! } ! ! class ParameterVisitor extends NodeVisitor { ! Map paramsMap = new HashMap(); ! 
String lastKeyVisited; ! ! public String getValue(String key) { ! return (String)paramsMap.get(key); ! } ! ! public void visitStringNode(StringNode stringNode) { ! paramsMap.put(lastKeyVisited,stringNode.getText()); ! } ! ! public void visitTag(Tag tag) { ! if (tag.getTagName().equals("PARAM")) { ! lastKeyVisited = tag.getAttribute("NAME"); ! } ! } ! } } Index: StringFindingVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/StringFindingVisitorTest.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** StringFindingVisitorTest.java 24 Aug 2003 21:59:44 -0000 1.7 --- StringFindingVisitorTest.java 3 Sep 2003 23:36:22 -0000 1.8 *************** *** 33,98 **** public class StringFindingVisitorTest extends ParserTestCase { ! private static final String HTML = ! "<HTML><HEAD><TITLE>This is the Title</TITLE>" + ! "</HEAD><BODY>Hello World, this is an excellent parser</BODY></HTML>"; ! ! private static final String HTML_TO_SEARCH = ! "<HTML><HEAD><TITLE>test</TITLE></HEAD>\n"+ ! "<BODY><H1>This is a test page</H1>\n"+ ! "Writing tests is good for code. Testing is a good\n"+ ! "philosophy. Test driven development is even better.\n"; ! public StringFindingVisitorTest(String name) { ! super(name); ! } ! public void testSimpleStringFind() throws Exception { ! createParser(HTML); ! StringFindingVisitor visitor = new StringFindingVisitor("Hello"); ! parser.visitAllNodesWith(visitor); ! assertTrue("Hello found", visitor.stringWasFound()); ! } ! ! public void testStringNotFound() throws Exception { ! createParser(HTML); ! StringFindingVisitor visitor = new StringFindingVisitor("industrial logic"); ! parser.visitAllNodesWith(visitor); ! assertTrue("industrial logic should not have been found", !visitor.stringWasFound()); ! } ! ! public void testStringInTagNotFound() throws Exception { ! createParser(HTML); ! 
StringFindingVisitor visitor = new StringFindingVisitor("HTML"); ! parser.visitAllNodesWith(visitor); ! assertTrue("HTML should not have been found", !visitor.stringWasFound()); ! } ! public void testStringFoundInSingleStringNode() throws Exception { ! createParser("this is some text!"); ! StringFindingVisitor visitor = new StringFindingVisitor("text"); ! parser.visitAllNodesWith(visitor); ! assertTrue("text should be found", visitor.stringWasFound()); ! } ! ! public void testStringFoundCount() throws Exception { ! createParser(HTML); ! StringFindingVisitor visitor = new StringFindingVisitor("is"); ! parser.visitAllNodesWith(visitor); ! assertEquals("# times 'is' was found", 2, visitor.stringFoundCount()); ! ! visitor = new StringFindingVisitor("and"); ! parser.visitAllNodesWith(visitor); ! assertEquals("# times 'and' was found", 0, visitor.stringFoundCount()); ! } ! public void testStringFoundMultipleTimes() throws Exception { ! createParser(HTML_TO_SEARCH); ! StringFindingVisitor visitor = new StringFindingVisitor("TEST"); ! visitor.doMultipleSearchesWithinStrings(); ! parser.visitAllNodesWith(visitor); ! assertEquals("TEST found", 5, visitor.stringFoundCount()); ! } ! ! } --- 33,98 ---- public class StringFindingVisitorTest extends ParserTestCase { ! private static final String HTML = ! "<HTML><HEAD><TITLE>This is the Title</TITLE>" + ! "</HEAD><BODY>Hello World, this is an excellent parser</BODY></HTML>"; ! ! private static final String HTML_TO_SEARCH = ! "<HTML><HEAD><TITLE>test</TITLE></HEAD>\n"+ ! "<BODY><H1>This is a test page</H1>\n"+ ! "Writing tests is good for code. Testing is a good\n"+ ! "philosophy. Test driven development is even better.\n"; ! public StringFindingVisitorTest(String name) { ! super(name); ! } ! public void testSimpleStringFind() throws Exception { ! createParser(HTML); ! StringFindingVisitor visitor = new StringFindingVisitor("Hello"); ! parser.visitAllNodesWith(visitor); ! assertTrue("Hello found", visitor.stringWasFound()); ! } ! ! 
public void testStringNotFound() throws Exception { ! createParser(HTML); ! StringFindingVisitor visitor = new StringFindingVisitor("industrial logic"); ! parser.visitAllNodesWith(visitor); ! assertTrue("industrial logic should not have been found", !visitor.stringWasFound()); ! } ! ! public void testStringInTagNotFound() throws Exception { ! createParser(HTML); ! StringFindingVisitor visitor = new StringFindingVisitor("HTML"); ! parser.visitAllNodesWith(visitor); ! assertTrue("HTML should not have been found", !visitor.stringWasFound()); ! } ! public void testStringFoundInSingleStringNode() throws Exception { ! createParser("this is some text!"); ! StringFindingVisitor visitor = new StringFindingVisitor("text"); ! parser.visitAllNodesWith(visitor); ! assertTrue("text should be found", visitor.stringWasFound()); ! } ! ! public void testStringFoundCount() throws Exception { ! createParser(HTML); ! StringFindingVisitor visitor = new StringFindingVisitor("is"); ! parser.visitAllNodesWith(visitor); ! assertEquals("# times 'is' was found", 2, visitor.stringFoundCount()); ! ! visitor = new StringFindingVisitor("and"); ! parser.visitAllNodesWith(visitor); ! assertEquals("# times 'and' was found", 0, visitor.stringFoundCount()); ! } ! public void testStringFoundMultipleTimes() throws Exception { ! createParser(HTML_TO_SEARCH); ! StringFindingVisitor visitor = new StringFindingVisitor("TEST"); ! visitor.doMultipleSearchesWithinStrings(); ! parser.visitAllNodesWith(visitor); ! assertEquals("TEST found", 5, visitor.stringFoundCount()); ! } ! ! 
} Index: TagFindingVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/TagFindingVisitorTest.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** TagFindingVisitorTest.java 24 Aug 2003 21:59:44 -0000 1.8 --- TagFindingVisitorTest.java 3 Sep 2003 23:36:22 -0000 1.9 *************** *** 35,100 **** public class TagFindingVisitorTest extends ParserTestCase { ! private String html = ! "<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD>" + ! "<BODY>Hello World, this is an excellent parser</BODY>" + ! "<UL><LI><LI></UL>" + ! "<A href=\"http://www.industriallogic.com\">Industrial Logic</a>" + ! "</HTML>"; ! public TagFindingVisitorTest(String name) { ! super(name); ! } ! ! public void setUp() { ! createParser(html); ! } ! public void testTagFound() throws Exception { ! TagFindingVisitor visitor = new TagFindingVisitor(new String[] {"HEAD"}); ! parser.visitAllNodesWith(visitor); ! assertEquals("HEAD found", 1, visitor.getTagCount(0)); ! } ! public void testTagsFound() throws Exception { ! TagFindingVisitor visitor = new TagFindingVisitor(new String [] {"LI"}); ! parser.visitAllNodesWith(visitor); ! assertEquals("LI tags found", 2, visitor.getTagCount(0)); ! } ! ! public void testMultipleTags() throws Exception { ! TagFindingVisitor visitor = ! new TagFindingVisitor( ! new String [] { ! "LI","BODY","UL","A" ! } ! ); ! parser.visitAllNodesWith(visitor); ! assertEquals("LI tags found", 2, visitor.getTagCount(0)); ! assertEquals("BODY tag found", 1, visitor.getTagCount(1)); ! assertEquals("UL tag found", 1, visitor.getTagCount(2)); ! assertEquals("A tag found", 1, visitor.getTagCount(3)); ! } ! public void testEndTags() throws Exception { ! TagFindingVisitor visitor = ! new TagFindingVisitor( ! new String [] { ! "LI","BODY","UL","A" ! }, ! true ! ); ! parser.visitAllNodesWith(visitor); ! 
assertEquals("LI tags found", 2, visitor.getTagCount(0)); ! assertEquals("BODY tag found", 1, visitor.getTagCount(1)); ! assertEquals("UL tag found", 1, visitor.getTagCount(2)); ! assertEquals("A tag found", 1, visitor.getTagCount(3)); ! assertEquals("BODY end tag found", 1, visitor.getEndTagCount(1)); ! } ! public void assertTagNameShouldBe(String message, Node node, String expectedTagName) { ! Tag tag = (Tag)node; ! assertStringEquals(message,expectedTagName,tag.getTagName()); ! } } --- 35,100 ---- public class TagFindingVisitorTest extends ParserTestCase { ! private String html = ! "<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD>" + ! "<BODY>Hello World, this is an excellent parser</BODY>" + ! "<UL><LI><LI></UL>" + ! "<A href=\"http://www.industriallogic.com\">Industrial Logic</a>" + ! "</HTML>"; ! public TagFindingVisitorTest(String name) { ! super(name); ! } ! ! public void setUp() { ! createParser(html); ! } ! public void testTagFound() throws Exception { ! TagFindingVisitor visitor = new TagFindingVisitor(new String[] {"HEAD"}); ! parser.visitAllNodesWith(visitor); ! assertEquals("HEAD found", 1, visitor.getTagCount(0)); ! } ! public void testTagsFound() throws Exception { ! TagFindingVisitor visitor = new TagFindingVisitor(new String [] {"LI"}); ! parser.visitAllNodesWith(visitor); ! assertEquals("LI tags found", 2, visitor.getTagCount(0)); ! } ! ! public void testMultipleTags() throws Exception { ! TagFindingVisitor visitor = ! new TagFindingVisitor( ! new String [] { ! "LI","BODY","UL","A" ! } ! ); ! parser.visitAllNodesWith(visitor); ! assertEquals("LI tags found", 2, visitor.getTagCount(0)); ! assertEquals("BODY tag found", 1, visitor.getTagCount(1)); ! assertEquals("UL tag found", 1, visitor.getTagCount(2)); ! assertEquals("A tag found", 1, visitor.getTagCount(3)); ! } ! public void testEndTags() throws Exception { ! TagFindingVisitor visitor = ! new TagFindingVisitor( ! new String [] { ! "LI","BODY","UL","A" ! }, ! true ! ); ! 
parser.visitAllNodesWith(visitor); ! assertEquals("LI tags found", 2, visitor.getTagCount(0)); ! assertEquals("BODY tag found", 1, visitor.getTagCount(1)); ! assertEquals("UL tag found", 1, visitor.getTagCount(2)); ! assertEquals("A tag found", 1, visitor.getTagCount(3)); ! assertEquals("BODY end tag found", 1, visitor.getEndTagCount(1)); ! } ! public void assertTagNameShouldBe(String message, Node node, String expectedTagName) { ! Tag tag = (Tag)node; ! assertStringEquals(message,expectedTagName,tag.getTagName()); ! } } Index: TextExtractingVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/TextExtractingVisitorTest.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** TextExtractingVisitorTest.java 24 Aug 2003 21:59:44 -0000 1.5 --- TextExtractingVisitorTest.java 3 Sep 2003 23:36:22 -0000 1.6 *************** *** 34,86 **** public class TextExtractingVisitorTest extends ParserTestCase { ! public TextExtractingVisitorTest(String name) { ! super(name); ! } ! public void testSimpleVisit() throws Exception { ! createParser("<HTML><HEAD><TITLE>Hello World</TITLE></HEAD></HTML>"); ! TextExtractingVisitor visitor = new TextExtractingVisitor(); ! parser.visitAllNodesWith(visitor); ! assertStringEquals( ! "extracted text", ! "Hello World", ! visitor.getExtractedText() ! ); ! } ! ! public void testSimpleVisitWithRegisteredScanners() throws Exception { ! createParser("<HTML><HEAD><TITLE>Hello World</TITLE></HEAD></HTML>"); ! parser.registerScanners(); ! TextExtractingVisitor visitor = new TextExtractingVisitor(); ! parser.visitAllNodesWith(visitor); ! assertStringEquals( ! "extracted text", ! "Hello World", ! visitor.getExtractedText() ! ); ! } ! ! public void testVisitHtmlWithSpecialChars() throws Exception { ! createParser("<BODY>Hello World </BODY>"); ! TextExtractingVisitor visitor = new TextExtractingVisitor(); ! 
parser.visitAllNodesWith(visitor); ! assertStringEquals( ! "extracted text", ! "Hello World ", ! visitor.getExtractedText() ! ); ! } ! ! public void testVisitHtmlWithPreTags() throws Exception { ! createParser( ! "Some text with <pre>this should be preserved</pre>" ! ); ! TextExtractingVisitor visitor = new TextExtractingVisitor(); ! parser.visitAllNodesWith(visitor); ! assertStringEquals( ! "extracted text", ! "Some text with this should be preserved", ! visitor.getExtractedText() ! ); ! } } --- 34,86 ---- public class TextExtractingVisitorTest extends ParserTestCase { ! public TextExtractingVisitorTest(String name) { ! super(name); ! } ! public void testSimpleVisit() throws Exception { ! createParser("<HTML><HEAD><TITLE>Hello World</TITLE></HEAD></HTML>"); ! TextExtractingVisitor visitor = new TextExtractingVisitor(); ! parser.visitAllNodesWith(visitor); ! assertStringEquals( ! "extracted text", ! "Hello World", ! visitor.getExtractedText() ! ); ! } ! ! public void testSimpleVisitWithRegisteredScanners() throws Exception { ! createParser("<HTML><HEAD><TITLE>Hello World</TITLE></HEAD></HTML>"); ! parser.registerScanners(); ! TextExtractingVisitor visitor = new TextExtractingVisitor(); ! parser.visitAllNodesWith(visitor); ! assertStringEquals( ! "extracted text", ! "Hello World", ! visitor.getExtractedText() ! ); ! } ! ! public void testVisitHtmlWithSpecialChars() throws Exception { ! createParser("<BODY>Hello World </BODY>"); ! TextExtractingVisitor visitor = new TextExtractingVisitor(); ! parser.visitAllNodesWith(visitor); ! assertStringEquals( ! "extracted text", ! "Hello World ", ! visitor.getExtractedText() ! ); ! } ! ! public void testVisitHtmlWithPreTags() throws Exception { ! createParser( ! "Some text with <pre>this should be preserved</pre>" ! ); ! TextExtractingVisitor visitor = new TextExtractingVisitor(); ! parser.visitAllNodesWith(visitor); ! assertStringEquals( ! "extracted text", ! "Some text with this should be preserved", ! 
visitor.getExtractedText() ! ); ! } } Index: UrlModifyingVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/UrlModifyingVisitorTest.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** UrlModifyingVisitorTest.java 24 Aug 2003 21:59:44 -0000 1.5 --- UrlModifyingVisitorTest.java 3 Sep 2003 23:36:22 -0000 1.6 *************** *** 34,62 **** public class UrlModifyingVisitorTest extends ParserTestCase { ! private static final String HTML_WITH_LINK = ! "<HTML><BODY>" + ! "<A HREF=\"mylink.html\"><IMG SRC=\"mypic.jpg\">" + ! "</A><IMG SRC=\"mysecondimage.gif\">" + ! "</BODY></HTML>"; ! ! private static final String MODIFIED_HTML = ! "<HTML><BODY>" + ! "<A HREF=\"localhost://mylink.html\">" + ! "<IMG SRC=\"localhost://mypic.jpg\"></A>" + ! "<IMG SRC=\"localhost://mysecondimage.gif\">" + ! "</BODY></HTML>"; ! ! public UrlModifyingVisitorTest(String name) { ! super(name); ! } ! ! public void testUrlModificationWithVisitor() throws Exception { ! Parser parser = Parser.createParser(HTML_WITH_LINK); ! UrlModifyingVisitor visitor = ! new UrlModifyingVisitor(parser, "localhost://"); ! parser.visitAllNodesWith(visitor); ! assertStringEquals("Expected HTML", ! MODIFIED_HTML, ! visitor.getModifiedResult()); ! } } --- 34,62 ---- public class UrlModifyingVisitorTest extends ParserTestCase { ! private static final String HTML_WITH_LINK = ! "<HTML><BODY>" + ! "<A HREF=\"mylink.html\"><IMG SRC=\"mypic.jpg\">" + ! "</A><IMG SRC=\"mysecondimage.gif\">" + ! "</BODY></HTML>"; ! ! private static final String MODIFIED_HTML = ! "<HTML><BODY>" + ! "<A HREF=\"localhost://mylink.html\">" + ! "<IMG SRC=\"localhost://mypic.jpg\"></A>" + ! "<IMG SRC=\"localhost://mysecondimage.gif\">" + ! "</BODY></HTML>"; ! ! public UrlModifyingVisitorTest(String name) { ! super(name); ! } ! ! public void testUrlModificationWithVisitor() throws Exception { ! 
Parser parser = Parser.createParser(HTML_WITH_LINK); ! UrlModifyingVisitor visitor = ! new UrlModifyingVisitor(parser, "localhost://"); ! parser.visitAllNodesWith(visitor); ! assertStringEquals("Expected HTML", ! MODIFIED_HTML, ! visitor.getModifiedResult()); ! } } |
From: <der...@us...> - 2003-09-03 23:36:25
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests/utilTests Modified Files: AllTests.java BeanTest.java CharacterTranslationTest.java HTMLLinkProcessorTest.java HTMLParserUtilsTest.java HTMLTagParserTest.java NodeListTest.java Log Message: Change tabs to spaces in all source files. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/AllTests.java,v retrieving revision 1.44 retrieving revision 1.45 diff -C2 -d -r1.44 -r1.45 *** AllTests.java 24 Aug 2003 21:59:44 -0000 1.44 --- AllTests.java 3 Sep 2003 23:36:22 -0000 1.45 *************** *** 44,48 **** */ public AllTests(String name) { ! super(name); } --- 44,48 ---- */ public AllTests(String name) { ! super(name); } *************** *** 58,62 **** * All other options are passed on to the junit framework. */ ! public static void main(String[] args) { String runner; --- 58,62 ---- * All other options are passed on to the junit framework. */ ! public static void main(String[] args) { String runner; *************** *** 108,112 **** + ")"); } ! } /** --- 108,112 ---- + ")"); } ! } /** Index: BeanTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/BeanTest.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** BeanTest.java 24 Aug 2003 21:59:44 -0000 1.38 --- BeanTest.java 3 Sep 2003 23:36:22 -0000 1.39 *************** *** 377,412 **** } ! /** ! * Test text including a "pre" tag ! */ ! public void testOutputWithPreTags() { ! StringBean sb; ! sb = new StringBean (); ! String sampleCode = "public class Product {}"; ! check (sb, "<body><pre>"+sampleCode+"</pre></body>", sampleCode); ! } ! /** ! * Test text including a "script" tag ! */ ! public void testOutputWithScriptTags() { ! StringBean sb; ! 
sb = new StringBean (); ! ! String sampleScript = ! "<script language=\"javascript\">\r\n" ! + "if(navigator.appName.indexOf(\"Netscape\") != -1)\r\n" ! + " document.write ('xxx');\r\n" ! + "else\r\n" ! + " document.write ('yyy');\r\n" ! + "</script>\r\n"; ! ! check (sb, "<body>"+sampleScript+"</body>", ""); ! } /* ! * Test output with pre and any tag. ! */ ! public void testOutputWithPreAndAnyTag() { StringBean sb; --- 377,412 ---- } ! /** ! * Test text including a "pre" tag ! */ ! public void testOutputWithPreTags() { ! StringBean sb; ! sb = new StringBean (); ! String sampleCode = "public class Product {}"; ! check (sb, "<body><pre>"+sampleCode+"</pre></body>", sampleCode); ! } ! /** ! * Test text including a "script" tag ! */ ! public void testOutputWithScriptTags() { ! StringBean sb; ! sb = new StringBean (); ! ! String sampleScript = ! "<script language=\"javascript\">\r\n" ! + "if(navigator.appName.indexOf(\"Netscape\") != -1)\r\n" ! + " document.write ('xxx');\r\n" ! + "else\r\n" ! + " document.write ('yyy');\r\n" ! + "</script>\r\n"; ! ! check (sb, "<body>"+sampleScript+"</body>", ""); ! } /* ! * Test output with pre and any tag. ! */ ! public void testOutputWithPreAndAnyTag() { StringBean sb; *************** *** 420,426 **** /* ! * Test output with pre and any tag and text. ! */ ! public void testOutputWithPreAndAnyTagPlusText() { StringBean sb; --- 420,426 ---- /* ! * Test output with pre and any tag and text. ! */ ! public void testOutputWithPreAndAnyTagPlusText() { StringBean sb; *************** *** 434,440 **** /* ! * Test output with pre and any tag and text. ! */ ! public void testOutputWithPreAndAnyTagPlusTextWithWhitespace() { StringBean sb; --- 434,440 ---- /* ! * Test output with pre and any tag and text. ! */ ! public void testOutputWithPreAndAnyTagPlusTextWithWhitespace() { StringBean sb; *************** *** 448,454 **** /* ! * Test output without pre and any tag and text. ! */ ! 
public void testOutputWithoutPreAndAnyTagPlusTextWithWhitespace() { StringBean sb; --- 448,454 ---- /* ! * Test output without pre and any tag and text. ! */ ! public void testOutputWithoutPreAndAnyTagPlusTextWithWhitespace() { StringBean sb; *************** *** 461,481 **** } ! /** ! * Test output with pre and script tags ! */ ! public void xtestOutputWithPreAndScriptTags() { ! StringBean sb; ! sb = new StringBean (); ! ! String sampleScript = ! "<script language=\"javascript\">\r\n" ! + "if(navigator.appName.indexOf(\"Netscape\") != -1)\r\n" ! + " document.write ('xxx');\r\n" ! + "else\r\n" ! + " document.write ('yyy');\r\n" ! + "</script>\r\n"; ! ! check (sb, "<body><pre>"+sampleScript+"</pre></body>", sampleScript); ! } } --- 461,481 ---- } ! /** ! * Test output with pre and script tags ! */ ! public void xtestOutputWithPreAndScriptTags() { ! StringBean sb; ! sb = new StringBean (); ! ! String sampleScript = ! "<script language=\"javascript\">\r\n" ! + "if(navigator.appName.indexOf(\"Netscape\") != -1)\r\n" ! + " document.write ('xxx');\r\n" ! + "else\r\n" ! + " document.write ('yyy');\r\n" ! + "</script>\r\n"; ! ! check (sb, "<body><pre>"+sampleScript+"</pre></body>", sampleScript); ! } } Index: CharacterTranslationTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** CharacterTranslationTest.java 24 Aug 2003 21:59:44 -0000 1.28 --- CharacterTranslationTest.java 3 Sep 2003 23:36:22 -0000 1.29 *************** *** 35,42 **** public class CharacterTranslationTest extends TestCase { ! public CharacterTranslationTest (String name) { ! super (name); ! } public void testInitialCharacterEntityReference () --- 35,42 ---- public class CharacterTranslationTest extends TestCase { ! public CharacterTranslationTest (String name) { ! super (name); ! 
} public void testInitialCharacterEntityReference () Index: HTMLLinkProcessorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/HTMLLinkProcessorTest.java,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** HTMLLinkProcessorTest.java 24 Aug 2003 21:59:44 -0000 1.42 --- HTMLLinkProcessorTest.java 3 Sep 2003 23:36:22 -0000 1.43 *************** *** 34,76 **** public class HTMLLinkProcessorTest extends ParserTestCase { ! private LinkProcessor lp; ! public HTMLLinkProcessorTest(String name) { ! super(name); ! } ! protected void setUp() { ! lp = new LinkProcessor(); ! } ! public void testIsURL() { ! String resourceLoc1 = "http://someurl.com"; ! String resourceLoc2 = "myfilehttp.dat"; ! assertTrue(resourceLoc1+" should be a url",LinkProcessor.isURL(resourceLoc1)); ! assertTrue(resourceLoc2+" should not be a url",!LinkProcessor.isURL(resourceLoc2)); ! String resourceLoc3 = "file://localhost/D:/java/jdk1.3/docs/api/overview-summary.html"; ! assertTrue(resourceLoc3+" should be a url",LinkProcessor.isURL(resourceLoc3)); ! ! } ! public void testFixSpaces() { ! String url = "http://htmlparser.sourceforge.net/test/This is a Test Page.html"; ! String fixedURL = LinkProcessor.fixSpaces(url); ! assertEquals("Expected","http://htmlparser.sourceforge.net/test/This%20is%20a%20Test%20Page.html",fixedURL); ! } ! /** ! * Reproduction of bug 673379 reported by Joe Robbins. Parser goes into ! * infinte loop if the link has no slashes. ! */ ! public void testLinkWithNoSlashes() throws Exception { ! createParser("<A HREF=\".foo.txt\">Foo</A>","http://www.oygevalt.com"); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertTrue(node[0] instanceof LinkTag); ! LinkTag linkTag = (LinkTag)node[0]; ! assertStringEquals("link","http://www.oygevalt.com/foo.txt",linkTag.getLink()); ! assertEquals("link","Foo",linkTag.getLinkText()); ! 
} // // Tests from Appendix C Examples of Resolving Relative URI References --- 34,76 ---- public class HTMLLinkProcessorTest extends ParserTestCase { ! private LinkProcessor lp; ! public HTMLLinkProcessorTest(String name) { ! super(name); ! } ! protected void setUp() { ! lp = new LinkProcessor(); ! } ! public void testIsURL() { ! String resourceLoc1 = "http://someurl.com"; ! String resourceLoc2 = "myfilehttp.dat"; ! assertTrue(resourceLoc1+" should be a url",LinkProcessor.isURL(resourceLoc1)); ! assertTrue(resourceLoc2+" should not be a url",!LinkProcessor.isURL(resourceLoc2)); ! String resourceLoc3 = "file://localhost/D:/java/jdk1.3/docs/api/overview-summary.html"; ! assertTrue(resourceLoc3+" should be a url",LinkProcessor.isURL(resourceLoc3)); ! ! } ! public void testFixSpaces() { ! String url = "http://htmlparser.sourceforge.net/test/This is a Test Page.html"; ! String fixedURL = LinkProcessor.fixSpaces(url); ! assertEquals("Expected","http://htmlparser.sourceforge.net/test/This%20is%20a%20Test%20Page.html",fixedURL); ! } ! /** ! * Reproduction of bug 673379 reported by Joe Robbins. Parser goes into ! * infinte loop if the link has no slashes. ! */ ! public void testLinkWithNoSlashes() throws Exception { ! createParser("<A HREF=\".foo.txt\">Foo</A>","http://www.oygevalt.com"); ! parser.registerScanners(); ! parseAndAssertNodeCount(1); ! assertTrue(node[0] instanceof LinkTag); ! LinkTag linkTag = (LinkTag)node[0]; ! assertStringEquals("link","http://www.oygevalt.com/foo.txt",linkTag.getLink()); ! assertEquals("link","Foo",linkTag.getLinkText()); ! 
} // // Tests from Appendix C Examples of Resolving Relative URI References Index: HTMLParserUtilsTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/HTMLParserUtilsTest.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** HTMLParserUtilsTest.java 24 Aug 2003 21:59:44 -0000 1.6 --- HTMLParserUtilsTest.java 3 Sep 2003 23:36:22 -0000 1.7 *************** *** 34,48 **** public class HTMLParserUtilsTest extends ParserTestCase { ! public HTMLParserUtilsTest(String name) { ! super(name); ! } ! public void testRemoveTrailingSpaces() { ! String text = "Hello World "; ! assertStringEquals( ! "modified text", ! "Hello World", ! ParserUtils.removeTrailingBlanks(text) ! ); ! } } --- 34,48 ---- public class HTMLParserUtilsTest extends ParserTestCase { ! public HTMLParserUtilsTest(String name) { ! super(name); ! } ! public void testRemoveTrailingSpaces() { ! String text = "Hello World "; ! assertStringEquals( ! "modified text", ! "Hello World", ! ParserUtils.removeTrailingBlanks(text) ! ); ! } } Index: HTMLTagParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/HTMLTagParserTest.java,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** HTMLTagParserTest.java 24 Aug 2003 21:59:44 -0000 1.41 --- HTMLTagParserTest.java 3 Sep 2003 23:36:22 -0000 1.42 *************** *** 36,64 **** public class HTMLTagParserTest extends ParserTestCase { ! private TagParser tagParser; ! public HTMLTagParserTest(String name) { ! super(name); ! } public void testCorrectTag() { ! Tag tag = new Tag(new TagData(0,20,"font face=\"Arial,\"helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"","<font face=\"Arial,\"helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\">")); ! tagParser.correctTag(tag); ! 
assertStringEquals("Corrected Tag","font face=\"Arial,helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"",tag.getText()); ! } ! public void testInsertInvertedCommasCorrectly() { ! StringBuffer test = new StringBuffer("a b=c d e = f"); ! StringBuffer result = tagParser.insertInvertedCommasCorrectly(test); ! assertStringEquals("Expected Correction","a b=\"c d\" e=\"f\"",result.toString()); ! } ! public void testPruneSpaces() { ! String test = " fdfdf dfdf "; ! assertEquals("Expected Pruned string","fdfdf dfdf",TagParser.pruneSpaces(test)); ! } ! protected void setUp() { ! tagParser = new TagParser(new DefaultParserFeedback()); ! } } --- 36,64 ---- public class HTMLTagParserTest extends ParserTestCase { ! private TagParser tagParser; ! public HTMLTagParserTest(String name) { ! super(name); ! } public void testCorrectTag() { ! Tag tag = new Tag(new TagData(0,20,"font face=\"Arial,\"helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"","<font face=\"Arial,\"helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\">")); ! tagParser.correctTag(tag); ! assertStringEquals("Corrected Tag","font face=\"Arial,helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"",tag.getText()); ! } ! public void testInsertInvertedCommasCorrectly() { ! StringBuffer test = new StringBuffer("a b=c d e = f"); ! StringBuffer result = tagParser.insertInvertedCommasCorrectly(test); ! assertStringEquals("Expected Correction","a b=\"c d\" e=\"f\"",result.toString()); ! } ! public void testPruneSpaces() { ! String test = " fdfdf dfdf "; ! assertEquals("Expected Pruned string","fdfdf dfdf",TagParser.pruneSpaces(test)); ! } ! protected void setUp() { ! tagParser = new TagParser(new DefaultParserFeedback()); ! 
} } Index: NodeListTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/NodeListTest.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** NodeListTest.java 24 Aug 2003 21:59:44 -0000 1.14 --- NodeListTest.java 3 Sep 2003 23:36:22 -0000 1.15 *************** *** 36,194 **** public class NodeListTest extends ParserTestCase { ! private NodeList nodeList; ! private Node[] testNodes; ! ! public NodeListTest(String name) { ! super(name); ! } ! protected void setUp() { ! nodeList = new NodeList(); ! } ! ! public void testAddOneItem() { ! Node node = createHTMLNodeObject(); ! nodeList.add(node); ! assertEquals("Vector Size",1,nodeList.size()); ! assertTrue("First Element",node==nodeList.elementAt(0)); ! } ! public void testAddTwoItems() { ! Node node1 = createHTMLNodeObject(); ! Node node2 = createHTMLNodeObject(); ! nodeList.add(node1); ! nodeList.add(node2); ! assertEquals("Vector Size",2,nodeList.size()); ! assertTrue("First Element",node1==nodeList.elementAt(0)); ! assertTrue("Second Element",node2==nodeList.elementAt(1)); ! } ! ! public void testAddTenItems() { ! createTestDataAndPutInVector(10); ! assertTestDataCouldBeExtractedFromVector(10); ! } ! ! public void testAddElevenItems() { ! createTestDataAndPutInVector(11); ! assertTestDataCouldBeExtractedFromVector(11); ! } ! ! public void testAddThirtyItems() { ! createTestDataAndPutInVector(30); ! assertTestDataCouldBeExtractedFromVector(30); ! assertEquals("Number of Adjustments",1,nodeList.getNumberOfAdjustments()); ! } ! ! public void testAddThirtyOneItems() { ! createTestDataAndPutInVector(31); ! assertTestDataCouldBeExtractedFromVector(31); ! assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); ! } ! ! public void testAddFiftyItems() { ! createTestDataAndPutInVector(50); ! assertTestDataCouldBeExtractedFromVector(50); ! 
assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); ! } ! ! public void testAddFiftyOneItems() { ! createTestDataAndPutInVector(51); ! assertTestDataCouldBeExtractedFromVector(51); ! assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); ! } ! ! public void testAddTwoHundredItems() { ! createTestDataAndPutInVector(200); ! assertEquals("Number of Adjustments",4,nodeList.getNumberOfAdjustments()); ! } ! ! public void testElements() throws Exception { ! createTestDataAndPutInVector(11); ! Node [] resultNodes = new AbstractNode[11]; ! int i = 0; ! for (SimpleNodeIterator e = nodeList.elements();e.hasMoreNodes();) { ! resultNodes[i] = e.nextNode(); ! assertTrue("Node "+i+" did not match",testNodes[i]==resultNodes[i]); ! i++; ! } ! ! } ! ! private Node createHTMLNodeObject() { ! Node node = new AbstractNode(10,20) { ! public void accept(Object visitor) { ! } ! public void collectInto(NodeList collectionList, String filter) { ! } ! ! public String toHtml() { ! return null; ! } ! ! public String toPlainTextString() { ! return null; ! } ! ! public String toString() { ! return ""; ! } ! }; ! return node; ! } ! private void createTestDataAndPutInVector(int nodeCount) { ! testNodes = new AbstractNode[nodeCount]; ! for (int i=0;i<nodeCount;i++) { ! testNodes[i]= createHTMLNodeObject(); ! nodeList.add(testNodes[i]); ! } ! } ! private void assertTestDataCouldBeExtractedFromVector(int nodeCount) { ! for (int i=0;i<nodeCount;i++) { ! assertTrue("Element "+i+" did not match",testNodes[i]==nodeList.elementAt(i)); ! } ! } ! ! public void testToNodeArray() { ! createTestDataAndPutInVector(387); ! Node nodes [] = nodeList.toNodeArray(); ! assertEquals("Length of array",387,nodes.length); ! for (int i=0;i<nodes.length;i++) ! assertNotNull("node "+i+" should not be null",nodes[i]); ! } ! ! public void testRemove() { ! Node node1 = createHTMLNodeObject(); ! Node node2 = createHTMLNodeObject(); ! nodeList.add(node1); ! nodeList.add(node2); ! 
assertEquals("Vector Size",2,nodeList.size()); ! assertTrue("First Element",node1==nodeList.elementAt(0)); ! assertTrue("Second Element",node2==nodeList.elementAt(1)); ! nodeList.remove(1); ! assertEquals("List Size",1,nodeList.size()); ! assertTrue("First Element",node1==nodeList.elementAt(0)); ! } ! ! public void testRemoveAll() { ! Node node1 = createHTMLNodeObject(); ! Node node2 = createHTMLNodeObject(); ! nodeList.add(node1); ! nodeList.add(node2); ! assertEquals("Vector Size",2,nodeList.size()); ! assertTrue("First Element",node1==nodeList.elementAt(0)); ! assertTrue("Second Element",node2==nodeList.elementAt(1)); ! nodeList.removeAll(); ! assertEquals("List Size",0,nodeList.size()); ! assertTrue("First Element",null==nodeList.elementAt(0)); ! assertTrue("Second Element",null==nodeList.elementAt(1)); ! } ! ! public static void main(String[] args) ! { ! new junit.awtui.TestRunner().start(new String[] {NodeListTest.class.getName()}); ! } } --- 36,194 ---- public class NodeListTest extends ParserTestCase { ! private NodeList nodeList; ! private Node[] testNodes; ! ! public NodeListTest(String name) { ! super(name); ! } ! protected void setUp() { ! nodeList = new NodeList(); ! } ! ! public void testAddOneItem() { ! Node node = createHTMLNodeObject(); ! nodeList.add(node); ! assertEquals("Vector Size",1,nodeList.size()); ! assertTrue("First Element",node==nodeList.elementAt(0)); ! } ! public void testAddTwoItems() { ! Node node1 = createHTMLNodeObject(); ! Node node2 = createHTMLNodeObject(); ! nodeList.add(node1); ! nodeList.add(node2); ! assertEquals("Vector Size",2,nodeList.size()); ! assertTrue("First Element",node1==nodeList.elementAt(0)); ! assertTrue("Second Element",node2==nodeList.elementAt(1)); ! } ! ! public void testAddTenItems() { ! createTestDataAndPutInVector(10); ! assertTestDataCouldBeExtractedFromVector(10); ! } ! ! public void testAddElevenItems() { ! createTestDataAndPutInVector(11); ! assertTestDataCouldBeExtractedFromVector(11); ! } ! ! 
public void testAddThirtyItems() { ! createTestDataAndPutInVector(30); ! assertTestDataCouldBeExtractedFromVector(30); ! assertEquals("Number of Adjustments",1,nodeList.getNumberOfAdjustments()); ! } ! ! public void testAddThirtyOneItems() { ! createTestDataAndPutInVector(31); ! assertTestDataCouldBeExtractedFromVector(31); ! assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); ! } ! ! public void testAddFiftyItems() { ! createTestDataAndPutInVector(50); ! assertTestDataCouldBeExtractedFromVector(50); ! assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); ! } ! ! public void testAddFiftyOneItems() { ! createTestDataAndPutInVector(51); ! assertTestDataCouldBeExtractedFromVector(51); ! assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); ! } ! ! public void testAddTwoHundredItems() { ! createTestDataAndPutInVector(200); ! assertEquals("Number of Adjustments",4,nodeList.getNumberOfAdjustments()); ! } ! ! public void testElements() throws Exception { ! createTestDataAndPutInVector(11); ! Node [] resultNodes = new AbstractNode[11]; ! int i = 0; ! for (SimpleNodeIterator e = nodeList.elements();e.hasMoreNodes();) { ! resultNodes[i] = e.nextNode(); ! assertTrue("Node "+i+" did not match",testNodes[i]==resultNodes[i]); ! i++; ! } ! ! } ! ! private Node createHTMLNodeObject() { ! Node node = new AbstractNode(10,20) { ! public void accept(Object visitor) { ! } ! public void collectInto(NodeList collectionList, String filter) { ! } ! ! public String toHtml() { ! return null; ! } ! ! public String toPlainTextString() { ! return null; ! } ! ! public String toString() { ! return ""; ! } ! }; ! return node; ! } ! private void createTestDataAndPutInVector(int nodeCount) { ! testNodes = new AbstractNode[nodeCount]; ! for (int i=0;i<nodeCount;i++) { ! testNodes[i]= createHTMLNodeObject(); ! nodeList.add(testNodes[i]); ! } ! } ! private void assertTestDataCouldBeExtractedFromVector(int nodeCount) { ! 
for (int i=0;i<nodeCount;i++) { ! assertTrue("Element "+i+" did not match",testNodes[i]==nodeList.elementAt(i)); ! } ! } ! ! public void testToNodeArray() { ! createTestDataAndPutInVector(387); ! Node nodes [] = nodeList.toNodeArray(); ! assertEquals("Length of array",387,nodes.length); ! for (int i=0;i<nodes.length;i++) ! assertNotNull("node "+i+" should not be null",nodes[i]); ! } ! ! public void testRemove() { ! Node node1 = createHTMLNodeObject(); ! Node node2 = createHTMLNodeObject(); ! nodeList.add(node1); ! nodeList.add(node2); ! assertEquals("Vector Size",2,nodeList.size()); ! assertTrue("First Element",node1==nodeList.elementAt(0)); ! assertTrue("Second Element",node2==nodeList.elementAt(1)); ! nodeList.remove(1); ! assertEquals("List Size",1,nodeList.size()); ! assertTrue("First Element",node1==nodeList.elementAt(0)); ! } ! ! public void testRemoveAll() { ! Node node1 = createHTMLNodeObject(); ! Node node2 = createHTMLNodeObject(); ! nodeList.add(node1); ! nodeList.add(node2); ! assertEquals("Vector Size",2,nodeList.size()); ! assertTrue("First Element",node1==nodeList.elementAt(0)); ! assertTrue("Second Element",node2==nodeList.elementAt(1)); ! nodeList.removeAll(); ! assertEquals("List Size",0,nodeList.size()); ! assertTrue("First Element",null==nodeList.elementAt(0)); ! assertTrue("Second Element",null==nodeList.elementAt(1)); ! } ! ! public static void main(String[] args) ! { ! new junit.awtui.TestRunner().start(new String[] {NodeListTest.class.getName()}); ! } } |
From: <der...@us...> - 2003-09-03 23:36:25
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/temporaryFailures In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests/temporaryFailures Modified Files: AttributeParserTest.java TagParserTest.java Log Message: Change tabs to spaces in all source files. Index: AttributeParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/temporaryFailures/AttributeParserTest.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** AttributeParserTest.java 24 Aug 2003 21:59:44 -0000 1.10 --- AttributeParserTest.java 3 Sep 2003 23:36:22 -0000 1.11 *************** *** 44,90 **** public class AttributeParserTest extends ParserTestCase { ! private AttributeParser parser; ! private Tag tag; ! private Hashtable table; ! ! public AttributeParserTest(String name) { ! super(name); ! } ! protected void setUp() { ! parser = new AttributeParser(); ! } ! ! public void getParameterTableFor(String tagContents) { ! tag = new Tag(new TagData(0,0,tagContents,"")); ! table = parser.parseAttributes(tag.getText ()); ! ! } ! ! public void testParseParameters() { ! getParameterTableFor("a b = \"c\""); ! assertEquals("Value","c",table.get("B")); ! } ! public void testParseTokenValues() { ! getParameterTableFor("a b = \"'\""); ! assertEquals("Value","'",table.get("B")); ! } ! public void testParseEmptyValues() { ! getParameterTableFor("a b = \"\""); ! assertEquals("Value","",table.get("B")); ! } ! public void testParseMissingEqual() { ! getParameterTableFor("a b\"c\""); ! assertEquals("ValueB",null,table.get("B")); ! } public void testTwoParams(){ ! getParameterTableFor("PARAM NAME=\"Param1\" VALUE=\"Somik\">\n"); ! assertEquals("Param1","Param1",table.get("NAME")); ! assertEquals("Somik","Somik",table.get("VALUE")); } --- 44,90 ---- public class AttributeParserTest extends ParserTestCase { ! private AttributeParser parser; ! private Tag tag; ! 
private Hashtable table; ! ! public AttributeParserTest(String name) { ! super(name); ! } ! protected void setUp() { ! parser = new AttributeParser(); ! } ! ! public void getParameterTableFor(String tagContents) { ! tag = new Tag(new TagData(0,0,tagContents,"")); ! table = parser.parseAttributes(tag.getText ()); ! ! } ! ! public void testParseParameters() { ! getParameterTableFor("a b = \"c\""); ! assertEquals("Value","c",table.get("B")); ! } ! public void testParseTokenValues() { ! getParameterTableFor("a b = \"'\""); ! assertEquals("Value","'",table.get("B")); ! } ! public void testParseEmptyValues() { ! getParameterTableFor("a b = \"\""); ! assertEquals("Value","",table.get("B")); ! } ! public void testParseMissingEqual() { ! getParameterTableFor("a b\"c\""); ! assertEquals("ValueB",null,table.get("B")); ! } public void testTwoParams(){ ! getParameterTableFor("PARAM NAME=\"Param1\" VALUE=\"Somik\">\n"); ! assertEquals("Param1","Param1",table.get("NAME")); ! assertEquals("Somik","Somik",table.get("VALUE")); } *************** *** 111,122 **** public void testIncorrectSpaceKeyBug() { getParameterTableFor("TEXTAREA name=\"Remarks\" "); ! // There should only be two keys.. ! assertEquals("There should only be two keys",2,table.size()); ! // The first key is name ! String key1 = "NAME"; ! String value1 = (String)table.get(key1); ! assertEquals("Expected value 1", "Remarks",value1); ! String key2 = Tag.TAGNAME; ! assertEquals("Expected Value 2","TEXTAREA",table.get(key2)); } --- 111,122 ---- public void testIncorrectSpaceKeyBug() { getParameterTableFor("TEXTAREA name=\"Remarks\" "); ! // There should only be two keys.. ! assertEquals("There should only be two keys",2,table.size()); ! // The first key is name ! String key1 = "NAME"; ! String value1 = (String)table.get(key1); ! assertEquals("Expected value 1", "Remarks",value1); ! String key2 = Tag.TAGNAME; ! 
assertEquals("Expected Value 2","TEXTAREA",table.get(key2)); } *************** *** 128,155 **** public void testAttributeWithSpuriousEqualTo() { ! getParameterTableFor( ! "a class=rlbA href=/news/866201.asp?0sl=-32" ! ); ! assertStringEquals( ! "href", ! "/news/866201.asp?0sl=-32", ! (String)table.get("HREF") ! ); } public void testQuestionMarksInAttributes() { ! getParameterTableFor( ! "a href=\"mailto:sa...@ne...?subject=Site Comments\"" ! ); ! assertStringEquals( ! "href", ! "mailto:sa...@ne...?subject=Site Comments", ! (String)table.get("HREF") ! ); ! assertStringEquals( ! "tag name", ! "A", ! (String)table.get(Tag.TAGNAME) ! ); } --- 128,155 ---- public void testAttributeWithSpuriousEqualTo() { ! getParameterTableFor( ! "a class=rlbA href=/news/866201.asp?0sl=-32" ! ); ! assertStringEquals( ! "href", ! "/news/866201.asp?0sl=-32", ! (String)table.get("HREF") ! ); } public void testQuestionMarksInAttributes() { ! getParameterTableFor( ! "a href=\"mailto:sa...@ne...?subject=Site Comments\"" ! ); ! assertStringEquals( ! "href", ! "mailto:sa...@ne...?subject=Site Comments", ! (String)table.get("HREF") ! ); ! assertStringEquals( ! "tag name", ! "A", ! (String)table.get(Tag.TAGNAME) ! ); } *************** *** 164,169 **** **/ public void testEmptyTag () { ! getParameterTableFor(""); ! assertNotNull ("No Tag.TAGNAME",table.get(Tag.TAGNAME)); } --- 164,169 ---- **/ public void testEmptyTag () { ! getParameterTableFor(""); ! assertNotNull ("No Tag.TAGNAME",table.get(Tag.TAGNAME)); } Index: TagParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/temporaryFailures/TagParserTest.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** TagParserTest.java 24 Aug 2003 21:59:44 -0000 1.8 --- TagParserTest.java 3 Sep 2003 23:36:22 -0000 1.9 *************** *** 45,143 **** public class TagParserTest extends ParserTestCase { ! 
private static final String TEST_HTML = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">" + ! "<!-- Server: sf-web2 -->" + ! "<html lang=\"en\">" + ! " <head><link rel=\"stylesheet\" type=\"text/css\" href=\"http://sourceforge.net/cssdef.php\">" + ! " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">" + ! " <TITLE>SourceForge.net: Modify: 711073 - HTMLTagParser not threadsafe as a static variable in Tag</TITLE>" + ! " <SCRIPT language=\"JavaScript\" type=\"text/javascript\">" + ! " <!--" + ! " function help_window(helpurl) {" + ! " HelpWin = window.open( 'http://sourceforge.net' + helpurl,'HelpWindow','scrollbars=yes,resizable=yes,toolbar=no,height=400,width=400');" + ! " }" + ! " // -->" + ! " </SCRIPT>" + ! " <link rel=\"SHORTCUT ICON\" href=\"/images/favicon.ico\">" + ! "<!-- This is temp javascript for the jump button. If we could actually have a jump script on the server side that would be ideal -->" + ! "<script language=\"JavaScript\" type=\"text/javascript\">" + ! "<!--" + ! " function jump(targ,selObj,restore){ //v3.0" + ! " if (selObj.options[selObj.selectedIndex].value) " + ! " eval(targ+\".location='\"+selObj.options[selObj.selectedIndex].value+\"'\");" + ! " if (restore) selObj.selectedIndex=0;" + ! " }" + ! " //-->" + ! "</script>" + ! "<a href=\"http://normallink.com/sometext.html\">" + ! "<style type=\"text/css\">" + ! "<!--" + ! "A:link { text-decoration:none }" + ! "A:visited { text-decoration:none }" + ! "A:active { text-decoration:none }" + ! "A:hover { text-decoration:underline; color:#0066FF; }" + ! "-->" + ! "</style>" + ! "</head>" + ! "<body bgcolor=\"#FFFFFF\" text=\"#000000\" leftmargin=\"0\" topmargin=\"0\" marginwidth=\"0\" marginheight=\"0\" link=\"#003399\" vlink=\"#003399\" alink=\"#003399\">"; ! private Map results; ! private int testProgress; ! ! public TagParserTest(String name) { ! super(name); ! } ! ! public void testTagWithQuotes() throws Exception { ! String testHtml = ! 
"<img src=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" width=87 height=20 border=0 alt=\"Marshall Field's\">"; ! ! createParser(testHtml); ! parseAndAssertNodeCount(1); ! assertType("should be Tag",Tag.class,node[0]); ! Tag tag = (Tag)node[0]; ! assertStringEquals("alt","Marshall Field's",tag.getAttribute("ALT")); ! assertStringEquals( ! "html", ! "<IMG BORDER=\"0\" ALT=\"Marshall Field's\" WIDTH=\"87\" SRC=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" HEIGHT=\"20\">", ! tag.toHtml() ! ); ! } ! ! public void testEmptyTag() throws Exception { ! createParser("<custom/>"); ! parseAndAssertNodeCount(1); ! assertType("should be Tag",Tag.class,node[0]); ! Tag tag = (Tag)node[0]; ! assertStringEquals("tag name","CUSTOM",tag.getTagName()); ! assertTrue("empty tag",tag.isEmptyXmlTag()); ! assertStringEquals( ! "html", ! "<CUSTOM/>", ! tag.toHtml() ! ); ! } ! ! public void testTagWithCloseTagSymbolInAttribute() throws ParserException { ! createParser("<tag att=\"a>b\">"); ! parseAndAssertNodeCount(1); ! assertType("should be Tag",Tag.class,node[0]); ! Tag tag = (Tag)node[0]; ! assertStringEquals("attribute","a>b",tag.getAttribute("att")); ! } ! ! public void testTagWithOpenTagSymbolInAttribute() throws ParserException { ! createParser("<tag att=\"a<b\">"); ! parseAndAssertNodeCount(1); ! assertType("should be Tag",Tag.class,node[0]); ! Tag tag = (Tag)node[0]; ! assertStringEquals("attribute","a<b",tag.getAttribute("att")); ! } ! public void testTagWithSingleQuote() throws ParserException { ! createParser("<tag att=\'a<b\'>"); ! parseAndAssertNodeCount(1); ! assertType("should be Tag",Tag.class,node[0]); ! Tag tag = (Tag)node[0]; ! assertStringEquals("html","<TAG ATT=\"a<b\">",tag.toHtml()); ! assertStringEquals("attribute","a<b",tag.getAttribute("att")); } --- 45,143 ---- public class TagParserTest extends ParserTestCase { ! 
private static final String TEST_HTML = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">" + ! "<!-- Server: sf-web2 -->" + ! "<html lang=\"en\">" + ! " <head><link rel=\"stylesheet\" type=\"text/css\" href=\"http://sourceforge.net/cssdef.php\">" + ! " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">" + ! " <TITLE>SourceForge.net: Modify: 711073 - HTMLTagParser not threadsafe as a static variable in Tag</TITLE>" + ! " <SCRIPT language=\"JavaScript\" type=\"text/javascript\">" + ! " <!--" + ! " function help_window(helpurl) {" + ! " HelpWin = window.open( 'http://sourceforge.net' + helpurl,'HelpWindow','scrollbars=yes,resizable=yes,toolbar=no,height=400,width=400');" + ! " }" + ! " // -->" + ! " </SCRIPT>" + ! " <link rel=\"SHORTCUT ICON\" href=\"/images/favicon.ico\">" + ! "<!-- This is temp javascript for the jump button. If we could actually have a jump script on the server side that would be ideal -->" + ! "<script language=\"JavaScript\" type=\"text/javascript\">" + ! "<!--" + ! " function jump(targ,selObj,restore){ //v3.0" + ! " if (selObj.options[selObj.selectedIndex].value) " + ! " eval(targ+\".location='\"+selObj.options[selObj.selectedIndex].value+\"'\");" + ! " if (restore) selObj.selectedIndex=0;" + ! " }" + ! " //-->" + ! "</script>" + ! "<a href=\"http://normallink.com/sometext.html\">" + ! "<style type=\"text/css\">" + ! "<!--" + ! "A:link { text-decoration:none }" + ! "A:visited { text-decoration:none }" + ! "A:active { text-decoration:none }" + ! "A:hover { text-decoration:underline; color:#0066FF; }" + ! "-->" + ! "</style>" + ! "</head>" + ! "<body bgcolor=\"#FFFFFF\" text=\"#000000\" leftmargin=\"0\" topmargin=\"0\" marginwidth=\"0\" marginheight=\"0\" link=\"#003399\" vlink=\"#003399\" alink=\"#003399\">"; ! private Map results; ! private int testProgress; ! ! public TagParserTest(String name) { ! super(name); ! } ! ! public void testTagWithQuotes() throws Exception { ! String testHtml = ! 
"<img src=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" width=87 height=20 border=0 alt=\"Marshall Field's\">"; ! ! createParser(testHtml); ! parseAndAssertNodeCount(1); ! assertType("should be Tag",Tag.class,node[0]); ! Tag tag = (Tag)node[0]; ! assertStringEquals("alt","Marshall Field's",tag.getAttribute("ALT")); ! assertStringEquals( ! "html", ! "<IMG BORDER=\"0\" ALT=\"Marshall Field's\" WIDTH=\"87\" SRC=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" HEIGHT=\"20\">", ! tag.toHtml() ! ); ! } ! ! public void testEmptyTag() throws Exception { ! createParser("<custom/>"); ! parseAndAssertNodeCount(1); ! assertType("should be Tag",Tag.class,node[0]); ! Tag tag = (Tag)node[0]; ! assertStringEquals("tag name","CUSTOM",tag.getTagName()); ! assertTrue("empty tag",tag.isEmptyXmlTag()); ! assertStringEquals( ! "html", ! "<CUSTOM/>", ! tag.toHtml() ! ); ! } ! ! public void testTagWithCloseTagSymbolInAttribute() throws ParserException { ! createParser("<tag att=\"a>b\">"); ! parseAndAssertNodeCount(1); ! assertType("should be Tag",Tag.class,node[0]); ! Tag tag = (Tag)node[0]; ! assertStringEquals("attribute","a>b",tag.getAttribute("att")); ! } ! ! public void testTagWithOpenTagSymbolInAttribute() throws ParserException { ! createParser("<tag att=\"a<b\">"); ! parseAndAssertNodeCount(1); ! assertType("should be Tag",Tag.class,node[0]); ! Tag tag = (Tag)node[0]; ! assertStringEquals("attribute","a<b",tag.getAttribute("att")); ! } ! public void testTagWithSingleQuote() throws ParserException { ! createParser("<tag att=\'a<b\'>"); ! parseAndAssertNodeCount(1); ! assertType("should be Tag",Tag.class,node[0]); ! Tag tag = (Tag)node[0]; ! assertStringEquals("html","<TAG ATT=\"a<b\">",tag.toHtml()); ! assertStringEquals("attribute","a<b",tag.getAttribute("att")); } *************** *** 251,255 **** } } ! /** * Test multiline split tag like attribute. --- 251,255 ---- } } ! 
/** * Test multiline split tag like attribute. *************** *** 285,289 **** * Only perform this test if it's version 1.4 or higher. */ ! public void testThreadSafety() throws Exception { createParser("<html></html>"); --- 285,289 ---- * Only perform this test if it's version 1.4 or higher. */ ! public void testThreadSafety() throws Exception { createParser("<html></html>"); *************** *** 307,311 **** new ParsingThread(i,testHtml2,parsingThread.length); ! Thread thread = new Thread(parsingThread[i]); thread.start(); } --- 307,311 ---- new ParsingThread(i,testHtml2,parsingThread.length); ! Thread thread = new Thread(parsingThread[i]); thread.start(); } *************** *** 319,323 **** catch (InterruptedException e) { } ! } while (testProgress!=completionValue); for (int i=0;i<parsingThread.length;i++) { --- 319,323 ---- catch (InterruptedException e) { } ! } while (testProgress!=completionValue); for (int i=0;i<parsingThread.length;i++) { *************** *** 348,413 **** parsingThread[i].getLink2().getLink() ); ! } } } ! } ! } ! private int computeCompletionValue(int numThreads) { ! return numThreads * (numThreads - 1) / 2; ! } ! ! class ParsingThread implements Runnable { ! Parser parser; ! int id; ! LinkTag link1, link2; ! boolean result; ! int max; ! ! ParsingThread(int id, String testHtml, int max) { ! this.id = id; ! this.max = max; ! this.parser = ! Parser.createParser(testHtml); ! parser.registerScanners(); ! } ! ! public void run() { ! try { ! result = false; ! Node linkTag [] = parser.extractAllNodesThatAre(LinkTag.class); ! link1 = (LinkTag)linkTag[0]; ! link2 = (LinkTag)linkTag[1]; ! if (id<max/2) { ! if (link1.getLink().equals("/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html") && ! link2.getLink().equals("http://normallink.com/sometext.html")) ! result = true; ! } else { ! if (link1.getLink().equals("http://normallink.com/sometext.html") && ! 
link2.getLink().equals("http://normallink.com/sometext.html")) ! result = true; ! } ! } ! catch (ParserException e) { ! System.err.println("Parser Exception"); ! e.printStackTrace(); ! } ! finally { ! testProgress += id; ! } ! } ! ! public LinkTag getLink1() { ! return link1; ! } ! ! public LinkTag getLink2() { ! return link2; ! } ! ! public boolean passed() { ! return result; ! } } --- 348,413 ---- parsingThread[i].getLink2().getLink() ); ! } } } ! } ! } ! private int computeCompletionValue(int numThreads) { ! return numThreads * (numThreads - 1) / 2; ! } ! ! class ParsingThread implements Runnable { ! Parser parser; ! int id; ! LinkTag link1, link2; ! boolean result; ! int max; ! ! ParsingThread(int id, String testHtml, int max) { ! this.id = id; ! this.max = max; ! this.parser = ! Parser.createParser(testHtml); ! parser.registerScanners(); ! } ! ! public void run() { ! try { ! result = false; ! Node linkTag [] = parser.extractAllNodesThatAre(LinkTag.class); ! link1 = (LinkTag)linkTag[0]; ! link2 = (LinkTag)linkTag[1]; ! if (id<max/2) { ! if (link1.getLink().equals("/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html") && ! link2.getLink().equals("http://normallink.com/sometext.html")) ! result = true; ! } else { ! if (link1.getLink().equals("http://normallink.com/sometext.html") && ! link2.getLink().equals("http://normallink.com/sometext.html")) ! result = true; ! } ! } ! catch (ParserException e) { ! System.err.println("Parser Exception"); ! e.printStackTrace(); ! } ! finally { ! testProgress += id; ! } ! } ! ! public LinkTag getLink1() { ! return link1; ! } ! ! public LinkTag getLink2() { ! return link2; ! } ! ! public boolean passed() { ! return result; ! } } |
From: <der...@us...> - 2003-09-03 23:36:24
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests/parserHelperTests Modified Files: AllTests.java CompositeTagScannerHelperTest.java StringParserTest.java Log Message: Change tabs to spaces in all source files. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/AllTests.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** AllTests.java 24 Aug 2003 21:59:43 -0000 1.24 --- AllTests.java 3 Sep 2003 23:36:21 -0000 1.25 *************** *** 39,48 **** public static TestSuite suite() { TestSuite suite = new TestSuite("Parser Helper Tests"); ! // To-do: Test below should be enabled after it passes // suite.addTestSuite(AttributeParserTest.class); suite.addTestSuite(CompositeTagScannerHelperTest.class); suite.addTestSuite(RemarkNodeParserTest.class); suite.addTestSuite(StringParserTest.class); ! // To-do: Test below should be enabled after it passes // suite.addTestSuite(TagParserTest.class); --- 39,48 ---- public static TestSuite suite() { TestSuite suite = new TestSuite("Parser Helper Tests"); ! // To-do: Test below should be enabled after it passes // suite.addTestSuite(AttributeParserTest.class); suite.addTestSuite(CompositeTagScannerHelperTest.class); suite.addTestSuite(RemarkNodeParserTest.class); suite.addTestSuite(StringParserTest.class); ! 
// To-do: Test below should be enabled after it passes // suite.addTestSuite(TagParserTest.class); Index: CompositeTagScannerHelperTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/CompositeTagScannerHelperTest.java,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** CompositeTagScannerHelperTest.java 24 Aug 2003 21:59:43 -0000 1.17 --- CompositeTagScannerHelperTest.java 3 Sep 2003 23:36:21 -0000 1.18 *************** *** 41,70 **** */ public class CompositeTagScannerHelperTest extends ParserTestCase { ! private CompositeTagScannerHelper helper; ! public CompositeTagScannerHelperTest(String name) { ! super(name); ! } ! protected void setUp() { ! helper = ! new CompositeTagScannerHelper(null,null,null,null,null,false); ! } ! ! public void testIsXmlEndTagForRealXml() { ! Tag tag = new Tag( ! new TagData( ! 0,0,"something/","" ! ) ! ); ! assertTrue("should be an xml end tag",helper.isXmlEndTag(tag)); ! } ! public void testIsXmlEndTagForFalseMatches() { ! Tag tag = new Tag( ! new TagData( ! 0,0,"a href=http://someurl.com/","" ! ) ! ); ! assertFalse("should not be an xml end tag",helper.isXmlEndTag(tag)); ! } } --- 41,70 ---- */ public class CompositeTagScannerHelperTest extends ParserTestCase { ! private CompositeTagScannerHelper helper; ! public CompositeTagScannerHelperTest(String name) { ! super(name); ! } ! protected void setUp() { ! helper = ! new CompositeTagScannerHelper(null,null,null,null,null,false); ! } ! ! public void testIsXmlEndTagForRealXml() { ! Tag tag = new Tag( ! new TagData( ! 0,0,"something/","" ! ) ! ); ! assertTrue("should be an xml end tag",helper.isXmlEndTag(tag)); ! } ! public void testIsXmlEndTagForFalseMatches() { ! Tag tag = new Tag( ! new TagData( ! 0,0,"a href=http://someurl.com/","" ! ) ! ); ! assertFalse("should not be an xml end tag",helper.isXmlEndTag(tag)); ! 
} } Index: StringParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/StringParserTest.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** StringParserTest.java 24 Aug 2003 21:59:43 -0000 1.34 --- StringParserTest.java 3 Sep 2003 23:36:21 -0000 1.35 *************** *** 40,209 **** public class StringParserTest extends ParserTestCase { ! public StringParserTest(String name) { ! super(name); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <HTML><HEAD><TITLE>Google</TITLE> <BR> ! * The above line is incorrectly parsed in that, the text Google is missed. ! * The presence of this bug is typically when some tag is identified before the string node is. (usually seen ! * with the end tag). The bug lies in NodeReader.readElement(). ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testStringNodeBug1() throws ParserException { ! createParser("<HTML><HEAD><TITLE>Google</TITLE>"); ! parseAndAssertNodeCount(5); ! // The fourth node should be a HTMLStringNode- with the text - Google ! assertTrue("Fourth node should be a HTMLStringNode",node[3] instanceof StringNode); ! StringNode stringNode = (StringNode)node[3]; ! assertEquals("Text of the StringNode","Google",stringNode.getText()); ! } ! ! /** ! * Bug reported by Kaarle Kaila of Nokia<br> ! * For the following HTML : ! * view these documents, you must have <A href='http://www.adobe.com'>Adobe <br> ! * Acrobat Reader</A> installed on your computer.<br> ! * The first string before the link is not identified, and the space after the link is also not identified ! * Creation date: (8/2/2001 2:07:32 AM) ! */ ! public void testStringNodeBug2() throws ParserException { ! // Register the link scanner ! ! createParser("view these documents, you must have <A href='http://www.adobe.com'>Adobe \n"+ ! "Acrobat Reader</A> installed on your computer."); ! 
Parser.setLineSeparator("\r\n"); ! parser.addScanner(new LinkScanner("-l")); ! parseAndAssertNodeCount(3); ! // The first node should be a HTMLStringNode- with the text - view these documents, you must have ! assertTrue("First node should be a HTMLStringNode",node[0] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertEquals("Text of the StringNode","view these documents, you must have ",stringNode.getText()); ! assertTrue("Second node should be a link node",node[1] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[1]; ! assertEquals("Link is","http://www.adobe.com",linkNode.getLink()); ! assertEquals("Link text is","Adobe \r\nAcrobat Reader",linkNode.getLinkText()); ! ! assertTrue("Third node should be a string node",node[2] instanceof StringNode); ! StringNode stringNode2 = (StringNode)node[2]; ! assertEquals("Contents of third node"," installed on your computer.",stringNode2.getText()); ! } ! ! /** ! * Bug reported by Roger Sollberger<br> ! * For the following HTML : ! * <a href="http://asgard.ch">[< ASGARD ></a><br> ! * The string node is not correctly identified ! */ ! public void testTagCharsInStringNode() throws ParserException { ! createParser("<a href=\"http://asgard.ch\">[> ASGARD <]</a>"); ! parser.addScanner(new LinkScanner("-l")); ! parseAndAssertNodeCount(1); ! assertTrue("Node identified must be a link tag",node[0] instanceof LinkTag); ! LinkTag linkTag = (LinkTag) node[0]; ! assertEquals("[> ASGARD <]",linkTag.getLinkText()); ! assertEquals("http://asgard.ch",linkTag.getLink()); ! } ! ! public void testToPlainTextString() throws ParserException { ! createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); ! parseAndAssertNodeCount(10); ! assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode); ! StringNode stringNode = (StringNode)node[3]; ! 
assertEquals("First String Node","This is the Title",stringNode.toPlainTextString()); ! assertTrue("Eighth Node identified must be a string node",node[7] instanceof StringNode); ! stringNode = (StringNode)node[7]; ! assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toPlainTextString()); ! } ! ! public void testToHTML() throws ParserException { ! createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); ! parseAndAssertNodeCount(10); ! assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode); ! StringNode stringNode = (StringNode)node[3]; ! assertEquals("First String Node","This is the Title",stringNode.toHtml()); ! assertTrue("Eighth Node identified must be a string node",node[7] instanceof StringNode); ! stringNode = (StringNode)node[7]; ! assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toHtml()); ! } ! public void testEmptyLines() throws ParserException { ! createParser( ! "David Nirenberg (Center for Advanced Study in the Behavorial Sciences, Stanford).<br>\n"+ ! " \n"+ ! "<br>" ! ); ! parseAndAssertNodeCount(4); ! assertTrue("Third Node identified must be a string node",node[2] instanceof StringNode); ! } ! /** ! * This is a bug reported by John Zook (586222), where the first few chars ! * before a remark is being missed, if its on the same line. ! */ ! public void testStringBeingMissedBug() throws ParserException { ! createParser( ! "Before Comment <!-- Comment --> After Comment" ! ); ! parseAndAssertNodeCount(3); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! assertTrue("Second node should be HTMLRemarkNode",node[1] instanceof RemarkNode); ! assertTrue("Third node should be HTMLStringNode",node[2] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertEquals("First String node contents","Before Comment ",stringNode.getText()); ! 
StringNode stringNode2 = (StringNode)node[2]; ! assertEquals("Second String node contents"," After Comment",stringNode2.getText()); ! RemarkNode remarkNode = (RemarkNode)node[1]; ! assertEquals("Remark Node contents"," Comment ",remarkNode.getText()); ! ! } ! /** ! * Based on a bug report submitted by Cedric Rosa, if the last line contains a single character, ! * StringNode does not return the string node correctly. ! */ ! public void testLastLineWithOneChar() throws ParserException { ! createParser("a"); ! parseAndAssertNodeCount(1); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertEquals("First String node contents","a",stringNode.getText()); ! } ! ! public void testStringWithEmptyLine() throws ParserException { ! createParser("a\n\nb"); ! parseAndAssertNodeCount(1); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertStringEquals("First String node contents","a\r\n\r\nb",stringNode.getText()); ! } ! ! /** ! * An attempt to reproduce bug 677176, which passes. ! * @throws Exception ! */ ! public void testStringParserBug() throws Exception { ! createParser( ! "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 " + "Transitional//EN\">" + "<html>" + "<head>" + "<title>Untitled Document</title>" + "<meta http-equiv=\"Content-Type\" content=\"text/html; " + "charset=iso-8859-1\">" + "</head>" + "<script language=\"JavaScript\" type=\"text/JavaScript\">" + "// if this fails, output a 'hello' " + "if (true) " + "{ " + "//something good... " + "} " + "</script>" + "<body>" + "</body>" + "</html>" ); ! parser.registerScanners(); ! parseAndAssertNodeCount(10); ! assertType("fourth node",MetaTag.class,node[4]); ! MetaTag metaTag = (MetaTag)node[4]; ! ! assertStringEquals( ! "content", ! "text/html; charset=iso-8859-1", ! metaTag.getAttribute("CONTENT") ! ); ! } ! ! 
public void testStringWithLineBreaks() throws Exception { ! createParser("Testing &\nRefactoring"); ! parseAndAssertNodeCount(1); ! assertType("first node",StringNode.class,node[0]); ! StringNode stringNode = (StringNode)node[0]; ! assertStringEquals("text","Testing &\r\nRefactoring",stringNode.toPlainTextString()); ! } ! } --- 40,227 ---- public class StringParserTest extends ParserTestCase { ! public StringParserTest(String name) { ! super(name); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <HTML><HEAD><TITLE>Google</TITLE> <BR> ! * The above line is incorrectly parsed in that, the text Google is missed. ! * The presence of this bug is typically when some tag is identified before the string node is. (usually seen ! * with the end tag). The bug lies in NodeReader.readElement(). ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testStringNodeBug1() throws ParserException { ! createParser("<HTML><HEAD><TITLE>Google</TITLE>"); ! parseAndAssertNodeCount(5); ! // The fourth node should be a HTMLStringNode- with the text - Google ! assertTrue("Fourth node should be a HTMLStringNode",node[3] instanceof StringNode); ! StringNode stringNode = (StringNode)node[3]; ! assertEquals("Text of the StringNode","Google",stringNode.getText()); ! } ! ! /** ! * Bug reported by Kaarle Kaila of Nokia<br> ! * For the following HTML : ! * view these documents, you must have <A href='http://www.adobe.com'>Adobe <br> ! * Acrobat Reader</A> installed on your computer.<br> ! * The first string before the link is not identified, and the space after the link is also not identified ! * Creation date: (8/2/2001 2:07:32 AM) ! */ ! public void testStringNodeBug2() throws ParserException { ! // Register the link scanner ! ! createParser("view these documents, you must have <A href='http://www.adobe.com'>Adobe \n"+ ! "Acrobat Reader</A> installed on your computer."); ! Parser.setLineSeparator("\r\n"); ! parser.addScanner(new LinkScanner("-l")); ! 
parseAndAssertNodeCount(3); ! // The first node should be a HTMLStringNode- with the text - view these documents, you must have ! assertTrue("First node should be a HTMLStringNode",node[0] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertEquals("Text of the StringNode","view these documents, you must have ",stringNode.getText()); ! assertTrue("Second node should be a link node",node[1] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[1]; ! assertEquals("Link is","http://www.adobe.com",linkNode.getLink()); ! assertEquals("Link text is","Adobe \r\nAcrobat Reader",linkNode.getLinkText()); ! ! assertTrue("Third node should be a string node",node[2] instanceof StringNode); ! StringNode stringNode2 = (StringNode)node[2]; ! assertEquals("Contents of third node"," installed on your computer.",stringNode2.getText()); ! } ! ! /** ! * Bug reported by Roger Sollberger<br> ! * For the following HTML : ! * <a href="http://asgard.ch">[< ASGARD ></a><br> ! * The string node is not correctly identified ! */ ! public void testTagCharsInStringNode() throws ParserException { ! createParser("<a href=\"http://asgard.ch\">[> ASGARD <]</a>"); ! parser.addScanner(new LinkScanner("-l")); ! parseAndAssertNodeCount(1); ! assertTrue("Node identified must be a link tag",node[0] instanceof LinkTag); ! LinkTag linkTag = (LinkTag) node[0]; ! assertEquals("[> ASGARD <]",linkTag.getLinkText()); ! assertEquals("http://asgard.ch",linkTag.getLink()); ! } ! ! public void testToPlainTextString() throws ParserException { ! createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); ! parseAndAssertNodeCount(10); ! assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode); ! StringNode stringNode = (StringNode)node[3]; ! assertEquals("First String Node","This is the Title",stringNode.toPlainTextString()); ! 
assertTrue("Eighth Node identified must be a string node",node[7] instanceof StringNode); ! stringNode = (StringNode)node[7]; ! assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toPlainTextString()); ! } ! ! public void testToHTML() throws ParserException { ! createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); ! parseAndAssertNodeCount(10); ! assertTrue("Fourth Node identified must be a string node",node[3] instanceof StringNode); ! StringNode stringNode = (StringNode)node[3]; ! assertEquals("First String Node","This is the Title",stringNode.toHtml()); ! assertTrue("Eighth Node identified must be a string node",node[7] instanceof StringNode); ! stringNode = (StringNode)node[7]; ! assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toHtml()); ! } ! public void testEmptyLines() throws ParserException { ! createParser( ! "David Nirenberg (Center for Advanced Study in the Behavorial Sciences, Stanford).<br>\n"+ ! " \n"+ ! "<br>" ! ); ! parseAndAssertNodeCount(4); ! assertTrue("Third Node identified must be a string node",node[2] instanceof StringNode); ! } ! /** ! * This is a bug reported by John Zook (586222), where the first few chars ! * before a remark is being missed, if its on the same line. ! */ ! public void testStringBeingMissedBug() throws ParserException { ! createParser( ! "Before Comment <!-- Comment --> After Comment" ! ); ! parseAndAssertNodeCount(3); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! assertTrue("Second node should be HTMLRemarkNode",node[1] instanceof RemarkNode); ! assertTrue("Third node should be HTMLStringNode",node[2] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertEquals("First String node contents","Before Comment ",stringNode.getText()); ! StringNode stringNode2 = (StringNode)node[2]; ! 
assertEquals("Second String node contents"," After Comment",stringNode2.getText()); ! RemarkNode remarkNode = (RemarkNode)node[1]; ! assertEquals("Remark Node contents"," Comment ",remarkNode.getText()); ! ! } ! /** ! * Based on a bug report submitted by Cedric Rosa, if the last line contains a single character, ! * StringNode does not return the string node correctly. ! */ ! public void testLastLineWithOneChar() throws ParserException { ! createParser("a"); ! parseAndAssertNodeCount(1); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertEquals("First String node contents","a",stringNode.getText()); ! } ! ! public void testStringWithEmptyLine() throws ParserException { ! createParser("a\n\nb"); ! parseAndAssertNodeCount(1); ! assertTrue("First node should be HTMLStringNode",node[0] instanceof StringNode); ! StringNode stringNode = (StringNode)node[0]; ! assertStringEquals("First String node contents","a\r\n\r\nb",stringNode.getText()); ! } ! ! /** ! * An attempt to reproduce bug 677176, which passes. ! * @throws Exception ! */ ! public void testStringParserBug() throws Exception { ! createParser( ! "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 " + ! "Transitional//EN\">" + ! "<html>" + ! "<head>" + ! "<title>Untitled Document</title>" + ! "<meta http-equiv=\"Content-Type\" content=\"text/html; " + ! "charset=iso-8859-1\">" + ! "</head>" + ! "<script language=\"JavaScript\" type=\"text/JavaScript\">" + ! "// if this fails, output a 'hello' " + ! "if (true) " + ! "{ " + ! "//something good... " + ! "} " + ! "</script>" + ! "<body>" + ! "</body>" + ! "</html>" ! ); ! parser.registerScanners(); ! parseAndAssertNodeCount(10); ! assertType("fourth node",MetaTag.class,node[4]); ! MetaTag metaTag = (MetaTag)node[4]; ! ! assertStringEquals( ! "content", ! "text/html; charset=iso-8859-1", ! metaTag.getAttribute("CONTENT") ! ); ! } ! ! 
public void testStringWithLineBreaks() throws Exception { ! createParser("Testing &\nRefactoring"); ! parseAndAssertNodeCount(1); ! assertType("first node",StringNode.class,node[0]); ! StringNode stringNode = (StringNode)node[0]; ! assertStringEquals("text","Testing &\r\nRefactoring",stringNode.toPlainTextString()); ! } ! } |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/nodeDecoratorTests In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests/nodeDecoratorTests Modified Files: AllTests.java DecodingNodeTest.java EscapeCharacterRemovingNodeTest.java NonBreakingSpaceConvertingNodeTest.java Log Message: Change tabs to spaces in all source files. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/nodeDecoratorTests/AllTests.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** AllTests.java 24 Aug 2003 21:59:43 -0000 1.7 --- AllTests.java 3 Sep 2003 23:36:21 -0000 1.8 *************** *** 33,47 **** public class AllTests extends junit.framework.TestCase { ! public AllTests(String name) { ! super(name); ! } ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("Node Decorator Tests"); ! suite.addTestSuite(DecodingNodeTest.class); ! suite.addTestSuite(EscapeCharacterRemovingNodeTest.class); ! suite.addTestSuite(NonBreakingSpaceConvertingNodeTest.class); ! return suite; ! } /** --- 33,47 ---- public class AllTests extends junit.framework.TestCase { ! public AllTests(String name) { ! super(name); ! } ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("Node Decorator Tests"); ! suite.addTestSuite(DecodingNodeTest.class); ! suite.addTestSuite(EscapeCharacterRemovingNodeTest.class); ! suite.addTestSuite(NonBreakingSpaceConvertingNodeTest.class); ! return suite; ! 
} /** Index: DecodingNodeTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/nodeDecoratorTests/DecodingNodeTest.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** DecodingNodeTest.java 24 Aug 2003 21:59:43 -0000 1.8 --- DecodingNodeTest.java 3 Sep 2003 23:36:21 -0000 1.9 *************** *** 38,131 **** public class DecodingNodeTest extends ParserTestCase { ! public DecodingNodeTest(String name) { ! super(name); ! } ! private String parseToObtainDecodedResult(String STRING_TO_DECODE) ! throws ParserException { ! StringBuffer decodedContent = new StringBuffer(); ! StringNodeFactory stringNodeFactory = new StringNodeFactory(); ! stringNodeFactory.setNodeDecoding(true); ! createParser(STRING_TO_DECODE); ! parser.setStringNodeFactory(stringNodeFactory); ! NodeIterator nodes = parser.elements(); ! ! while (nodes.hasMoreNodes()) ! decodedContent.append(nodes.nextNode().toPlainTextString()); ! return decodedContent.toString(); ! } ! public void testAmpersand() throws Exception { ! String ENCODED_WORKSHOP_TITLE = ! "The Testing & Refactoring Workshop"; ! ! String DECODED_WORKSHOP_TITLE = ! "The Testing & Refactoring Workshop"; ! assertEquals( ! "ampersand in string", ! DECODED_WORKSHOP_TITLE, ! parseToObtainDecodedResult(ENCODED_WORKSHOP_TITLE)); ! } ! public void testNumericReference() throws Exception { ! String ENCODED_DIVISION_SIGN = ! "÷ is the division sign."; ! ! String DECODED_DIVISION_SIGN = ! "÷ is the division sign."; ! ! assertEquals( ! "numeric reference for division sign", ! DECODED_DIVISION_SIGN, ! parseToObtainDecodedResult(ENCODED_DIVISION_SIGN)); ! } ! ! ! public void testReferencesInString () throws Exception { ! String ENCODED_REFERENCE_IN_STRING = ! "Thus, the character entity reference ÷ is a more convenient" + ! " form than ÷ for obtaining the division sign (÷)"; ! ! String DECODED_REFERENCE_IN_STRING = ! 
"Thus, the character entity reference ÷ is a more convenient" + ! " form than ÷ for obtaining the division sign (÷)"; ! ! assertEquals ( ! "character references within a string", ! DECODED_REFERENCE_IN_STRING, ! parseToObtainDecodedResult(ENCODED_REFERENCE_IN_STRING)); ! } ! public void testBogusCharacterEntityReference() throws Exception { ! ! String ENCODED_BOGUS_CHARACTER_ENTITY = ! "The character entity reference &divode; is bogus"; ! ! String DECODED_BOGUS_CHARACTER_ENTITY = ! "The character entity reference &divode; is bogus"; ! ! assertEquals ( ! "bogus character entity reference", ! DECODED_BOGUS_CHARACTER_ENTITY, ! parseToObtainDecodedResult(ENCODED_BOGUS_CHARACTER_ENTITY)); ! } ! ! public void testDecodingNonBreakingSpaceDoesNotOccur() throws Exception { ! ! String ENCODED_WITH_NON_BREAKING_SPACE = ! "Here is string with \u00a0."; ! ! String DECODED_WITH_NON_BREAKING_SPACE = ! "Here is string with \u00a0."; ! ! assertEquals ( ! "bogus character entity reference", ! DECODED_WITH_NON_BREAKING_SPACE, ! parseToObtainDecodedResult(ENCODED_WITH_NON_BREAKING_SPACE)); ! } ! ! } --- 38,131 ---- public class DecodingNodeTest extends ParserTestCase { ! public DecodingNodeTest(String name) { ! super(name); ! } ! private String parseToObtainDecodedResult(String STRING_TO_DECODE) ! throws ParserException { ! StringBuffer decodedContent = new StringBuffer(); ! StringNodeFactory stringNodeFactory = new StringNodeFactory(); ! stringNodeFactory.setNodeDecoding(true); ! createParser(STRING_TO_DECODE); ! parser.setStringNodeFactory(stringNodeFactory); ! NodeIterator nodes = parser.elements(); ! ! while (nodes.hasMoreNodes()) ! decodedContent.append(nodes.nextNode().toPlainTextString()); ! return decodedContent.toString(); ! } ! public void testAmpersand() throws Exception { ! String ENCODED_WORKSHOP_TITLE = ! "The Testing & Refactoring Workshop"; ! ! String DECODED_WORKSHOP_TITLE = ! "The Testing & Refactoring Workshop"; ! assertEquals( ! "ampersand in string", ! 
DECODED_WORKSHOP_TITLE, ! parseToObtainDecodedResult(ENCODED_WORKSHOP_TITLE)); ! } ! public void testNumericReference() throws Exception { ! String ENCODED_DIVISION_SIGN = ! "÷ is the division sign."; ! ! String DECODED_DIVISION_SIGN = ! "÷ is the division sign."; ! ! assertEquals( ! "numeric reference for division sign", ! DECODED_DIVISION_SIGN, ! parseToObtainDecodedResult(ENCODED_DIVISION_SIGN)); ! } ! ! ! public void testReferencesInString () throws Exception { ! String ENCODED_REFERENCE_IN_STRING = ! "Thus, the character entity reference ÷ is a more convenient" + ! " form than ÷ for obtaining the division sign (÷)"; ! ! String DECODED_REFERENCE_IN_STRING = ! "Thus, the character entity reference ÷ is a more convenient" + ! " form than ÷ for obtaining the division sign (÷)"; ! ! assertEquals ( ! "character references within a string", ! DECODED_REFERENCE_IN_STRING, ! parseToObtainDecodedResult(ENCODED_REFERENCE_IN_STRING)); ! } ! public void testBogusCharacterEntityReference() throws Exception { ! ! String ENCODED_BOGUS_CHARACTER_ENTITY = ! "The character entity reference &divode; is bogus"; ! ! String DECODED_BOGUS_CHARACTER_ENTITY = ! "The character entity reference &divode; is bogus"; ! ! assertEquals ( ! "bogus character entity reference", ! DECODED_BOGUS_CHARACTER_ENTITY, ! parseToObtainDecodedResult(ENCODED_BOGUS_CHARACTER_ENTITY)); ! } ! ! public void testDecodingNonBreakingSpaceDoesNotOccur() throws Exception { ! ! String ENCODED_WITH_NON_BREAKING_SPACE = ! "Here is string with \u00a0."; ! ! String DECODED_WITH_NON_BREAKING_SPACE = ! "Here is string with \u00a0."; ! ! assertEquals ( ! "bogus character entity reference", ! DECODED_WITH_NON_BREAKING_SPACE, ! parseToObtainDecodedResult(ENCODED_WITH_NON_BREAKING_SPACE)); ! } ! ! 
} Index: EscapeCharacterRemovingNodeTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/nodeDecoratorTests/EscapeCharacterRemovingNodeTest.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** EscapeCharacterRemovingNodeTest.java 24 Aug 2003 21:59:43 -0000 1.8 --- EscapeCharacterRemovingNodeTest.java 3 Sep 2003 23:36:21 -0000 1.9 *************** *** 37,112 **** public class EscapeCharacterRemovingNodeTest extends ParserTestCase { ! public EscapeCharacterRemovingNodeTest(String name) { ! super(name); ! } ! private String parseToObtainDecodedResult(String STRING_TO_DECODE) ! throws ParserException { ! StringBuffer decodedContent = new StringBuffer(); ! StringNodeFactory stringNodeFactory = new StringNodeFactory(); ! stringNodeFactory.setEscapeCharacterRemoval(true); ! createParser(STRING_TO_DECODE); ! parser.setStringNodeFactory(stringNodeFactory); ! NodeIterator nodes = parser.elements(); ! ! while (nodes.hasMoreNodes()) ! decodedContent.append(nodes.nextNode().toPlainTextString()); ! return decodedContent.toString(); ! } ! public void testTab() throws Exception { ! String ENCODED_WORKSHOP_TITLE = ! "The Testing & Refactoring Workshop\tCreated by Industrial Logic, Inc."; ! ! String DECODED_WORKSHOP_TITLE = ! "The Testing & Refactoring WorkshopCreated by Industrial Logic, Inc."; ! assertEquals( ! "tab in string", ! DECODED_WORKSHOP_TITLE, ! parseToObtainDecodedResult(ENCODED_WORKSHOP_TITLE)); ! } ! ! public void testCarriageReturn() throws Exception { ! String ENCODED_WORKSHOP_TITLE = ! "The Testing & Refactoring Workshop\nCreated by Industrial Logic, Inc.\n"; ! ! String DECODED_WORKSHOP_TITLE = ! "The Testing & Refactoring WorkshopCreated by Industrial Logic, Inc."; ! assertEquals( ! "tab in string", ! DECODED_WORKSHOP_TITLE, ! parseToObtainDecodedResult(ENCODED_WORKSHOP_TITLE)); ! } ! ! 
public void testWithDecodingNodeDecorator() throws Exception { ! String ENCODED_WORKSHOP_TITLE = ! "The Testing & Refactoring Workshop\nCreated by Industrial Logic, Inc.\n"; ! ! String DECODED_WORKSHOP_TITLE = ! "The Testing & Refactoring WorkshopCreated by Industrial Logic, Inc."; ! StringBuffer decodedContent = new StringBuffer(); ! ! StringNodeFactory stringNodeFactory = new StringNodeFactory(); ! stringNodeFactory.setNodeDecoding(true); ! stringNodeFactory.setEscapeCharacterRemoval(true); ! ! createParser(ENCODED_WORKSHOP_TITLE); ! parser.setStringNodeFactory(stringNodeFactory); ! NodeIterator nodes = parser.elements(); ! ! while (nodes.hasMoreNodes()) ! decodedContent.append(nodes.nextNode().toPlainTextString()); ! assertEquals( ! "tab in string", ! DECODED_WORKSHOP_TITLE, ! decodedContent.toString()); ! ! } } --- 37,112 ---- public class EscapeCharacterRemovingNodeTest extends ParserTestCase { ! public EscapeCharacterRemovingNodeTest(String name) { ! super(name); ! } ! private String parseToObtainDecodedResult(String STRING_TO_DECODE) ! throws ParserException { ! StringBuffer decodedContent = new StringBuffer(); ! StringNodeFactory stringNodeFactory = new StringNodeFactory(); ! stringNodeFactory.setEscapeCharacterRemoval(true); ! createParser(STRING_TO_DECODE); ! parser.setStringNodeFactory(stringNodeFactory); ! NodeIterator nodes = parser.elements(); ! ! while (nodes.hasMoreNodes()) ! decodedContent.append(nodes.nextNode().toPlainTextString()); ! return decodedContent.toString(); ! } ! public void testTab() throws Exception { ! String ENCODED_WORKSHOP_TITLE = ! "The Testing & Refactoring Workshop\tCreated by Industrial Logic, Inc."; ! ! String DECODED_WORKSHOP_TITLE = ! "The Testing & Refactoring WorkshopCreated by Industrial Logic, Inc."; ! assertEquals( ! "tab in string", ! DECODED_WORKSHOP_TITLE, ! parseToObtainDecodedResult(ENCODED_WORKSHOP_TITLE)); ! } ! ! public void testCarriageReturn() throws Exception { ! String ENCODED_WORKSHOP_TITLE = ! 
"The Testing & Refactoring Workshop\nCreated by Industrial Logic, Inc.\n"; ! ! String DECODED_WORKSHOP_TITLE = ! "The Testing & Refactoring WorkshopCreated by Industrial Logic, Inc."; ! assertEquals( ! "tab in string", ! DECODED_WORKSHOP_TITLE, ! parseToObtainDecodedResult(ENCODED_WORKSHOP_TITLE)); ! } ! ! public void testWithDecodingNodeDecorator() throws Exception { ! String ENCODED_WORKSHOP_TITLE = ! "The Testing & Refactoring Workshop\nCreated by Industrial Logic, Inc.\n"; ! ! String DECODED_WORKSHOP_TITLE = ! "The Testing & Refactoring WorkshopCreated by Industrial Logic, Inc."; ! StringBuffer decodedContent = new StringBuffer(); ! ! StringNodeFactory stringNodeFactory = new StringNodeFactory(); ! stringNodeFactory.setNodeDecoding(true); ! stringNodeFactory.setEscapeCharacterRemoval(true); ! ! createParser(ENCODED_WORKSHOP_TITLE); ! parser.setStringNodeFactory(stringNodeFactory); ! NodeIterator nodes = parser.elements(); ! ! while (nodes.hasMoreNodes()) ! decodedContent.append(nodes.nextNode().toPlainTextString()); ! assertEquals( ! "tab in string", ! DECODED_WORKSHOP_TITLE, ! decodedContent.toString()); ! ! } } Index: NonBreakingSpaceConvertingNodeTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/nodeDecoratorTests/NonBreakingSpaceConvertingNodeTest.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** NonBreakingSpaceConvertingNodeTest.java 24 Aug 2003 21:59:43 -0000 1.7 --- NonBreakingSpaceConvertingNodeTest.java 3 Sep 2003 23:36:21 -0000 1.8 *************** *** 37,86 **** public class NonBreakingSpaceConvertingNodeTest extends ParserTestCase { ! public NonBreakingSpaceConvertingNodeTest(String name) { ! super(name); ! } ! private String parseToObtainDecodedResult(String STRING_TO_DECODE) ! throws ParserException { ! StringBuffer decodedContent = new StringBuffer(); ! ! StringNodeFactory stringNodeFactory = new StringNodeFactory(); ! 
stringNodeFactory.setNonBreakSpaceConversion(true); ! createParser(STRING_TO_DECODE); ! parser.setStringNodeFactory(stringNodeFactory); ! NodeIterator nodes = parser.elements(); ! ! while (nodes.hasMoreNodes()) ! decodedContent.append(nodes.nextNode().toPlainTextString()); ! return decodedContent.toString(); ! } ! public void testOneNonBreakingSpace() throws Exception { ! String ENCODED_WITH_NON_BREAKING_SPACE = ! "Here is string with \u00a0 inside of it."; ! ! String DECODED_WITH_NON_BREAKING_SPACE = ! "Here is string with inside of it."; ! ! assertEquals ( ! "\u00a0 was converted to a space correctly", ! DECODED_WITH_NON_BREAKING_SPACE, ! parseToObtainDecodedResult(ENCODED_WITH_NON_BREAKING_SPACE)); ! } ! ! public void testMultipleNonBreakingSpace() throws Exception { ! String ENCODED_WITH_NON_BREAKING_SPACE = ! "\u00a0Here is string with \u00a0 inside of it\u00a0."; ! ! String DECODED_WITH_NON_BREAKING_SPACE = ! " Here is string with inside of it ."; ! ! assertEquals ( ! "\u00a0 was converted to a space correctly", ! DECODED_WITH_NON_BREAKING_SPACE, ! parseToObtainDecodedResult(ENCODED_WITH_NON_BREAKING_SPACE)); ! } ! } --- 37,86 ---- public class NonBreakingSpaceConvertingNodeTest extends ParserTestCase { ! public NonBreakingSpaceConvertingNodeTest(String name) { ! super(name); ! } ! private String parseToObtainDecodedResult(String STRING_TO_DECODE) ! throws ParserException { ! StringBuffer decodedContent = new StringBuffer(); ! ! StringNodeFactory stringNodeFactory = new StringNodeFactory(); ! stringNodeFactory.setNonBreakSpaceConversion(true); ! createParser(STRING_TO_DECODE); ! parser.setStringNodeFactory(stringNodeFactory); ! NodeIterator nodes = parser.elements(); ! ! while (nodes.hasMoreNodes()) ! decodedContent.append(nodes.nextNode().toPlainTextString()); ! return decodedContent.toString(); ! } ! public void testOneNonBreakingSpace() throws Exception { ! String ENCODED_WITH_NON_BREAKING_SPACE = ! "Here is string with \u00a0 inside of it."; ! ! 
String DECODED_WITH_NON_BREAKING_SPACE = ! "Here is string with inside of it."; ! ! assertEquals ( ! "\u00a0 was converted to a space correctly", ! DECODED_WITH_NON_BREAKING_SPACE, ! parseToObtainDecodedResult(ENCODED_WITH_NON_BREAKING_SPACE)); ! } ! ! public void testMultipleNonBreakingSpace() throws Exception { ! String ENCODED_WITH_NON_BREAKING_SPACE = ! "\u00a0Here is string with \u00a0 inside of it\u00a0."; ! ! String DECODED_WITH_NON_BREAKING_SPACE = ! " Here is string with inside of it ."; ! ! assertEquals ( ! "\u00a0 was converted to a space correctly", ! DECODED_WITH_NON_BREAKING_SPACE, ! parseToObtainDecodedResult(ENCODED_WITH_NON_BREAKING_SPACE)); ! } ! } |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests/lexerTests Modified Files: AllTests.java LexerTests.java PageIndexTests.java PageTests.java SourceTests.java StreamTests.java Log Message: Change tabs to spaces in all source files. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/AllTests.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** AllTests.java 24 Aug 2003 21:59:43 -0000 1.7 --- AllTests.java 3 Sep 2003 23:36:21 -0000 1.8 *************** *** 34,45 **** public class AllTests extends TestCase { ! public AllTests (String name) { ! super (name); ! } ! public static TestSuite suite () { ! TestSuite suite = new TestSuite ("Lexer Tests"); suite.addTestSuite (StreamTests.class); suite.addTestSuite (SourceTests.class); --- 34,45 ---- public class AllTests extends TestCase { ! public AllTests (String name) { ! super (name); ! } ! public static TestSuite suite () { ! TestSuite suite = new TestSuite ("Lexer Tests"); suite.addTestSuite (StreamTests.class); suite.addTestSuite (SourceTests.class); *************** *** 48,52 **** suite.addTestSuite (LexerTests.class); return suite; ! } /** --- 48,52 ---- suite.addTestSuite (LexerTests.class); return suite; ! } /** Index: LexerTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/LexerTests.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** LexerTests.java 24 Aug 2003 21:59:43 -0000 1.3 --- LexerTests.java 3 Sep 2003 23:36:21 -0000 1.4 *************** *** 61,68 **** * Test the Lexer class. */ ! public LexerTests (String name) { ! super (name); ! } /** --- 61,68 ---- * Test the Lexer class. */ ! public LexerTests (String name) { ! super (name); ! 
} /** *************** *** 314,318 **** StringBuffer buffer; int i; ! String html; long old_total; --- 314,318 ---- StringBuffer buffer; int i; ! String html; long old_total; *************** *** 379,383 **** StringBuffer buffer; int i; ! String html; long old_total; --- 379,383 ---- StringBuffer buffer; int i; ! String html; long old_total; *************** *** 449,453 **** StringBuffer buffer; int i; ! String html; InputStream stream; --- 449,453 ---- StringBuffer buffer; int i; ! String html; InputStream stream; *************** *** 517,521 **** StringBuffer buffer; int i; ! String html; InputStream stream; --- 517,521 ---- StringBuffer buffer; int i; ! String html; InputStream stream; Index: PageIndexTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/PageIndexTests.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** PageIndexTests.java 24 Aug 2003 21:59:43 -0000 1.4 --- PageIndexTests.java 3 Sep 2003 23:36:21 -0000 1.5 *************** *** 39,46 **** * Test the end-of-line index class. */ ! public PageIndexTests (String name) { ! super (name); ! } public void testAppend1 () --- 39,46 ---- * Test the end-of-line index class. */ ! public PageIndexTests (String name) { ! super (name); ! } public void testAppend1 () Index: PageTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/PageTests.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** PageTests.java 24 Aug 2003 21:59:43 -0000 1.6 --- PageTests.java 3 Sep 2003 23:36:21 -0000 1.7 *************** *** 52,64 **** * Test the third level page class. */ ! public PageTests (String name) { ! super (name); ! } /** * Test initialization with a null value. */ ! 
public void testNull () throws ParserException { Page page; --- 52,64 ---- * Test the third level page class. */ ! public PageTests (String name) { ! super (name); ! } /** * Test initialization with a null value. */ ! public void testNull () throws ParserException { Page page; *************** *** 88,92 **** * Test initialization with a real value. */ ! public void testURLConnection () throws ParserException, IOException { String link; --- 88,92 ---- * Test initialization with a real value. */ ! public void testURLConnection () throws ParserException, IOException { String link; *************** *** 102,106 **** * Test initialization with non-existant URL. */ ! public void testBadURLConnection () throws IOException { String link; --- 102,106 ---- * Test initialization with non-existant URL. */ ! public void testBadURLConnection () throws IOException { String link; Index: SourceTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/SourceTests.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** SourceTests.java 24 Aug 2003 21:59:43 -0000 1.5 --- SourceTests.java 3 Sep 2003 23:36:21 -0000 1.6 *************** *** 58,70 **** * Test the first level stream class. */ ! public SourceTests (String name) { ! super (name); ! } /** * Test initialization with a null value. */ ! public void testNull () throws IOException { Source source; --- 58,70 ---- * Test the first level stream class. */ ! public SourceTests (String name) { ! super (name); ! } /** * Test initialization with a null value. */ ! public void testNull () throws IOException { Source source; *************** *** 77,81 **** * Test initialization with a null charset name. */ ! public void testEmpty () throws IOException { Source source; --- 77,81 ---- * Test initialization with a null charset name. */ ! 
public void testEmpty () throws IOException { Source source; *************** *** 88,92 **** * Test initialization with an input stream having only one byte. */ ! public void testOneByte () throws IOException { Source source; --- 88,92 ---- * Test initialization with an input stream having only one byte. */ ! public void testOneByte () throws IOException { Source source; *************** *** 100,104 **** * Test close. */ ! public void testClose () throws IOException { Source source; --- 100,104 ---- * Test close. */ ! public void testClose () throws IOException { Source source; *************** *** 121,125 **** * Test reset. */ ! public void testReset () throws IOException { String reference; --- 121,125 ---- * Test reset. */ ! public void testReset () throws IOException { String reference; *************** *** 145,149 **** * Test reset in the middle of reading. */ ! public void testMidReset () throws IOException { String reference; --- 145,149 ---- * Test reset in the middle of reading. */ ! public void testMidReset () throws IOException { String reference; *************** *** 169,173 **** * Test mark/reset in the middle of reading. */ ! public void testMarkReset () throws IOException { String reference; --- 169,173 ---- * Test mark/reset in the middle of reading. */ ! public void testMarkReset () throws IOException { String reference; *************** *** 195,199 **** * Test skip. */ ! public void testSkip () throws IOException { String part1; --- 195,199 ---- * Test skip. */ ! public void testSkip () throws IOException { String part1; *************** *** 223,227 **** * Test multi-byte read. */ ! public void testMultByte () throws IOException { String reference; --- 223,227 ---- * Test multi-byte read. */ ! public void testMultByte () throws IOException { String reference; *************** *** 241,245 **** * Test positioned multi-byte read. */ ! public void testPositionedMultByte () throws IOException { String part1; --- 241,245 ---- * Test positioned multi-byte read. 
*/ ! public void testPositionedMultByte () throws IOException { String part1; *************** *** 273,277 **** * Test ready. */ ! public void testReady () throws IOException { Source source; --- 273,277 ---- * Test ready. */ ! public void testReady () throws IOException { Source source; *************** *** 305,311 **** chars1 = new ArrayList (); chars2 = new ArrayList (); ! try ! { ! url = new URL (link); connection = url.openConnection (); connection.connect (); --- 305,311 ---- chars1 = new ArrayList (); chars2 = new ArrayList (); ! try ! { ! url = new URL (link); connection = url.openConnection (); connection.connect (); *************** *** 329,334 **** } assertTrue ("extra characters", index == chars2.size ()); ! } ! catch (MalformedURLException murle) { fail ("bad url " + link); --- 329,334 ---- } assertTrue ("extra characters", index == chars2.size ()); ! } ! catch (MalformedURLException murle) { fail ("bad url " + link); Index: StreamTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/StreamTests.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** StreamTests.java 24 Aug 2003 21:59:43 -0000 1.4 --- StreamTests.java 3 Sep 2003 23:36:21 -0000 1.5 *************** *** 48,60 **** * Test the first level stream class. */ ! public StreamTests (String name) { ! super (name); ! } /** * Test initialization with a null value. */ ! public void testNull () throws IOException { Stream stream; --- 48,60 ---- * Test the first level stream class. */ ! public StreamTests (String name) { ! super (name); ! } /** * Test initialization with a null value. */ ! public void testNull () throws IOException { Stream stream; *************** *** 67,71 **** * Test initialization with an empty input stream. */ ! public void testEmpty () throws IOException { Stream stream; --- 67,71 ---- * Test initialization with an empty input stream. */ ! 
public void testEmpty () throws IOException { Stream stream; *************** *** 78,82 **** * Test initialization with an input stream having only one byte. */ ! public void testOneByte () throws IOException { Stream stream; --- 78,82 ---- * Test initialization with an input stream having only one byte. */ ! public void testOneByte () throws IOException { Stream stream; *************** *** 106,112 **** bytes1 = new ArrayList (); bytes2 = new ArrayList (); ! try ! { ! url = new URL (link); connection = url.openConnection (); connection.connect (); --- 106,112 ---- bytes1 = new ArrayList (); bytes2 = new ArrayList (); ! try ! { ! url = new URL (link); connection = url.openConnection (); connection.connect (); *************** *** 130,135 **** } assertTrue ("extra bytes", index == bytes2.size ()); ! } ! catch (MalformedURLException murle) { fail ("bad url " + link); --- 130,135 ---- } assertTrue ("extra bytes", index == bytes2.size ()); ! } ! catch (MalformedURLException murle) { fail ("bad url " + link); *************** *** 177,183 **** // pick a big file link = "http://htmlparser.sourceforge.net/javadoc_1_3/index-all.html"; ! try ! { ! url = new URL (link); // estimate the connection speed --- 177,183 ---- // pick a big file link = "http://htmlparser.sourceforge.net/javadoc_1_3/index-all.html"; ! try ! { ! url = new URL (link); // estimate the connection speed *************** *** 263,268 **** assertTrue ("slower (" + time2 + ") vs. (" + time1 + ")", time2 < time1); assertTrue ("average available bytes not greater (" + available2/samples + ") vs. (" + available1/samples + ")", available2 > available1); ! } ! catch (MalformedURLException murle) { fail ("bad url " + link); --- 263,268 ---- assertTrue ("slower (" + time2 + ") vs. (" + time1 + ")", time2 < time1); assertTrue ("average available bytes not greater (" + available2/samples + ") vs. (" + available1/samples + ")", available2 > available1); ! } ! 
catch (MalformedURLException murle) { fail ("bad url " + link); *************** *** 288,294 **** bytes1 = new ArrayList (); bytes2 = new ArrayList (); ! try ! { ! url = new URL (link); connection = url.openConnection (); connection.connect (); --- 288,294 ---- bytes1 = new ArrayList (); bytes2 = new ArrayList (); ! try ! { ! url = new URL (link); connection = url.openConnection (); connection.connect (); *************** *** 338,343 **** index++; } ! } ! catch (MalformedURLException murle) { fail ("bad url " + link); --- 338,343 ---- index++; } ! } ! catch (MalformedURLException murle) { fail ("bad url " + link); *************** *** 363,369 **** bytes1 = new ArrayList (); bytes2 = new ArrayList (); ! try ! { ! url = new URL (link); connection = url.openConnection (); connection.connect (); --- 363,369 ---- bytes1 = new ArrayList (); bytes2 = new ArrayList (); ! try ! { ! url = new URL (link); connection = url.openConnection (); connection.connect (); *************** *** 414,419 **** index++; } ! } ! catch (MalformedURLException murle) { fail ("bad url " + link); --- 414,419 ---- index++; } ! } ! catch (MalformedURLException murle) { fail ("bad url " + link); *************** *** 424,428 **** * Test close. */ ! public void testClose () throws IOException { Stream stream; --- 424,428 ---- * Test close. */ ! public void testClose () throws IOException { Stream stream; |
From: <der...@us...> - 2003-09-02 00:42:01
|
Update of /cvsroot/htmlparser/htmlparser/docs/samples In directory sc8-pr-cvs1:/tmp/cvs-serv4306 Modified Files: index.html Log Message: Further to bug #786869 (LinkExtractor Sample not working): the original samples directory on the web page needs to be either revamped, or removed with the missing pieces moved to the wiki pages: http://htmlparser.sourceforge.net/docs/index.php/SamplePrograms Until then, I've flagged the samples directory as out of date in the index.html file. I'll add the task to the list of requests for feature enhancements. Index: index.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/samples/index.html,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** index.html 24 Dec 2002 06:05:37 -0000 1.4 --- index.html 2 Sep 2003 00:41:56 -0000 1.5 *************** *** 8,11 **** --- 8,16 ---- <body> <h3><font size="4"><strong>Sample Programs </strong></font></h3> + <strong>WARNING: These examples are outdated. Except for the embedded links article, + they need to be reworked to the most recent version of HTML Parser. + <p>Please see <a + href="http://htmlparser.sourceforge.net/docs/index.php/SamplePrograms">WikiPages + Sample Programs</a> for more recent versions.</strong> <p>We provide below some commonly-used sample programs that were created using HTMLParser. Going through these programs will give you an idea of the design |
From: <der...@us...> - 2003-09-01 22:02:52
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv11743 Modified Files: AppletScannerTest.java Log Message: Further to bug #798554 (Applet Tag does not update codebase data): fix test case. ******************** Note: AppletTag getAttribute() no longer retrieves the parameter; it returns the Tag.getAttribute() value like it should. Use AppletTag.getParameter() to access the list of parameters of the applet tag. ******************** Index: AppletScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/AppletScannerTest.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** AppletScannerTest.java 24 Aug 2003 21:59:43 -0000 1.23 --- AppletScannerTest.java 1 Sep 2003 22:02:48 -0000 1.24 *************** *** 39,88 **** { ! public AppletScannerTest(String name) { ! super(name); ! } ! ! public void testEvaluate() ! { ! AppletScanner scanner = new AppletScanner("-a"); ! boolean retVal = scanner.evaluate(" Applet ",null); ! assertEquals("Evaluation of APPLET tag",new Boolean(true),new Boolean(retVal)); ! } ! public void testScan() throws ParserException ! { ! String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; ! Hashtable paramsMap = new Hashtable(); ! String testHTML = new String("<APPLET CODE=Myclass.class ARCHIVE=test.jar CODEBASE=www.kizna.com>\n"); ! for (int i = 0;i<paramsData.length;i++) ! { ! testHTML+="<PARAM NAME=\""+paramsData[i][0]+"\" VALUE=\""+paramsData[i][1]+"\">\n"; ! paramsMap.put(paramsData[i][0],paramsData[i][1]); ! } ! testHTML+= ! "</APPLET>\n"+ ! "</HTML>"; ! createParser(testHTML); ! ! // Register the applet scanner ! parser.addScanner(new AppletScanner("-a")); ! ! parseAndAssertNodeCount(2); ! assertTrue("Node should be an applet tag",node[0] instanceof AppletTag); ! // Check the data in the applet tag ! 
AppletTag appletTag = (AppletTag)node[0]; ! assertEquals("Class Name","Myclass.class",appletTag.getAppletClass()); ! assertEquals("Archive","test.jar",appletTag.getArchive()); ! assertEquals("Codebase","www.kizna.com",appletTag.getCodeBase()); ! // Check the params data ! int cnt = 0; ! for (Enumeration e = appletTag.getParameterNames();e.hasMoreElements();) ! { ! String paramName = (String)e.nextElement(); ! String paramValue = appletTag.getAttribute(paramName); ! assertEquals("Param "+cnt+" value",paramsMap.get(paramName),paramValue); ! cnt++; ! } ! assertEquals("Number of params",new Integer(paramsData.length),new Integer(cnt)); ! } } --- 39,88 ---- { ! public AppletScannerTest(String name) { ! super(name); ! } ! ! public void testEvaluate() ! { ! AppletScanner scanner = new AppletScanner("-a"); ! boolean retVal = scanner.evaluate(" Applet ",null); ! assertEquals("Evaluation of APPLET tag",new Boolean(true),new Boolean(retVal)); ! } ! public void testScan() throws ParserException ! { ! String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; ! Hashtable paramsMap = new Hashtable(); ! String testHTML = new String("<APPLET CODE=Myclass.class ARCHIVE=test.jar CODEBASE=www.kizna.com>\n"); ! for (int i = 0;i<paramsData.length;i++) ! { ! testHTML+="<PARAM NAME=\""+paramsData[i][0]+"\" VALUE=\""+paramsData[i][1]+"\">\n"; ! paramsMap.put(paramsData[i][0],paramsData[i][1]); ! } ! testHTML+= ! "</APPLET>\n"+ ! "</HTML>"; ! createParser(testHTML); ! ! // Register the applet scanner ! parser.addScanner(new AppletScanner("-a")); ! ! parseAndAssertNodeCount(2); ! assertTrue("Node should be an applet tag",node[0] instanceof AppletTag); ! // Check the data in the applet tag ! AppletTag appletTag = (AppletTag)node[0]; ! assertEquals("Class Name","Myclass.class",appletTag.getAppletClass()); ! assertEquals("Archive","test.jar",appletTag.getArchive()); ! assertEquals("Codebase","www.kizna.com",appletTag.getCodeBase()); ! // Check the params data ! int cnt = 0; ! 
for (Enumeration e = appletTag.getParameterNames();e.hasMoreElements();) ! { ! String paramName = (String)e.nextElement(); ! String paramValue = appletTag.getParameter(paramName); ! assertEquals("Param "+cnt+" value",paramsMap.get(paramName),paramValue); ! cnt++; ! } ! assertEquals("Number of params",new Integer(paramsData.length),new Integer(cnt)); ! } } |
From: <der...@us...> - 2003-09-01 21:53:40
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv10193 Modified Files: JspTagTest.java Log Message: Incorporated test cases from bug #772700 Jsp Tags are not parsed correctly when in quoted attributes. This should be resolved when the lexer package is integrated. Commented out the one that fails. Index: JspTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/JspTagTest.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** JspTagTest.java 24 Aug 2003 21:59:44 -0000 1.27 --- JspTagTest.java 1 Sep 2003 21:53:33 -0000 1.28 *************** *** 38,180 **** { ! public JspTagTest(String name) { ! super(name); ! } ! /** ! * Check if the JSP Tag is being correctly recognized. ! * Our test html is : <BR> ! * <%@ taglib uri="/WEB-INF/struts.tld" prefix="struts" %><BR> ! * <jsp:useBean id="transfer" scope="session" class="com.bank.PageBean"/><BR> ! * <%<BR> ! * org.apache.struts.util.BeanUtils.populate(transfer, request);<BR> ! * if(request.getParameter("marker") == null)<BR> ! * // initialize a pseudo-property<BR> ! * transfer.set("days", java.util.Arrays.asList(<BR> ! * new String[] {"1", "2", "3", "4", "31"}));<BR> ! * else <BR> ! * if(transfer.validate(request))<BR> ! * %><jsp:forward page="transferConfirm.jsp"/><% ! * %> ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testJspTag() throws ParserException ! { ! createParser( ! "<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>\n"+ ! "<jsp:useBean id=\"transfer\" scope=\"session\" class=\"com.bank.PageBean\"/>\n"+ ! "<%\n"+ ! " org.apache.struts.util.BeanUtils.populate(transfer, request);\n"+ ! " if(request.getParameter(\"marker\") == null)\n"+ ! " // initialize a pseudo-property\n"+ ! " transfer.set(\"days\", java.util.Arrays.asList(\n"+ ! " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\n"+ ! 
" else \n"+ ! " if(transfer.validate(request))\n"+ ! " %><jsp:forward page=\"transferConfirm.jsp\"/><%\n"+ ! "%>\n"); ! Parser.setLineSeparator("\r\n"); ! // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); ! parseAndAssertNodeCount(5); ! // The first node should be an HTMLJspTag ! assertTrue("Node 1 should be an HTMLJspTag",node[0] instanceof JspTag); ! JspTag tag = (JspTag)node[0]; ! assertStringEquals("Contents of the tag","@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" ",tag.getText()); ! ! // The second node should be a normal tag ! assertTrue("Node 2 should be an Tag",node[1] instanceof Tag); ! Tag htag = (Tag)node[1]; ! assertStringEquals("Contents of the tag","jsp:useBean id=\"transfer\" scope=\"session\" class=\"com.bank.PageBean\"",htag.getText()); ! assertStringEquals("html","<JSP:USEBEAN ID=\"transfer\" SCOPE=\"session\" CLASS=\"com.bank.PageBean\"/>",htag.toHtml()); ! // The third node should be an HTMLJspTag ! assertTrue("Node 3 should be an HTMLJspTag",node[2] instanceof JspTag); ! JspTag tag2 = (JspTag)node[2]; ! String expected = "\r\n"+ ! " org.apache.struts.util.BeanUtils.populate(transfer, request);\r\n"+ ! " if(request.getParameter(\"marker\") == null)\r\n"+ ! " // initialize a pseudo-property\r\n"+ ! " transfer.set(\"days\", java.util.Arrays.asList(\r\n"+ ! " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\r\n"+ ! " else \r\n"+ ! " if(transfer.validate(request))\r\n"+ ! " "; ! assertEquals("Contents of the tag",expected,tag2.getText()); ! ! } ! ! /** ! * Check if the JSP Tag is being correctly recognized. ! * Our test html is : <BR> ! * <%@ taglib uri="/WEB-INF/struts.tld" prefix="struts" %><BR> ! * <jsp:useBean id="transfer" scope="session" class="com.bank.PageBean"/><BR> ! * <%<BR> ! * org.apache.struts.util.BeanUtils.populate(transfer, request);<BR> ! * if(request.getParameter("marker") == null)<BR> ! * // initialize a pseudo-property<BR> ! * transfer.set("days", java.util.Arrays.asList(<BR> ! 
* new String[] {"1", "2", "3", "4", "31"}));<BR> ! * else <BR> ! * if(transfer.validate(request))<BR> ! * %><jsp:forward page="transferConfirm.jsp"/><% ! * %> ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testToHTML() throws ParserException ! { ! createParser( ! "<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>\n"+ ! "<jsp:useBean id=\"transfer\" scope=\"session\" class=\"com.bank.PageBean\"/>\n"+ ! "<%\n"+ ! " org.apache.struts.util.BeanUtils.populate(transfer, request);\n"+ ! " if(request.getParameter(\"marker\") == null)\n"+ ! " // initialize a pseudo-property\n"+ ! " transfer.set(\"days\", java.util.Arrays.asList(\n"+ ! " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\n"+ ! " else \n"+ ! " if(transfer.validate(request))\n"+ ! " %><jsp:forward page=\"transferConfirm.jsp\"/><%\n"+ ! "%>\n"); ! Parser.setLineSeparator("\r\n"); ! // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); ! parseAndAssertNodeCount(5); ! // The first node should be an HTMLJspTag ! assertTrue("Node 1 should be an HTMLJspTag",node[0] instanceof JspTag); ! JspTag tag = (JspTag)node[0]; ! assertEquals("Raw String of the first JSP tag","<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>",tag.toHtml()); ! ! ! // The third node should be an HTMLJspTag ! assertTrue("Node 2 should be an HTMLJspTag",node[2] instanceof JspTag); ! JspTag tag2 = (JspTag)node[2]; ! String expected = "<%\r\n"+ ! " org.apache.struts.util.BeanUtils.populate(transfer, request);\r\n"+ ! " if(request.getParameter(\"marker\") == null)\r\n"+ ! " // initialize a pseudo-property\r\n"+ ! " transfer.set(\"days\", java.util.Arrays.asList(\r\n"+ ! " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\r\n"+ ! " else \r\n"+ ! " if(transfer.validate(request))\r\n"+ ! " %>"; ! assertEquals("Raw String of the second JSP tag",expected,tag2.toHtml()); ! assertTrue("Node 4 should be an HTMLJspTag",node[4] instanceof JspTag); ! JspTag tag4 = (JspTag)node[4]; ! expected = "<%\r\n"+ ! 
"%>"; ! assertEquals("Raw String of the fourth JSP tag",expected,tag4.toHtml()); ! ! } ! public void testSpecialCharacters() throws ParserException { ! StringBuffer sb1 = new StringBuffer(); ! sb1.append("<% for (i=0;i<j;i++);%>"); ! createParser(sb1.toString()); ! ! // Register the jsp scanner ! parser.addScanner(new JspScanner("-j")); ! parseAndAssertNodeCount(1); ! //assertTrue("Node should be a jsp tag",node[1] instanceof HTMLJspTag); ! JspTag jspTag = (JspTag)node[0]; ! assertEquals("jsp toHTML()","<% for (i=0;i<j;i++);%>",jspTag.toHtml()); ! } } --- 38,212 ---- { ! public JspTagTest(String name) { ! super(name); ! } ! /** ! * Check if the JSP Tag is being correctly recognized. ! * Our test html is : <BR> ! * <%@ taglib uri="/WEB-INF/struts.tld" prefix="struts" %><BR> ! * <jsp:useBean id="transfer" scope="session" class="com.bank.PageBean"/><BR> ! * <%<BR> ! * org.apache.struts.util.BeanUtils.populate(transfer, request);<BR> ! * if(request.getParameter("marker") == null)<BR> ! * // initialize a pseudo-property<BR> ! * transfer.set("days", java.util.Arrays.asList(<BR> ! * new String[] {"1", "2", "3", "4", "31"}));<BR> ! * else <BR> ! * if(transfer.validate(request))<BR> ! * %><jsp:forward page="transferConfirm.jsp"/><% ! * %> ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testJspTag() throws ParserException ! { ! createParser( ! "<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>\n"+ ! "<jsp:useBean id=\"transfer\" scope=\"session\" class=\"com.bank.PageBean\"/>\n"+ ! "<%\n"+ ! " org.apache.struts.util.BeanUtils.populate(transfer, request);\n"+ ! " if(request.getParameter(\"marker\") == null)\n"+ ! " // initialize a pseudo-property\n"+ ! " transfer.set(\"days\", java.util.Arrays.asList(\n"+ ! " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\n"+ ! " else \n"+ ! " if(transfer.validate(request))\n"+ ! " %><jsp:forward page=\"transferConfirm.jsp\"/><%\n"+ ! "%>\n"); ! Parser.setLineSeparator("\r\n"); ! // Register the Jsp Scanner ! 
parser.addScanner(new JspScanner("-j")); ! parseAndAssertNodeCount(5); ! // The first node should be an HTMLJspTag ! assertTrue("Node 1 should be an HTMLJspTag",node[0] instanceof JspTag); ! JspTag tag = (JspTag)node[0]; ! assertStringEquals("Contents of the tag","@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" ",tag.getText()); ! ! // The second node should be a normal tag ! assertTrue("Node 2 should be an Tag",node[1] instanceof Tag); ! Tag htag = (Tag)node[1]; ! assertStringEquals("Contents of the tag","jsp:useBean id=\"transfer\" scope=\"session\" class=\"com.bank.PageBean\"",htag.getText()); ! assertStringEquals("html","<JSP:USEBEAN ID=\"transfer\" SCOPE=\"session\" CLASS=\"com.bank.PageBean\"/>",htag.toHtml()); ! // The third node should be an HTMLJspTag ! assertTrue("Node 3 should be an HTMLJspTag",node[2] instanceof JspTag); ! JspTag tag2 = (JspTag)node[2]; ! String expected = "\r\n"+ ! " org.apache.struts.util.BeanUtils.populate(transfer, request);\r\n"+ ! " if(request.getParameter(\"marker\") == null)\r\n"+ ! " // initialize a pseudo-property\r\n"+ ! " transfer.set(\"days\", java.util.Arrays.asList(\r\n"+ ! " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\r\n"+ ! " else \r\n"+ ! " if(transfer.validate(request))\r\n"+ ! " "; ! assertEquals("Contents of the tag",expected,tag2.getText()); ! ! } ! ! /** ! * Check if the JSP Tag is being correctly recognized. ! * Our test html is : <BR> ! * <%@ taglib uri="/WEB-INF/struts.tld" prefix="struts" %><BR> ! * <jsp:useBean id="transfer" scope="session" class="com.bank.PageBean"/><BR> ! * <%<BR> ! * org.apache.struts.util.BeanUtils.populate(transfer, request);<BR> ! * if(request.getParameter("marker") == null)<BR> ! * // initialize a pseudo-property<BR> ! * transfer.set("days", java.util.Arrays.asList(<BR> ! * new String[] {"1", "2", "3", "4", "31"}));<BR> ! * else <BR> ! * if(transfer.validate(request))<BR> ! * %><jsp:forward page="transferConfirm.jsp"/><% ! * %> ! 
* Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testToHTML() throws ParserException ! { ! createParser( ! "<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>\n"+ ! "<jsp:useBean id=\"transfer\" scope=\"session\" class=\"com.bank.PageBean\"/>\n"+ ! "<%\n"+ ! " org.apache.struts.util.BeanUtils.populate(transfer, request);\n"+ ! " if(request.getParameter(\"marker\") == null)\n"+ ! " // initialize a pseudo-property\n"+ ! " transfer.set(\"days\", java.util.Arrays.asList(\n"+ ! " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\n"+ ! " else \n"+ ! " if(transfer.validate(request))\n"+ ! " %><jsp:forward page=\"transferConfirm.jsp\"/><%\n"+ ! "%>\n"); ! Parser.setLineSeparator("\r\n"); ! // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); ! parseAndAssertNodeCount(5); ! // The first node should be an HTMLJspTag ! assertTrue("Node 1 should be an HTMLJspTag",node[0] instanceof JspTag); ! JspTag tag = (JspTag)node[0]; ! assertEquals("Raw String of the first JSP tag","<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>",tag.toHtml()); ! ! ! // The third node should be an HTMLJspTag ! assertTrue("Node 2 should be an HTMLJspTag",node[2] instanceof JspTag); ! JspTag tag2 = (JspTag)node[2]; ! String expected = "<%\r\n"+ ! " org.apache.struts.util.BeanUtils.populate(transfer, request);\r\n"+ ! " if(request.getParameter(\"marker\") == null)\r\n"+ ! " // initialize a pseudo-property\r\n"+ ! " transfer.set(\"days\", java.util.Arrays.asList(\r\n"+ ! " new String[] {\"1\", \"2\", \"3\", \"4\", \"31\"}));\r\n"+ ! " else \r\n"+ ! " if(transfer.validate(request))\r\n"+ ! " %>"; ! assertEquals("Raw String of the second JSP tag",expected,tag2.toHtml()); ! assertTrue("Node 4 should be an HTMLJspTag",node[4] instanceof JspTag); ! JspTag tag4 = (JspTag)node[4]; ! expected = "<%\r\n"+ ! "%>"; ! assertEquals("Raw String of the fourth JSP tag",expected,tag4.toHtml()); ! ! } ! public void testSpecialCharacters() throws ParserException { ! 
StringBuffer sb1 = new StringBuffer(); ! sb1.append("<% for (i=0;i<j;i++);%>"); ! createParser(sb1.toString()); ! ! // Register the jsp scanner ! parser.addScanner(new JspScanner("-j")); ! parseAndAssertNodeCount(1); ! //assertTrue("Node should be a jsp tag",node[1] instanceof HTMLJspTag); ! JspTag jspTag = (JspTag)node[0]; ! assertEquals("jsp toHTML()","<% for (i=0;i<j;i++);%>",jspTag.toHtml()); ! } ! ! ! /** ! * See bug #772700 Jsp Tags are not parsed correctly when in quoted attributes. ! */ ! public void testJspTagsInUnQuotedAttribes() throws ParserException { ! // this test should pass when none of the attibutes are quoted ! testJspTagsInAttributes("<img alt=<%=altText1%> src=<%=imgUrl1%> border=<%=borderToggle%>>"); ! } ! ! /** ! * See bug #772700 Jsp Tags are not parsed correctly when in quoted attributes. ! */ ! // public void testJspTagsInQuotedAttribes() throws ParserException { ! // // this test seems to mess up.... ! // testJspTagsInAttributes("<img alt=\"<%=altText1%>\" src=\"<%=imgUrl1%>\" border=\"<%=borderToggle%>\">"); ! // } ! ! private void testJspTagsInAttributes(String html) throws ParserException { ! createParser(html); ! parser.addScanner(new JspScanner()); ! parseAndAssertNodeCount(7); ! ! assertTrue("Should be a Jsp tag but was "+node[1].getClass().getName(),node[1] instanceof JspTag); ! assertTrue("Should be a Jsp tag but was "+node[3].getClass().getName(),node[3] instanceof JspTag); ! assertTrue("Should be a Jsp tag but was "+node[5].getClass().getName(),node[5] instanceof JspTag); ! ! assertTrue("Text Should be '<%=altText1%>'but was '" + node[1].toHtml() + "'" ,node[1].toHtml().equals("<%=altText1%>")); ! assertTrue("Text Should be '<%=imgUrl1%>' but was '" + node[3].toHtml() + "'" ,node[3].toHtml().equals("<%=imgUrl1%>")); ! assertTrue("Text Should be '<%=borderToggle%>' but was '" + node[5].toHtml() + "'" ,node[5].toHtml().equals("<%=borderToggle%>")); ! ! } } |
From: <der...@us...> - 2003-09-01 21:41:32
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv8038 Modified Files: ImageTagTest.java Log Message: Add test case text from bug #778781 SRC-attribute suppression in IMG-tags. Index: ImageTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/ImageTagTest.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** ImageTagTest.java 24 Aug 2003 21:59:44 -0000 1.27 --- ImageTagTest.java 1 Sep 2003 21:41:28 -0000 1.28 *************** *** 41,158 **** public class ImageTagTest extends ParserTestCase { ! public ImageTagTest(String name) { ! super(name); ! } ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testImageTag() throws ParserException ! { ! createParser("<IMG alt=Google height=115 src=\"goo/title_homepage4.gif\" width=305>","http://www.google.com/test/index.html"); ! // Register the image scanner ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLImageTag ! assertTrue("Node should be a HTMLImageTag",node[0] instanceof ImageTag); ! ImageTag imageTag = (ImageTag)node[0]; ! assertEquals("The image locn","http://www.google.com/test/goo/title_homepage4.gif",imageTag.getImageURL()); ! } ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testImageTagBug() throws ParserException ! { ! 
createParser("<IMG alt=Google height=115 src=\"../goo/title_homepage4.gif\" width=305>","http://www.google.com/test/"); ! // Register the image scanner ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLImageTag ! assertTrue("Node should be a HTMLImageTag",node[0] instanceof ImageTag); ! ImageTag imageTag = (ImageTag)node[0]; ! assertEquals("The image locn","http://www.google.com/goo/title_homepage4.gif",imageTag.getImageURL()); ! } ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testImageTageBug2() throws ParserException ! { ! createParser("<IMG alt=Google height=115 src=\"../../goo/title_homepage4.gif\" width=305>","http://www.google.com/test/test/index.html"); ! // Register the image scanner ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLImageTag ! assertTrue("Node should be a HTMLImageTag",node[0] instanceof ImageTag); ! ImageTag imageTag = (ImageTag)node[0]; ! assertEquals("The image locn","http://www.google.com/goo/title_homepage4.gif",imageTag.getImageURL()); ! } ! /** ! * This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner. ! * Creation date: (7/1/2001 2:42:13 PM) ! */ ! public void testImageTagSingleQuoteBug() throws ParserException ! { ! createParser("<IMG SRC='abcd.jpg'>","http://www.cj.com/"); ! // Register the image scanner ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLImageTag",node[0] instanceof ImageTag); ! ImageTag imageTag = (ImageTag)node[0]; ! 
assertEquals("Image incorrect","http://www.cj.com/abcd.jpg",imageTag.getImageURL()); ! } ! /** ! * The bug being reproduced is this : <BR> ! * <A HREF=>Something<A><BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testNullImageBug() throws ParserException ! { ! createParser("<IMG SRC=>","http://www.google.com/test/index.html"); ! // Register the image scanner ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLImageTag",node[0] instanceof ImageTag); ! ImageTag imageTag = (ImageTag)node[0]; ! assertStringEquals("The image location","",imageTag.getImageURL()); ! } ! public void testToHTML() throws ParserException { ! createParser("<IMG alt=Google height=115 src=\"../../goo/title_homepage4.gif\" width=305>","http://www.google.com/test/test/index.html"); ! // Register the image scanner ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! ! parseAndAssertNodeCount(1); ! // The node should be an ImageTag ! assertTrue("Node should be a ImageTag",node[0] instanceof ImageTag); ! ImageTag imageTag = (ImageTag)node[0]; ! assertStringEquals("The image locn","<IMG WIDTH=\"305\" ALT=\"Google\" SRC=\"../../goo/title_homepage4.gif\" HEIGHT=\"115\">",imageTag.toHtml()); ! assertEquals("Alt","Google",imageTag.getAttribute("alt")); ! assertEquals("Height","115",imageTag.getAttribute("height")); ! assertEquals("Width","305",imageTag.getAttribute("width")); ! } /** --- 41,158 ---- public class ImageTagTest extends ParserTestCase { ! public ImageTagTest(String name) { ! super(name); ! } ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! 
* Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testImageTag() throws ParserException ! { ! createParser("<IMG alt=Google height=115 src=\"goo/title_homepage4.gif\" width=305>","http://www.google.com/test/index.html"); ! // Register the image scanner ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLImageTag ! assertTrue("Node should be a HTMLImageTag",node[0] instanceof ImageTag); ! ImageTag imageTag = (ImageTag)node[0]; ! assertEquals("The image locn","http://www.google.com/test/goo/title_homepage4.gif",imageTag.getImageURL()); ! } ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testImageTagBug() throws ParserException ! { ! createParser("<IMG alt=Google height=115 src=\"../goo/title_homepage4.gif\" width=305>","http://www.google.com/test/"); ! // Register the image scanner ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLImageTag ! assertTrue("Node should be a HTMLImageTag",node[0] instanceof ImageTag); ! ImageTag imageTag = (ImageTag)node[0]; ! assertEquals("The image locn","http://www.google.com/goo/title_homepage4.gif",imageTag.getImageURL()); ! } ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testImageTageBug2() throws ParserException ! { ! createParser("<IMG alt=Google height=115 src=\"../../goo/title_homepage4.gif\" width=305>","http://www.google.com/test/test/index.html"); ! 
// Register the image scanner ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLImageTag ! assertTrue("Node should be a HTMLImageTag",node[0] instanceof ImageTag); ! ImageTag imageTag = (ImageTag)node[0]; ! assertEquals("The image locn","http://www.google.com/goo/title_homepage4.gif",imageTag.getImageURL()); ! } ! /** ! * This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner. ! * Creation date: (7/1/2001 2:42:13 PM) ! */ ! public void testImageTagSingleQuoteBug() throws ParserException ! { ! createParser("<IMG SRC='abcd.jpg'>","http://www.cj.com/"); ! // Register the image scanner ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLImageTag",node[0] instanceof ImageTag); ! ImageTag imageTag = (ImageTag)node[0]; ! assertEquals("Image incorrect","http://www.cj.com/abcd.jpg",imageTag.getImageURL()); ! } ! /** ! * The bug being reproduced is this : <BR> ! * <A HREF=>Something<A><BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testNullImageBug() throws ParserException ! { ! createParser("<IMG SRC=>","http://www.google.com/test/index.html"); ! // Register the image scanner ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLImageTag",node[0] instanceof ImageTag); ! ImageTag imageTag = (ImageTag)node[0]; ! assertStringEquals("The image location","",imageTag.getImageURL()); ! } ! public void testToHTML() throws ParserException { ! createParser("<IMG alt=Google height=115 src=\"../../goo/title_homepage4.gif\" width=305>","http://www.google.com/test/test/index.html"); ! // Register the image scanner ! 
parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! ! parseAndAssertNodeCount(1); ! // The node should be an ImageTag ! assertTrue("Node should be a ImageTag",node[0] instanceof ImageTag); ! ImageTag imageTag = (ImageTag)node[0]; ! assertStringEquals("The image locn","<IMG WIDTH=\"305\" ALT=\"Google\" SRC=\"../../goo/title_homepage4.gif\" HEIGHT=\"115\">",imageTag.toHtml()); ! assertEquals("Alt","Google",imageTag.getAttribute("alt")); ! assertEquals("Height","115",imageTag.getAttribute("height")); ! assertEquals("Width","305",imageTag.getAttribute("width")); ! } /** *************** *** 191,199 **** + "</map>" + "</a>"; ! createParser (html); ! parser.registerScanners (); ! ! parseAndAssertNodeCount (1); ! assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag); LinkTag link = (LinkTag)node[0]; ImageTag img = extractLinkImage (link); --- 191,199 ---- + "</map>" + "</a>"; ! createParser (html); ! parser.registerScanners (); ! ! parseAndAssertNodeCount (1); ! assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag); LinkTag link = (LinkTag)node[0]; ImageTag img = extractLinkImage (link); *************** *** 211,220 **** createParser (html); ! parser.registerScanners (); parseAndAssertNodeCount (1); ! assertTrue ("Node should be an ImageTag", node[0] instanceof ImageTag); ImageTag img = (ImageTag)node[0]; assertTrue ("bad source", "http://i.cnn.net/cnn/images/1.gif".equals (img.getImageURL ())); ! } } --- 211,234 ---- createParser (html); ! parser.registerScanners (); parseAndAssertNodeCount (1); ! assertTrue ("Node should be an ImageTag", node[0] instanceof ImageTag); ImageTag img = (ImageTag)node[0]; assertTrue ("bad source", "http://i.cnn.net/cnn/images/1.gif".equals (img.getImageURL ())); ! } ! ! // see bug #778781 SRC-attribute suppression in IMG-tags ! // HTML before parse: ! // <img src="images/first" alt="first"> ! // <img src="images/second" alt=""> ! // <img alt="third" src="images/third"> ! 
// <img alt="" src="images/fourth"> ! // ! // HTML after parse: ! // <IMG ALT="first" SRC="images/first"> ! // <IMG ALT="" SRC="images/second"> ! // <IMG ALT="third" SRC="images/third"> ! // <IMG ALT=""> ! } |
From: <der...@us...> - 2003-09-01 21:28:44
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv5770/tests/tagTests Modified Files: LinkTagTest.java Log Message: Fix bug #784767 irc://server/channel urls are HTTPLike? Added an isIRCLink() method, but, I'm not sure that isHTTPLikeLink() should use the gainsaying of all known link types. This needs review. Index: LinkTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/LinkTagTest.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** LinkTagTest.java 24 Aug 2003 21:59:44 -0000 1.30 --- LinkTagTest.java 1 Sep 2003 21:28:33 -0000 1.31 *************** *** 40,375 **** public class LinkTagTest extends ParserTestCase { ! public LinkTagTest(String name) { ! super(name); ! } ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testLinkNodeBug() throws ParserException ! { ! createParser("<A HREF=\"../test.html\">abcd</A>","http://www.google.com/test/index.html"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[0]; ! assertEquals("The image locn","http://www.google.com/test.html",linkNode.getLink()); ! } ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! 
public void testLinkNodeBug2() throws ParserException ! { ! createParser("<A HREF=\"../../test.html\">abcd</A>","http://www.google.com/test/test/index.html"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[0]; ! assertEquals("The image locn","http://www.google.com/test.html",linkNode.getLink()); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * When a url ends with a slash, and the link begins with a slash,the parser puts two slashes ! * This bug was submitted by Roget Kjensrud ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testLinkNodeBug3() throws ParserException ! { ! createParser("<A HREF=\"/mylink.html\">abcd</A>","http://www.cj.com/"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[0]; ! assertEquals("Link incorrect","http://www.cj.com/mylink.html",linkNode.getLink()); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * Simple url without index.html, doesent get appended to link ! * This bug was submitted by Roget Kjensrud ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testLinkNodeBug4() throws ParserException ! { ! createParser("<A HREF=\"/mylink.html\">abcd</A>","http://www.cj.com"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[0]; ! assertEquals("Link incorrect!!","http://www.cj.com/mylink.html",linkNode.getLink()); ! } ! ! public void testLinkNodeBug5() throws ParserException ! { ! 
createParser("<a href=http://note.kimo.com.tw/>µ§°O</a> <a \n"+ ! "href=http://photo.kimo.com.tw/>¬Ûï</a> <a\n"+ ! "href=http://address.kimo.com.tw/>³q°T¿ý</a> ","http://www.cj.com"); ! Parser.setLineSeparator("\r\n"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(6); ! // The node should be an LinkTag ! assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[2]; ! assertStringEquals("Link incorrect!!","http://photo.kimo.com.tw",linkNode.getLink()); ! assertEquals("Link beginning",new Integer(48),new Integer(linkNode.elementBegin())); ! assertEquals("Link ending",new Integer(38),new Integer(linkNode.elementEnd())); ! ! LinkTag linkNode2 = (LinkTag)node[4]; ! assertStringEquals("Link incorrect!!","http://address.kimo.com.tw",linkNode2.getLink()); ! assertEquals("Link beginning",new Integer(46),new Integer(linkNode2.elementBegin())); ! assertEquals("Link ending",new Integer(42),new Integer(linkNode2.elementEnd())); ! } ! ! /** ! * This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner. ! * Creation date: (7/1/2001 2:42:13 PM) ! */ ! public void testLinkNodeBugNullPointerException() throws ParserException ! { ! createParser("<FORM action=http://search.yahoo.com/bin/search name=f><MAP name=m><AREA\n"+ ! "coords=0,0,52,52 href=\"http://www.yahoo.com/r/c1\" shape=RECT><AREA"+ ! "coords=53,0,121,52 href=\"http://www.yahoo.com/r/p1\" shape=RECT><AREA"+ ! "coords=122,0,191,52 href=\"http://www.yahoo.com/r/m1\" shape=RECT><AREA"+ ! "coords=441,0,510,52 href=\"http://www.yahoo.com/r/wn\" shape=RECT>","http://www.cj.com/"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! parseAndAssertNodeCount(6); ! } ! /** ! * This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner. ! * Creation date: (7/1/2001 2:42:13 PM) ! */ ! 
public void testLinkNodeMailtoBug() throws ParserException ! { ! createParser("<A HREF='mailto:so...@ya...'>hello</A>","http://www.cj.com/"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[0]; ! assertStringEquals("Link incorrect","so...@ya...",linkNode.getLink()); ! assertEquals("Link Type",new Boolean(true),new Boolean(linkNode.isMailLink())); ! } ! ! /** ! * This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner. ! * Creation date: (7/1/2001 2:42:13 PM) ! */ ! public void testLinkNodeSingleQuoteBug() throws ParserException ! { ! createParser("<A HREF='abcd.html'>hello</A>","http://www.cj.com/"); ! ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[0]; ! assertEquals("Link incorrect","http://www.cj.com/abcd.html",linkNode.getLink()); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testLinkTag() throws ParserException ! { ! createParser("<A HREF=\"test.html\">abcd</A>","http://www.google.com/test/index.html"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag LinkTag = (LinkTag)node[0]; ! assertEquals("The image locn","http://www.google.com/test/test.html",LinkTag.getLink()); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! 
* <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testLinkTagBug() throws ParserException ! { ! createParser("<A HREF=\"../test.html\">abcd</A>","http://www.google.com/test/index.html"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag LinkTag = (LinkTag)node[0]; ! assertEquals("The image locn","http://www.google.com/test.html",LinkTag.getLink()); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <A HREF=>Something<A><BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testNullTagBug() throws ParserException ! { ! createParser("<A HREF=>Something</A>","http://www.google.com/test/index.html"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkTag = (LinkTag)node[0]; ! assertEquals("The link location","",linkTag.getLink()); ! assertEquals("The link text","Something",linkTag.getLinkText()); ! } ! ! public void testToPlainTextString() throws ParserException { ! createParser("<A HREF='mailto:so...@ya...'>hello</A>","http://www.cj.com/"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkTag = (LinkTag)node[0]; ! assertEquals("Link Plain Text","hello",linkTag.toPlainTextString()); ! } ! ! public void testToHTML() throws ParserException { ! 
createParser("<A HREF='mailto:so...@ya...'>hello</A>\n"+ ! "<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n"+ ! "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"+ ! "nical.html\"> Journalism 3.0</a> by Rajesh Jain","http://www.cj.com/"); ! Parser.setLineSeparator("\r\n"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(9); ! assertTrue("First Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkTag = (LinkTag)node[0]; ! assertStringEquals("Link Raw Text","<A HREF=\"mailto:so...@ya...\">hello</A>",linkTag.toHtml()); ! assertTrue("Eighth Node should be a HTMLLinkTag",node[7] instanceof LinkTag); ! linkTag = (LinkTag)node[7]; ! assertStringEquals("Link Raw Text","<A HREF=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\r\nnical.html\"> Journalism 3.0</A>",linkTag.toHtml()); ! } ! public void testTypeHttps() throws ParserException{ ! LinkTag linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("https://www.someurl.com","","",false,false) ! ); ! assertTrue("This is a https link",linkTag.isHTTPSLink()); ! } ! public void testTypeFtp() throws ParserException{ ! LinkTag linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("ftp://www.someurl.com","","",false,false) ! ); ! assertTrue("This is an ftp link",linkTag.isFTPLink()); ! } ! public void testTypeJavaScript() throws ParserException { ! LinkTag linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("javascript://www.someurl.com","","",false,true) ! ); ! assertTrue("This is a javascript link",linkTag.isJavascriptLink()); ! } ! public void testTypeHttpLink() throws ParserException { ! LinkTag linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! 
new LinkData("http://www.someurl.com","","",false,false) ! ); ! assertTrue("This is a http link : "+linkTag.getLink(),linkTag.isHTTPLink()); ! linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("somePage.html","","",false,false) ! ); ! assertTrue("This relative link is alsp a http link : "+linkTag.getLink(),linkTag.isHTTPLink()); ! linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("ftp://somePage.html","","",false,false) ! ); ! assertTrue("This is not a http link : "+linkTag.getLink(),!linkTag.isHTTPLink()); ! } ! public void testTypeHttpLikeLink() throws ParserException { ! LinkTag linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("http://","","",false,false) ! ); ! assertTrue("This is a http link",linkTag.isHTTPLikeLink()); ! LinkTag linkTag2 = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("https://www.someurl.com","","",false,false) ! ); ! assertTrue("This is a https link",linkTag2.isHTTPLikeLink()); } ! /** ! * Bug #738504 MailLink != HTTPLink ! */ ! public void testMailToIsNotAHTTPLink () throws ParserException ! { LinkTag link; ! createParser ("<A HREF='mailto:der...@us...'>Derrick</A>","http://sourceforge.net"); ! // Register the link scanner ! parser.addScanner (new LinkScanner ("-l")); ! ! parseAndAssertNodeCount (1); ! assertTrue ("Node should be a HTMLLinkTag", node[0] instanceof LinkTag); ! link = (LinkTag)node[0]; assertTrue ("bug #738504 MailLink != HTTPLink", !link.isHTTPLink ()); assertTrue ("bug #738504 MailLink != HTTPSLink", !link.isHTTPSLink ()); ! } } --- 40,390 ---- public class LinkTagTest extends ParserTestCase { ! public LinkTagTest(String name) { ! super(name); ! } ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! 
* The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testLinkNodeBug() throws ParserException ! { ! createParser("<A HREF=\"../test.html\">abcd</A>","http://www.google.com/test/index.html"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[0]; ! assertEquals("The image locn","http://www.google.com/test.html",linkNode.getLink()); ! } ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testLinkNodeBug2() throws ParserException ! { ! createParser("<A HREF=\"../../test.html\">abcd</A>","http://www.google.com/test/test/index.html"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[0]; ! assertEquals("The image locn","http://www.google.com/test.html",linkNode.getLink()); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * When a url ends with a slash, and the link begins with a slash,the parser puts two slashes ! * This bug was submitted by Roget Kjensrud ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testLinkNodeBug3() throws ParserException ! { ! createParser("<A HREF=\"/mylink.html\">abcd</A>","http://www.cj.com/"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! 
assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[0]; ! assertEquals("Link incorrect","http://www.cj.com/mylink.html",linkNode.getLink()); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * Simple url without index.html, doesent get appended to link ! * This bug was submitted by Roget Kjensrud ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testLinkNodeBug4() throws ParserException ! { ! createParser("<A HREF=\"/mylink.html\">abcd</A>","http://www.cj.com"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[0]; ! assertEquals("Link incorrect!!","http://www.cj.com/mylink.html",linkNode.getLink()); ! } ! ! public void testLinkNodeBug5() throws ParserException ! { ! createParser("<a href=http://note.kimo.com.tw/>µ§°O</a> <a \n"+ ! "href=http://photo.kimo.com.tw/>¬Ûï</a> <a\n"+ ! "href=http://address.kimo.com.tw/>³q°T¿ý</a> ","http://www.cj.com"); ! Parser.setLineSeparator("\r\n"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(6); ! // The node should be an LinkTag ! assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[2]; ! assertStringEquals("Link incorrect!!","http://photo.kimo.com.tw",linkNode.getLink()); ! assertEquals("Link beginning",new Integer(48),new Integer(linkNode.elementBegin())); ! assertEquals("Link ending",new Integer(38),new Integer(linkNode.elementEnd())); ! ! LinkTag linkNode2 = (LinkTag)node[4]; ! assertStringEquals("Link incorrect!!","http://address.kimo.com.tw",linkNode2.getLink()); ! assertEquals("Link beginning",new Integer(46),new Integer(linkNode2.elementBegin())); ! assertEquals("Link ending",new Integer(42),new Integer(linkNode2.elementEnd())); ! } ! ! /** ! 
* This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner. ! * Creation date: (7/1/2001 2:42:13 PM) ! */ ! public void testLinkNodeBugNullPointerException() throws ParserException ! { ! createParser("<FORM action=http://search.yahoo.com/bin/search name=f><MAP name=m><AREA\n"+ ! "coords=0,0,52,52 href=\"http://www.yahoo.com/r/c1\" shape=RECT><AREA"+ ! "coords=53,0,121,52 href=\"http://www.yahoo.com/r/p1\" shape=RECT><AREA"+ ! "coords=122,0,191,52 href=\"http://www.yahoo.com/r/m1\" shape=RECT><AREA"+ ! "coords=441,0,510,52 href=\"http://www.yahoo.com/r/wn\" shape=RECT>","http://www.cj.com/"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! parseAndAssertNodeCount(6); ! } ! /** ! * This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner. ! * Creation date: (7/1/2001 2:42:13 PM) ! */ ! public void testLinkNodeMailtoBug() throws ParserException ! { ! createParser("<A HREF='mailto:so...@ya...'>hello</A>","http://www.cj.com/"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[0]; ! assertStringEquals("Link incorrect","so...@ya...",linkNode.getLink()); ! assertEquals("Link Type",new Boolean(true),new Boolean(linkNode.isMailLink())); ! } ! ! /** ! * This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner. ! * Creation date: (7/1/2001 2:42:13 PM) ! */ ! public void testLinkNodeSingleQuoteBug() throws ParserException ! { ! createParser("<A HREF='abcd.html'>hello</A>","http://www.cj.com/"); ! ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkNode = (LinkTag)node[0]; ! 
assertEquals("Link incorrect","http://www.cj.com/abcd.html",linkNode.getLink()); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testLinkTag() throws ParserException ! { ! createParser("<A HREF=\"test.html\">abcd</A>","http://www.google.com/test/index.html"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag LinkTag = (LinkTag)node[0]; ! assertEquals("The image locn","http://www.google.com/test/test.html",LinkTag.getLink()); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testLinkTagBug() throws ParserException ! { ! createParser("<A HREF=\"../test.html\">abcd</A>","http://www.google.com/test/index.html"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag LinkTag = (LinkTag)node[0]; ! assertEquals("The image locn","http://www.google.com/test.html",LinkTag.getLink()); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <A HREF=>Something<A><BR> ! * vLink=#551a8b> ! * The above line is incorrectly parsed in that, the BODY tag is not identified. ! * Creation date: (6/17/2001 4:01:06 PM) ! */ ! public void testNullTagBug() throws ParserException ! { ! 
createParser("<A HREF=>Something</A>","http://www.google.com/test/index.html"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! // The node should be an HTMLLinkTag ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkTag = (LinkTag)node[0]; ! assertEquals("The link location","",linkTag.getLink()); ! assertEquals("The link text","Something",linkTag.getLinkText()); ! } ! ! public void testToPlainTextString() throws ParserException { ! createParser("<A HREF='mailto:so...@ya...'>hello</A>","http://www.cj.com/"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkTag = (LinkTag)node[0]; ! assertEquals("Link Plain Text","hello",linkTag.toPlainTextString()); ! } ! ! public void testToHTML() throws ParserException { ! createParser("<A HREF='mailto:so...@ya...'>hello</A>\n"+ ! "<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n"+ ! "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"+ ! "nical.html\"> Journalism 3.0</a> by Rajesh Jain","http://www.cj.com/"); ! Parser.setLineSeparator("\r\n"); ! // Register the image scanner ! parser.addScanner(new LinkScanner("-l")); ! ! parseAndAssertNodeCount(9); ! assertTrue("First Node should be a HTMLLinkTag",node[0] instanceof LinkTag); ! LinkTag linkTag = (LinkTag)node[0]; ! assertStringEquals("Link Raw Text","<A HREF=\"mailto:so...@ya...\">hello</A>",linkTag.toHtml()); ! assertTrue("Eighth Node should be a HTMLLinkTag",node[7] instanceof LinkTag); ! linkTag = (LinkTag)node[7]; ! assertStringEquals("Link Raw Text","<A HREF=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\r\nnical.html\"> Journalism 3.0</A>",linkTag.toHtml()); ! } ! public void testTypeHttps() throws ParserException{ ! LinkTag linkTag = ! new LinkTag( ! 
new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("https://www.someurl.com","","",false,false) ! ); ! assertTrue("This is a https link",linkTag.isHTTPSLink()); ! } ! public void testTypeFtp() throws ParserException{ ! LinkTag linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("ftp://www.someurl.com","","",false,false) ! ); ! assertTrue("This is an ftp link",linkTag.isFTPLink()); ! } ! public void testTypeJavaScript() throws ParserException { ! LinkTag linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("javascript://www.someurl.com","","",false,true) ! ); ! assertTrue("This is a javascript link",linkTag.isJavascriptLink()); ! } ! public void testTypeHttpLink() throws ParserException { ! LinkTag linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("http://www.someurl.com","","",false,false) ! ); ! assertTrue("This is a http link : "+linkTag.getLink(),linkTag.isHTTPLink()); ! linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("somePage.html","","",false,false) ! ); ! assertTrue("This relative link is alsp a http link : "+linkTag.getLink(),linkTag.isHTTPLink()); ! linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("ftp://somePage.html","","",false,false) ! ); ! assertTrue("This is not a http link : "+linkTag.getLink(),!linkTag.isHTTPLink()); ! } ! public void testTypeHttpLikeLink() throws ParserException { ! LinkTag linkTag = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("http://","","",false,false) ! ); ! assertTrue("This is a http link",linkTag.isHTTPLikeLink()); ! LinkTag linkTag2 = ! new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! 
new LinkData("https://www.someurl.com","","",false,false) ! ); ! assertTrue("This is a https link",linkTag2.isHTTPLikeLink()); } ! /** ! * Bug #738504 MailLink != HTTPLink ! */ ! public void testMailToIsNotAHTTPLink () throws ParserException ! { LinkTag link; ! createParser ("<A HREF='mailto:der...@us...'>Derrick</A>","http://sourceforge.net"); ! // Register the link scanner ! parser.addScanner (new LinkScanner ("-l")); ! ! parseAndAssertNodeCount (1); ! assertTrue ("Node should be a HTMLLinkTag", node[0] instanceof LinkTag); ! link = (LinkTag)node[0]; assertTrue ("bug #738504 MailLink != HTTPLink", !link.isHTTPLink ()); assertTrue ("bug #738504 MailLink != HTTPSLink", !link.isHTTPSLink ()); ! } ! ! /** ! * Bug #784767 irc://server/channel urls are HTTPLike? ! */ ! public void testIrcIsNotAHTTPLink () throws ParserException ! { ! LinkTag link; ! ! link = new LinkTag( ! new TagData(0,0,"",""), ! new CompositeTagData(null,null,null), ! new LinkData("irc://server/channel","","",false,false) ! ); ! assertTrue("This is not a http link", !link.isHTTPLikeLink ()); ! } } |
From: <der...@us...> - 2003-09-01 21:28:44
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1:/tmp/cvs-serv5770/tags Modified Files: LinkTag.java Log Message: Fix bug #784767 irc://server/channel urls are HTTPLike? Added an isIRCLink() method, but, I'm not sure that isHTTPLikeLink() should use the gainsaying of all known link types. This needs review. Index: LinkTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** LinkTag.java 24 Aug 2003 21:59:42 -0000 1.31 --- LinkTag.java 1 Sep 2003 21:28:33 -0000 1.32 *************** *** 41,173 **** public class LinkTag extends CompositeTag { ! public static final String LINK_TAG_FILTER="-l"; ! /** ! * The URL where the link points to ! */ ! protected String link; ! /** ! * The text of of the link element ! */ ! protected String linkText; ! /** ! * The accesskey existing inside this link. ! */ ! protected String accessKey; ! private boolean mailLink; ! private boolean javascriptLink; ! ! /** ! * Constructor creates an HTMLLinkNode object, which basically stores the location ! * where the link points to, and the text it contains. ! * <p> ! * In order to get the contents of the link tag, use the method linkData(), ! * which returns an enumeration of nodes encapsulated within the link. ! * <p> ! * The following code will get all the images inside a link tag. ! * <pre> ! * Node node ; ! * ImageTag imageTag; ! * for (Enumeration e=linkTag.linkData();e.hasMoreElements();) { ! * node = (Node)e.nextElement(); ! * if (node instanceof ImageTag) { ! * imageTag = (ImageTag)node; ! * // Process imageTag ! * } ! * } ! * </pre> ! * There is another mechanism available that allows for uniform extraction of images. You could do this to ! * get all images from a web page : ! * <pre> ! * Node node; ! * Vector imageCollectionVector = new Vector(); ! 
* for (NodeIterator e = parser.elements();e.hasMoreNode();) { ! * node = e.nextHTMLNode(); ! * node.collectInto(imageCollectionVector,ImageTag.IMAGE_FILTER); ! * } ! * </pre> ! * The link tag processes all its contents in collectInto(). ! * @param tagData The data relating to the tag. ! * @param compositeTagData The data regarding the composite structure of the tag. ! * @param linkData The data specific to the link tag. ! * @see #linkData() ! */ ! public LinkTag(TagData tagData,CompositeTagData compositeTagData,LinkData linkData) { ! super(tagData,compositeTagData); ! this.link = linkData.getLink(); ! this.linkText = linkData.getLinkText(); ! this.accessKey = linkData.getAccessKey(); ! this.mailLink = linkData.isMailLink(); ! this.javascriptLink = linkData.isJavascriptLink(); ! } ! /** ! * Returns the accesskey element if any inside this link tag */ ! public String getAccessKey() ! { ! return accessKey; ! } ! /** ! * Returns the url as a string, to which this link points ! */ ! public String getLink() ! { ! return link; ! } ! /** ! * Returns the text contained inside this link tag ! */ ! public String getLinkText() ! { ! return linkText; ! } ! /** ! * Return the text contained in this linkinode ! * Kaarle Kaila 23.10.2001 ! */ ! public String getText() ! { ! return toHtml(); ! } ! /** ! * Is this a mail address ! * @return boolean true/false ! */ ! public boolean isMailLink() { ! return mailLink; ! } /** ! * Tests if the link is javascript ! * @return flag indicating if the link is a javascript code ! */ public boolean isJavascriptLink() { ! return javascriptLink; ! } /** ! * Tests if the link is an FTP link. ! * ! * @return flag indicating if this link is an FTP link ! */ public boolean isFTPLink() { ! return link.indexOf("ftp://")==0; ! } /** ! * Tests if the link is an HTTP link. ! * ! * @return flag indicating if this link is an HTTP link ! */ public boolean isHTTPLink() { ! return (!isFTPLink() && !isHTTPSLink() && !isJavascriptLink() && !isMailLink()); ! } ! 
/** ! * Tests if the link is an HTTPS link. ! * ! * @return flag indicating if this link is an HTTPS link ! */ public boolean isHTTPSLink() { return link.indexOf("https://")==0; --- 41,181 ---- public class LinkTag extends CompositeTag { ! public static final String LINK_TAG_FILTER="-l"; ! /** ! * The URL where the link points to ! */ ! protected String link; ! /** ! * The text of of the link element ! */ ! protected String linkText; ! /** ! * The accesskey existing inside this link. ! */ ! protected String accessKey; ! private boolean mailLink; ! private boolean javascriptLink; ! ! /** ! * Constructor creates an HTMLLinkNode object, which basically stores the location ! * where the link points to, and the text it contains. ! * <p> ! * In order to get the contents of the link tag, use the method linkData(), ! * which returns an enumeration of nodes encapsulated within the link. ! * <p> ! * The following code will get all the images inside a link tag. ! * <pre> ! * Node node ; ! * ImageTag imageTag; ! * for (Enumeration e=linkTag.linkData();e.hasMoreElements();) { ! * node = (Node)e.nextElement(); ! * if (node instanceof ImageTag) { ! * imageTag = (ImageTag)node; ! * // Process imageTag ! * } ! * } ! * </pre> ! * There is another mechanism available that allows for uniform extraction of images. You could do this to ! * get all images from a web page : ! * <pre> ! * Node node; ! * Vector imageCollectionVector = new Vector(); ! * for (NodeIterator e = parser.elements();e.hasMoreNode();) { ! * node = e.nextHTMLNode(); ! * node.collectInto(imageCollectionVector,ImageTag.IMAGE_FILTER); ! * } ! * </pre> ! * The link tag processes all its contents in collectInto(). ! * @param tagData The data relating to the tag. ! * @param compositeTagData The data regarding the composite structure of the tag. ! * @param linkData The data specific to the link tag. ! * @see #linkData() ! */ ! public LinkTag(TagData tagData,CompositeTagData compositeTagData,LinkData linkData) { ! 
super(tagData,compositeTagData); ! this.link = linkData.getLink(); ! this.linkText = linkData.getLinkText(); ! this.accessKey = linkData.getAccessKey(); ! this.mailLink = linkData.isMailLink(); ! this.javascriptLink = linkData.isJavascriptLink(); ! } ! /** ! * Returns the accesskey element if any inside this link tag */ ! public String getAccessKey() ! { ! return accessKey; ! } ! /** ! * Returns the url as a string, to which this link points ! */ ! public String getLink() ! { ! return link; ! } ! /** ! * Returns the text contained inside this link tag ! */ ! public String getLinkText() ! { ! return linkText; ! } ! /** ! * Return the text contained in this linkinode ! * Kaarle Kaila 23.10.2001 ! */ ! public String getText() ! { ! return toHtml(); ! } ! /** ! * Is this a mail address ! * @return boolean true/false ! */ ! public boolean isMailLink() { ! return mailLink; ! } /** ! * Tests if the link is javascript ! * @return flag indicating if the link is a javascript code ! */ public boolean isJavascriptLink() { ! return javascriptLink; ! } /** ! * Tests if the link is an FTP link. ! * ! * @return flag indicating if this link is an FTP link ! */ public boolean isFTPLink() { ! return link.indexOf("ftp://")==0; ! } /** ! * Tests if the link is an IRC link. ! * @return flag indicating if this link is an IRC link ! */ ! public boolean isIRCLink() { ! return link.indexOf("irc://")==0; ! } ! ! /** ! * Tests if the link is an HTTP link. ! * ! * @return flag indicating if this link is an HTTP link ! */ public boolean isHTTPLink() { ! return (!isFTPLink() && !isHTTPSLink() && !isJavascriptLink() && !isMailLink() && !isIRCLink()); ! } ! /** ! * Tests if the link is an HTTPS link. ! * ! * @return flag indicating if this link is an HTTPS link ! */ public boolean isHTTPSLink() { return link.indexOf("https://")==0; *************** *** 175,182 **** /** ! * Tests if the link is an HTTP link or one of its variations (HTTPS, etc.). ! * ! 
* @return flag indicating if this link is an HTTP link or one of its variations (HTTPS, etc.) ! */ public boolean isHTTPLikeLink() { return isHTTPLink() || isHTTPSLink(); --- 183,190 ---- /** ! * Tests if the link is an HTTP link or one of its variations (HTTPS, etc.). ! * ! * @return flag indicating if this link is an HTTP link or one of its variations (HTTPS, etc.) ! */ public boolean isHTTPLikeLink() { return isHTTPLink() || isHTTPSLink(); *************** *** 184,249 **** ! /** ! * Insert the method's description here. ! * Creation date: (8/3/2001 1:49:31 AM) ! * @param newMailLink boolean ! */ ! public void setMailLink(boolean newMailLink) { ! mailLink = newMailLink; ! } ! /** ! * Set the link as a javascript link. ! * ! * @param newJavascriptLink flag indicating if the link is a javascript code ! */ ! public void setJavascriptLink(boolean newJavascriptLink) { ! javascriptLink = newJavascriptLink; ! } ! /** ! * Print the contents of this Link Node ! */ ! public String toString() ! { ! StringBuffer sb = new StringBuffer(); ! sb.append("Link to : "+link + "; titled : "+linkText+"; begins at : "+elementBegin()+"; ends at : "+elementEnd()+ ", AccessKey="); ! if (accessKey==null) sb.append("null\n"); ! else sb.append(accessKey+"\n"); ! if (children()!=null) ! { ! sb.append(" "+"LinkData\n"); ! sb.append(" "+"--------\n"); ! ! Node node; ! int i = 0; ! for (SimpleNodeIterator e=children();e.hasMoreNodes();) ! { ! node = (Node)e.nextNode(); ! sb.append(" "+(i++)+ " "); ! sb.append(node.toString()+"\n"); ! } ! } ! sb.append(" "+"*** END of LinkData ***\n"); ! return sb.toString(); ! } ! public void setLink(String link) { ! this.link = link; setAttribute ("HREF", link); ! } ! /** ! * This method returns an enumeration of data that it contains ! * @return Enumeration ! * @deprecated Use children() instead. ! */ ! public SimpleNodeIterator linkData() { ! return children(); ! } ! ! public void accept(NodeVisitor visitor) { ! visitor.visitLinkTag(this); ! 
super.accept(visitor); ! } } --- 192,257 ---- ! /** ! * Insert the method's description here. ! * Creation date: (8/3/2001 1:49:31 AM) ! * @param newMailLink boolean ! */ ! public void setMailLink(boolean newMailLink) { ! mailLink = newMailLink; ! } ! /** ! * Set the link as a javascript link. ! * ! * @param newJavascriptLink flag indicating if the link is a javascript code ! */ ! public void setJavascriptLink(boolean newJavascriptLink) { ! javascriptLink = newJavascriptLink; ! } ! /** ! * Print the contents of this Link Node ! */ ! public String toString() ! { ! StringBuffer sb = new StringBuffer(); ! sb.append("Link to : "+link + "; titled : "+linkText+"; begins at : "+elementBegin()+"; ends at : "+elementEnd()+ ", AccessKey="); ! if (accessKey==null) sb.append("null\n"); ! else sb.append(accessKey+"\n"); ! if (children()!=null) ! { ! sb.append(" "+"LinkData\n"); ! sb.append(" "+"--------\n"); ! ! Node node; ! int i = 0; ! for (SimpleNodeIterator e=children();e.hasMoreNodes();) ! { ! node = (Node)e.nextNode(); ! sb.append(" "+(i++)+ " "); ! sb.append(node.toString()+"\n"); ! } ! } ! sb.append(" "+"*** END of LinkData ***\n"); ! return sb.toString(); ! } ! public void setLink(String link) { ! this.link = link; setAttribute ("HREF", link); ! } ! /** ! * This method returns an enumeration of data that it contains ! * @return Enumeration ! * @deprecated Use children() instead. ! */ ! public SimpleNodeIterator linkData() { ! return children(); ! } ! ! public void accept(NodeVisitor visitor) { ! visitor.visitLinkTag(this); ! super.accept(visitor); ! } } |
From: <der...@us...> - 2003-09-01 20:48:35
|
Update of /cvsroot/htmlparser/htmlparser/docs/samples In directory sc8-pr-cvs1:/tmp/cvs-serv29574 Modified Files: links.html Log Message: Fix bug #786869 LinkExtractor Sample not working. Index: links.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/samples/links.html,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** links.html 15 Dec 2002 03:41:25 -0000 1.2 --- links.html 1 Sep 2003 20:48:29 -0000 1.3 *************** *** 4,7 **** --- 4,10 ---- <title>Link and Mail Extractor </title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> + <style type="text/css"> + <!--code { font-family: Courier New, Courier; font-size: 10pt; margin: 0px; }--> + </style> </head> *************** *** 11,134 **** the parserApplications package (in the download bundle, this will be in src.zip).</p> <p> ! <!-- ======================================================== --> ! <!-- = Java Sourcecode to HTML automatically converted code = --> ! <!-- = J2H V2.0 2002 by Markus Gebhard ma...@ja... = --> ! <!-- = Further information: http://www.java2html.de = --> ! </p> <center> ! <table align="center" border="2" cellpadding="3" cellspacing="0" bgcolor="#FFFBF0"> <tr> - <!-- start line numbers --> - <td align="right" valign="top"> <code> <font color="#808080"> 1<br> - 2<br> - 3<br> - 4<br> - 5<br> - 6<br> - 7<br> - 8<br> - 9<br> - 10<br> - 11<br> - 12<br> - 13<br> - 14<br> - 15<br> - 16<br> - <a name="17"></a>17<br> - <a name="18"></a>18<br> - 19<br> - 20<br> - 21<br> - 22<br> - 23<br> - 24<br> - 25<br> - 26<br> - 27<br> - 28<br> - 29<br> - 30<br> - <a name="31"></a>31<br> - 32<br> - 33<br> - 34<br> - 35<br> - 36<br> - 37<br> - 38<br> - 39<br> - 40<br> - 41<br> - 42<br> - 43<br> - 44<br> - 45<br> - 46<br> - 47<br> - 48<br> - 49<br> - 50<br> - </font> </code> </td> - <!-- end line numbers --> <!-- start source code --> ! 
<td valign="top"> <code> <font color="#0000c0">import </font><font color="#000000"></font><font color="#000000">org.htmlparser.HTMLNode;<br> ! </font><font color="#0000c0">import </font><font color="#000000">org.htmlparser.HTMLParser;<br> ! </font><font color="#0000c0">import </font><font color="#000000">org.htmlparser.tags.HTMLLinkTag;<br> ! </font><font color="#0000c0">import </font><font color="#000000">org.htmlparser.util.HTMLEnumeration;<br> ! </font><font color="#0000c0">import </font><font color="#000000">org.htmlparser.util.HTMLParserException;<br> ! <br> ! </font><font color="#008000">/**<br> ! * LinkExtractor extracts all the links from the given webpage<br> ! * and prints them on standard output.<br> ! */<br> ! </font><font color="#0000c0">public class </font><font color="#000000">LinkExtractor </font><font color="#000000">{<br> ! </font><font color="#0000c0">private </font><font color="#000000">String location;<br> ! </font><font color="#0000c0">private </font><font color="#000000">HTMLParser parser;<br> ! </font><font color="#0000c0">public </font><font color="#000000">LinkExtractor</font><font color="#000000">(</font><font color="#000000">String location</font><font color="#000000">) {<br> ! </font><font color="#0000c0">this</font><font color="#000000">.location = location;<br> ! </font><font color="#0000c0">try </font><font color="#000000">{<br> ! </font><font color="#0000c0">this</font><font color="#000000">.parser = </font><font color="#0000c0">new </font><font color="#000000">HTMLParser</font><font color="#000000">(</font><font color="#000000">location</font><font color="#000000">)</font><font color="#000000">; </font><font color="#008000">// Create the parser object<br> ! </font><font color="#000000">parser.registerScanners</font><font color="#000000">()</font><font color="#000000">; </font><font color="#008000">// Register standard scanners (Very Important)<br> ! </font><font color="#000000">}<br> ! 
</font><font color="#0000c0">catch </font><font color="#000000">(</font><font color="#000000">HTMLParserException e</font><font color="#000000">) {<br> ! </font><font color="#000000">e.printStackTrace</font><font color="#000000">()</font><font color="#000000">;<br> ! </font><font color="#000000">}<br> ! <br> ! }<br> ! </font><font color="#0000c0">public </font><font color="#c00000">void </font><font color="#000000">extractLinks</font><font color="#000000">() </font><font color="#0000c0">throws </font><font color="#000000">HTMLParserException </font><font color="#000000">{<br> ! </font><font color="#000000">HTMLNode node;<br> ! HTMLLinkTag linkTag;<br> ! System.out.println</font><font color="#000000">(</font><font color="#990000">"Parsing "</font><font color="#000000">+location+</font><font color="#990000">" for links..."</font><font color="#000000">)</font><font color="#000000">;<br> ! </font><font color="#0000c0">for </font><font color="#000000">(</font><font color="#000000">HTMLEnumeration e = parser.elements</font><font color="#000000">()</font><font color="#000000">; ! e.hasMoreNodes</font><font color="#000000">()</font><font color="#000000">;</font><font color="#000000">) {<br> ! </font><font color="#000000">node = e.nextHTMLNode</font><font color="#000000">()</font><font color="#000000">; </font><font color="#008000">// Get the next HTML Node<br> ! </font><font color="#0000c0">if </font><font color="#000000">(</font><font color="#000000">node </font><font color="#0000c0">instanceof </font><font color="#000000">HTMLLinkTag</font><font color="#000000">) {<br> ! </font><font color="#000000">linkTag = </font><font color="#000000">(</font><font color="#000000">HTMLLinkTag</font><font color="#000000">)</font><font color="#000000">node; </font><font color="#008000">// Downcast to a Link Tag<br> ! </font><font color="#000000">linkTag.print</font><font color="#000000">()</font><font color="#000000">; </font><font color="#008000">// Print it<br> ! 
</font><font color="#000000">}<br> ! }<br> ! }<br> ! </font><font color="#0000c0">public static </font><font color="#c00000">void </font><font color="#000000">main</font><font color="#000000">(</font><font color="#000000">String</font><font color="#000000">[] </font><font color="#000000">args</font><font color="#000000">) {<br> ! </font><font color="#0000c0">if </font><font color="#000000">(</font><font color="#000000">args.length<</font><font color="#990000">0</font><font color="#000000">) {<br> ! </font><font color="#000000">System.err.println</font><font color="#000000">(</font><font color="#990000">"Syntax Error : Please provide the location(URL or file) to parse"</font><font color="#000000">)</font><font color="#000000">;<br> ! System.exit</font><font color="#000000">(</font><font color="#000000">-</font><font color="#990000">1</font><font color="#000000">)</font><font color="#000000">;<br> ! </font><font color="#000000">}<br> ! </font><font color="#000000">LinkExtractor linkExtractor = </font><font color="#0000c0">new </font><font color="#000000">LinkExtractor</font><font color="#000000">(</font><font color="#000000">args</font><font color="#000000">[</font><font color="#990000">0</font><font color="#000000">])</font><font color="#000000">;<br> ! </font><font color="#0000c0">try </font><font color="#000000">{<br> ! </font><font color="#000000">linkExtractor.extractLinks</font><font color="#000000">()</font><font color="#000000">;<br> ! </font><font color="#000000">}<br> ! </font><font color="#0000c0">catch </font><font color="#000000">(</font><font color="#000000">HTMLParserException e</font><font color="#000000">) {<br> ! </font><font color="#000000">e.printStackTrace</font><font color="#000000">()</font><font color="#000000">;<br> ! </font><font color="#000000">}<br> ! }<br> ! </font><font color="#000000">}</font></code> </td> - </tr> <!-- end source code --> <!-- start J2H link --> <tr> ! 
<td colspan=2 align=right> <small> <a href="http://www.java2html.de" target="_blank">Java2html</a> --- 14,85 ---- the parserApplications package (in the download bundle, this will be in src.zip).</p> <p> ! ! <!-- ======================================================== --> ! <!-- = Java Sourcecode to HTML automatically converted code = --> ! <!-- = Java to HTML Converter V3.2 2003 by Markus Gebhard ma...@ja... = --> ! <!-- = Further information: http://www.java2html.de = --> <center> ! <table align="center" border="2" cellpadding="3" cellspacing="0" bgcolor="#ffffff"> <tr> <!-- start source code --> ! <td nowrap valign="top" align="left"> ! <code> ! <font color="#808080">01 </font><font color="#0000c0"><b>import </b></font><font color="#000000">org.htmlparser.Node;</font><br> ! <font color="#808080">02 </font><font color="#0000c0"><b>import </b></font><font color="#000000">org.htmlparser.Parser;</font><br> ! <font color="#808080">03 </font><font color="#0000c0"><b>import </b></font><font color="#000000">org.htmlparser.tags.LinkTag;</font><br> ! <font color="#808080">04 </font><font color="#0000c0"><b>import </b></font><font color="#000000">org.htmlparser.util.ParserException;</font><br> ! <font color="#808080">05 </font><font color="#ffffff"></font><br> ! <font color="#808080">06 </font><font color="#008000">/**</font><br> ! <font color="#808080">07 </font><font color="#ffffff"> </font><font color="#008000">* LinkExtractor extracts all the links from the given webpage</font><br> ! <font color="#808080">08 </font><font color="#ffffff"> </font><font color="#008000">* and prints them on standard output.</font><br> ! <font color="#808080">09 </font><font color="#ffffff"> </font><font color="#008000">*/</font><br> ! <font color="#808080">10 </font><font color="#0000c0"><b>public class </b></font><font color="#000000">LinkExtractor </font><font color="#000000">{</font><br> ! 
<font color="#808080">11 </font><font color="#ffffff"> </font><font color="#0000c0"><b>private </b></font><font color="#000000">String location;</font><br> ! <font color="#808080">12 </font><font color="#ffffff"> </font><font color="#0000c0"><b>private </b></font><font color="#000000">Parser parser;</font><br> ! <font color="#808080">13 </font><font color="#ffffff"> </font><font color="#0000c0"><b>public </b></font><font color="#000000">LinkExtractor</font><font color="#000000">(</font><font color="#000000">String location</font><font color="#000000">) {</font><br> ! <font color="#808080">14 </font><font color="#ffffff"> </font><font color="#0000c0"><b>this</b></font><font color="#000000">.location = location;</font><br> ! <font color="#808080">15 </font><font color="#ffffff"> </font><font color="#0000c0"><b>try </b></font><font color="#000000">{</font><br> ! <font color="#808080"><a name="16">16</a> </font><font color="#ffffff"> </font><font color="#0000c0"><b>this</b></font><font color="#000000">.parser = </font><font color="#0000c0"><b>new </b></font><font color="#000000">Parser</font><font color="#000000">(</font><font color="#000000">location</font><font color="#000000">)</font><font color="#000000">; </font><font color="#008000">// Create the parser object</font><br> ! <font color="#808080"><a name="17">17</a> </font><font color="#ffffff"> </font><font color="#000000">parser.registerScanners</font><font color="#000000">()</font><font color="#000000">; </font><font color="#008000">// Register standard scanners (Very Important)</font><br> ! <font color="#808080">18 </font><font color="#ffffff"> </font><font color="#000000">}</font><br> ! <font color="#808080">19 </font><font color="#ffffff"> </font><font color="#0000c0"><b>catch </b></font><font color="#000000">(</font><font color="#000000">ParserException e</font><font color="#000000">) {</font><br> ! 
<font color="#808080">20 </font><font color="#ffffff"> </font><font color="#000000">e.printStackTrace</font><font color="#000000">()</font><font color="#000000">;</font><br> ! <font color="#808080">21 </font><font color="#ffffff"> </font><font color="#000000">}</font><br> ! <font color="#808080">22 </font><font color="#ffffff"> </font><br> ! <font color="#808080">23 </font><font color="#ffffff"> </font><font color="#000000">}</font><br> ! <font color="#808080">24 </font><font color="#ffffff"> </font><font color="#0000c0"><b>public </b></font><font color="#c00000"><b>void </b></font><font color="#000000">extractLinks</font><font color="#000000">() </font><font color="#0000c0"><b>throws </b></font><font color="#000000">ParserException </font><font color="#000000">{</font><br> ! <font color="#808080">25 </font><font color="#ffffff"> </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#990000">"Parsing "</font><font color="#000000">+location+</font><font color="#990000">" for links..."</font><font color="#000000">)</font><font color="#000000">;</font><br> ! <font color="#808080"><a name="26">26</a> </font><font color="#ffffff"> </font><font color="#000000">Node </font><font color="#000000">[] </font><font color="#000000">links = parser.extractAllNodesThatAre</font><font color="#000000">(</font><font color="#000000">LinkTag.</font><font color="#0000c0"><b>class</b></font><font color="#000000">)</font><font color="#000000">;</font><br> ! <font color="#808080">27 </font><font color="#ffffff"> </font><font color="#0000c0"><b>for </b></font><font color="#000000">(</font><font color="#c00000"><b>int </b></font><font color="#000000">i = </font><font color="#990000">0</font><font color="#000000">;i < links.length;i++</font><font color="#000000">) {</font><br> ! 
<font color="#808080">28 </font><font color="#ffffff"> </font><font color="#000000">LinkTag linkTag = </font><font color="#000000">(</font><font color="#000000">LinkTag</font><font color="#000000">)</font><font color="#000000">links</font><font color="#000000">[</font><font color="#000000">i</font><font color="#000000">]</font><font color="#000000">;</font><br> ! <font color="#808080">29 </font><font color="#ffffff"> </font><font color="#008000">// To extract only mail addresses, uncomment the following line</font><br> ! <font color="#808080">30 </font><font color="#ffffff"> </font><font color="#008000">// if (linkTag.isMailLink())</font><br> ! <font color="#808080">31 </font><font color="#ffffff"> </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#000000">linkTag.getLink</font><font color="#000000">())</font><font color="#000000">;</font><br> ! <font color="#808080">32 </font><font color="#ffffff"> </font><font color="#000000">}</font><br> ! <font color="#808080">33 </font><font color="#ffffff"> </font><font color="#000000">}</font><br> ! <font color="#808080">34 </font><font color="#ffffff"></font><br> ! <font color="#808080">35 </font><font color="#ffffff"> </font><font color="#0000c0"><b>public static </b></font><font color="#c00000"><b>void </b></font><font color="#000000">main</font><font color="#000000">(</font><font color="#000000">String</font><font color="#000000">[] </font><font color="#000000">args</font><font color="#000000">) {</font><br> ! <font color="#808080">36 </font><font color="#ffffff"> </font><font color="#0000c0"><b>if </b></font><font color="#000000">(</font><font color="#000000">args.length != </font><font color="#990000">1</font><font color="#000000">) {</font><br> ! 
<font color="#808080">37 </font><font color="#ffffff"> </font><font color="#000000">System.err.println</font><font color="#000000">(</font><font color="#990000">"Syntax Error : Please provide the location(URL or file) to parse"</font><font color="#000000">)</font><font color="#000000">;</font><br> ! <font color="#808080">38 </font><font color="#ffffff"> </font><font color="#000000">System.exit</font><font color="#000000">(</font><font color="#000000">-</font><font color="#990000">1</font><font color="#000000">)</font><font color="#000000">;</font><br> ! <font color="#808080">39 </font><font color="#ffffff"> </font><font color="#000000">}</font><br> ! <font color="#808080">40 </font><font color="#ffffff"> </font><font color="#000000">LinkExtractor linkExtractor = </font><font color="#0000c0"><b>new </b></font><font color="#000000">LinkExtractor</font><font color="#000000">(</font><font color="#000000">args</font><font color="#000000">[</font><font color="#990000">0</font><font color="#000000">])</font><font color="#000000">;</font><br> ! <font color="#808080">41 </font><font color="#ffffff"> </font><font color="#0000c0"><b>try </b></font><font color="#000000">{</font><br> ! <font color="#808080">42 </font><font color="#ffffff"> </font><font color="#000000">linkExtractor.extractLinks</font><font color="#000000">()</font><font color="#000000">;</font><br> ! <font color="#808080">43 </font><font color="#ffffff"> </font><font color="#000000">}</font><br> ! <font color="#808080">44 </font><font color="#ffffff"> </font><font color="#0000c0"><b>catch </b></font><font color="#000000">(</font><font color="#000000">ParserException e</font><font color="#000000">) {</font><br> ! <font color="#808080">45 </font><font color="#ffffff"> </font><font color="#000000">e.printStackTrace</font><font color="#000000">()</font><font color="#000000">;</font><br> ! <font color="#808080">46 </font><font color="#ffffff"> </font><font color="#000000">}</font><br> ! 
<font color="#808080">47 </font><font color="#ffffff"> </font><font color="#000000">}</font><br> ! <font color="#808080">48 </font><font color="#000000">}</font><br> ! <font color="#ffffff"></font><font color="#ffffff"> ! </font></code> </td> <!-- end source code --> + </tr> <!-- start J2H link --> <tr> ! <td align="right"> <small> <a href="http://www.java2html.de" target="_blank">Java2html</a> *************** *** 143,170 **** ! ! <p>Let us look at whats really happening here. In <a href="#17">line 17</a>, we are creating a parser object that will work on a given location. The location ! can be a URL or a file. In <a href="#18">line 18</a>, we ask the parser to register itself with certain standard scanners, that give it the capability to handle ! special tags (like links, images, etc..). If line 18 is not provided, the program will not output any link tags.</p> ! <p>The extractLinks() method is the heart of the program. All it really has a ! loop that utilizes an iterator - called HTMLEnumeration. HTMLEnumeration has ! a really simple interface - hasMoreNodes() and nextHTMLNode(). This is very ! similar to java's own Enumeration (hasMoreElements() and nextElement()). The ! reason of not using Enumeration and instead having our own version of it - is ! that we'd like to save previous CPU time on the downcasts. Since we know that ! we're only dealing with HTMLNode objects, we'd like to directly return them. </p> ! <p>Every call to nextHTMLNode() advances the parser to the next object. Please ! remember that this is a streaming parser, and the elements are coming in real ! time. </p> ! <p>Now, we'd like to filter out all objects except links. <a href="#31">Line 31</a> ! accomplishes this. We check if the node is an instance of HTMLLinkTag. If it ! is, then this is the link object that has just been parsed from the webpage. ! We can make a call to its print() method, or we can print it the way we like, using its interface methods such as getLink() and getLinkText(). 
Try making ! the following modifications to Line 33.</p> <p> <!-- ======================================================== --> --- 94,112 ---- ! <p>Let us look at whats really happening here. In <a href="#16">line 16</a>, we are creating a parser object that will work on a given location. The location ! can be a URL or a file. In <a href="#17">line 17</a>, we ask the parser to register itself with certain standard scanners, that give it the capability to handle ! special tags (like links, images, etc..). If line 17 is not provided, the program will not output any link tags.</p> ! <p>The extractAllNodesThatAre() method call on <a href="#26">line 26</a> in ! extractLinks() is the heart of the program. As it's name suggests, it traverses ! all the nodes in the page and picks out the ones that match the class object ! provided, in this case a LinkTag class. </p> ! <p>We can now cycle through the array of nodes and print out each link URL. ! We can make a call to the LinkTag print() method, or we can print it the way we like, using its interface methods such as getLink() and getLinkText(). Try making ! the following modifications to Line 31.</p> <p> <!-- ======================================================== --> *************** *** 177,181 **** <tr> <!-- start line numbers --> ! <td align="right" valign="top"> <code> <font color="#808080"> 33<br> </font> </code> --- 119,123 ---- <tr> <!-- start line numbers --> ! <td align="right" valign="top"> <code> <font color="#808080"> 31<br> </font> </code> *************** *** 215,219 **** <tr> <!-- start line numbers --> ! <td align="right" valign="top"> <code> <font color="#808080"> 33<br> </font> </code> --- 157,161 ---- <tr> <!-- start line numbers --> ! <td align="right" valign="top"> <code> <font color="#808080"> 31<br> </font> </code> *************** *** 241,287 **** <!-- = END of automatically generated HTML code = --> <!-- ======================================================== --> ! 
<p>We can also check if the link is a mailto link, using the method HTMLLinkTag.isMailLink(). ! Suppose we wish to modify the above program to print all the email addresses ! from a webpage, we'd only have to modify line 33 to : </p> ! <p> ! <!-- ======================================================== --> ! <!-- = Java Sourcecode to HTML automatically converted code = --> ! <!-- = J2H V2.0 2002 by Markus Gebhard ma...@ja... = --> ! <!-- = Further information: http://www.java2html.de = --> ! </p> ! <center> ! <table align="center" border="2" cellpadding="3" cellspacing="0" bgcolor="#FFFBF0"> ! <tr> ! <!-- start line numbers --> ! <td align="right" valign="top"> <code> <font color="#808080"> 33<br> ! </font> ! </code> ! </td> ! <!-- end line numbers --> ! <!-- start source code --> ! <td valign="top"> ! <code> ! <font color="#0000c0">if </font><font color="#000000">(</font><font color="#000000">linkTag.isMailLink</font><font color="#000000">()) </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#000000">linkTag.getLink</font><font color="#000000">())</font><font color="#000000">;</font></code> ! ! </td> ! </tr> ! <!-- end source code --> ! <!-- start J2H link --> ! <tr> ! <td colspan=2 align=right> ! <small> ! <a href="http://www.java2html.de" target="_blank">Java2html</a> ! </small> ! </td> ! </tr> ! <!-- end J2H link --> ! </table> ! </center> ! <!-- = END of automatically generated HTML code = --> ! <!-- ======================================================== --> ! <p><strong>Limitations of this program</strong></p> ! <p>It cannot provide embedded links - links that are within other recognized tags ! (like the form tag). To uniformly extract embedded links across all tags, check ! 
<a href="linksEmbedded.html">Extracting Embedded Links/Images</a>.</p> <p><a href="index.html">Back to Samples</a><br> <a href="../index.html">Back to HTMLParser Home Page</a></p> --- 183,188 ---- <!-- = END of automatically generated HTML code = --> <!-- ======================================================== --> ! <p>We can also check if the link is a mailto link, using the method LinkTag.isMailLink() ! by just uncommenting line 30.</p> <p><a href="index.html">Back to Samples</a><br> <a href="../index.html">Back to HTMLParser Home Page</a></p> |
From: <der...@us...> - 2003-09-01 20:24:08
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications In directory sc8-pr-cvs1:/tmp/cvs-serv25734 Modified Files: LinkExtractor.java Log Message: Fix bug #786869 LinkExtractor Sample not working. Index: LinkExtractor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/LinkExtractor.java,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** LinkExtractor.java 24 Aug 2003 21:59:42 -0000 1.41 --- LinkExtractor.java 1 Sep 2003 20:24:04 -0000 1.42 *************** *** 27,31 **** // Website : http://www.industriallogic.com - package org.htmlparser.parserapplications; --- 27,30 ---- *************** *** 40,82 **** */ public class LinkExtractor { ! private String location; ! private Parser parser; ! public LinkExtractor(String location) { ! this.location = location; ! try { ! this.parser = new Parser(location); // Create the parser object ! parser.registerScanners(); // Register standard scanners (Very Important) ! } ! catch (ParserException e) { ! e.printStackTrace(); ! } ! ! } ! public void extractLinks() throws ParserException { ! System.out.println("Parsing "+location+" for links..."); ! Node [] links = parser.extractAllNodesThatAre(LinkTag.class); ! for (int i = 0;i < links.length;i++) { ! LinkTag linkTag = (LinkTag)links[i]; ! // Print it ! // System.out.println(linkTag.toString()); ! System.out.println(linkTag.getLink()); ! // To extract only mail addresses, uncomment the following line ! // if (linkTag.isMailLink()) System.out.println(linkTag.getLink()); ! } ! } ! public static void main(String[] args) { ! if (args.length != 1) { ! System.err.println("Syntax Error : Please provide the location(URL or file) to parse"); ! System.exit(-1); ! } ! LinkExtractor linkExtractor = new LinkExtractor(args[0]); ! try { ! linkExtractor.extractLinks(); ! } ! catch (ParserException e) { ! e.printStackTrace(); ! } ! 
} } --- 39,79 ---- */ public class LinkExtractor { ! private String location; ! private Parser parser; ! public LinkExtractor(String location) { ! this.location = location; ! try { ! this.parser = new Parser(location); // Create the parser object ! parser.registerScanners(); // Register standard scanners (Very Important) ! } ! catch (ParserException e) { ! e.printStackTrace(); ! } ! ! } ! public void extractLinks() throws ParserException { ! System.out.println("Parsing "+location+" for links..."); ! Node [] links = parser.extractAllNodesThatAre(LinkTag.class); ! for (int i = 0;i < links.length;i++) { ! LinkTag linkTag = (LinkTag)links[i]; ! // To extract only mail addresses, uncomment the following line ! // if (linkTag.isMailLink()) ! System.out.println(linkTag.getLink()); ! } ! } ! public static void main(String[] args) { ! if (args.length != 1) { ! System.err.println("Syntax Error : Please provide the location(URL or file) to parse"); ! System.exit(-1); ! } ! LinkExtractor linkExtractor = new LinkExtractor(args[0]); ! try { ! linkExtractor.extractLinks(); ! } ! catch (ParserException e) { ! e.printStackTrace(); ! } ! } } |
From: <der...@us...> - 2003-09-01 19:56:06
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests In directory sc8-pr-cvs1:/tmp/cvs-serv19769/tests/parserHelperTests Modified Files: RemarkNodeParserTest.java Log Message: Workaround for bug #788746 parser crashes on comments like <!-- foobar --!>. No real solution because the codebase assumes remarks end with -->, so this just avoids the crash, but the toHtml() output will output --!-->, which isn't really correct. Added the test case as RemarkNodeParserTest.testExclamationComment(). Index: RemarkNodeParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/RemarkNodeParserTest.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** RemarkNodeParserTest.java 24 Aug 2003 21:59:43 -0000 1.32 --- RemarkNodeParserTest.java 1 Sep 2003 19:55:59 -0000 1.33 *************** *** 39,230 **** public class RemarkNodeParserTest extends ParserTestCase { ! public RemarkNodeParserTest(String name) { ! super(name); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <!-- saved from url=(0022)http://internet.e-mail --> ! * <HTML> ! * <HEAD><META name="title" content="Training Introduction"> ! * <META name="subject" content=""> ! * <!-- ! Whats gonna happen now ? ! * --> ! * <TEST> ! * </TEST> ! * ! * The above line is incorrectly parsed - the remark is not correctly identified. ! * This bug was reported by Serge Kruppa (2002-Feb-08). ! */ ! public void testRemarkNodeBug() throws ParserException ! { ! createParser( ! "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ ! "<HTML>\n"+ ! "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ ! "<META name=\"subject\" content=\"\">\n"+ ! "<!--\n"+ ! " Whats gonna happen now ?\n"+ ! "-->\n"+ ! "<TEST>\n"+ ! "</TEST>\n"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); ! // The first node should be a HTMLRemarkNode ! 
assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.getText()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertEquals("Text of the remarkNode #6","\r\n Whats gonna happen now ?\r\n",remarkNode.getText()); ! } ! public void testToPlainTextString() throws ParserException { ! createParser( ! "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ ! "<HTML>\n"+ ! "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ ! "<META name=\"subject\" content=\"\">\n"+ ! "<!--\n"+ ! " Whats gonna happen now ?\n"+ ! "-->\n"+ ! "<TEST>\n"+ ! "</TEST>\n"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); ! // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Plain Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.toPlainTextString()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertEquals("Plain Text of the remarkNode #6","\r\n Whats gonna happen now ?\r\n",remarkNode.getText()); ! ! } ! public void testToRawString() throws ParserException { ! createParser( ! "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ ! "<HTML>\n"+ ! "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ ! "<META name=\"subject\" content=\"\">\n"+ ! "<!--\n"+ ! " Whats gonna happen now ?\n"+ ! "-->\n"+ ! "<TEST>\n"+ ! "</TEST>\n"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); ! // The first node should be a HTMLRemarkNode ! 
assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertStringEquals("Raw String of the remarkNode #1","<!-- saved from url=(0022)http://internet.e-mail -->",remarkNode.toHtml()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertStringEquals("Raw String of the remarkNode #6","<!--\r\n Whats gonna happen now ?\r\n-->",remarkNode.toHtml()); ! } ! ! public void testNonRemarkNode() throws ParserException { ! createParser(" <![endif]>"); ! parseAndAssertNodeCount(2); ! // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a string node",node[0] instanceof StringNode); ! assertTrue("Second node should be a Tag",node[1] instanceof Tag); ! StringNode stringNode = (StringNode)node[0]; ! Tag tag = (Tag)node[1]; ! assertEquals("Text contents"," ",stringNode.getText()); ! assertEquals("Tag Contents","![endif]",tag.getText()); ! ! } ! ! /** ! * This is the simulation of bug report 586756, submitted ! * by John Zook. ! * If all the comment contains is a blank line, it breaks ! * the state ! */ ! public void testRemarkNodeWithBlankLine() throws ParserException { ! createParser("<!--\n"+ ! "\n"+ ! "-->"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Expected contents","\r\n",remarkNode.getText()); ! ! } ! ! /** ! * This is the simulation of a bug report submitted ! * by Claude Duguay. ! * If it is a comment with nothing in it, parser crashes ! */ ! public void testRemarkNodeWithNothing() throws ParserException { ! createParser("<!-->"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! 
assertEquals("Expected contents","",remarkNode.getText()); ! ! } ! ! /** ! * Reproduction of bug reported by John Zook [594301] ! * When we have tags like : ! * <!-- <A> --> ! * it doesent get parsed correctly ! */ ! public void testTagWithinRemarkNode() throws ParserException { ! createParser("<!-- \n"+ ! "<A>\n"+ ! "bcd -->"); ! Parser.setLineSeparator("\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertStringEquals("Expected contents"," \n<A>\nbcd ",remarkNode.getText()); ! ! } ! ! /** ! * Bug reported by John Zook [594301], invalid remark nodes are accepted as remark nodes. ! * <<br> ! * -<br> ! * -<br> ! * ssd --><br> ! * This is not supposed to be a remarknode ! */ ! public void testInvalidTag() throws ParserException { ! createParser("<!\n"+ ! "-\n"+ ! "-\n"+ ! "ssd -->"); ! Parser.setLineSeparator("\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a Tag but was "+node[0],node[0] instanceof Tag); ! Tag tag = (Tag)node[0]; ! assertStringEquals("Expected contents","!\n"+ ! "-\n"+ ! "-\n"+ ! "ssd --",tag.getText()); ! Parser.setLineSeparator("\r\n"); ! } ! ! /** ! * Bug reported by John Zook [594301] ! * If dashes exist in a comment, they dont get added to the comment text ! */ ! public void testDashesInComment() throws ParserException{ ! createParser("<!-- -- -->"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[0],node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Remark Node contents"," -- ",remarkNode.getText()); ! } --- 39,230 ---- public class RemarkNodeParserTest extends ParserTestCase { ! public RemarkNodeParserTest(String name) { ! super(name); ! } ! ! /** ! * The bug being reproduced is this : <BR> ! * <!-- saved from url=(0022)http://internet.e-mail --> ! * <HTML> ! * <HEAD><META name="title" content="Training Introduction"> ! 
* <META name="subject" content=""> ! * <!-- ! Whats gonna happen now ? ! * --> ! * <TEST> ! * </TEST> ! * ! * The above line is incorrectly parsed - the remark is not correctly identified. ! * This bug was reported by Serge Kruppa (2002-Feb-08). ! */ ! public void testRemarkNodeBug() throws ParserException ! { ! createParser( ! "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ ! "<HTML>\n"+ ! "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ ! "<META name=\"subject\" content=\"\">\n"+ ! "<!--\n"+ ! " Whats gonna happen now ?\n"+ ! "-->\n"+ ! "<TEST>\n"+ ! "</TEST>\n"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); ! // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.getText()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertEquals("Text of the remarkNode #6","\r\n Whats gonna happen now ?\r\n",remarkNode.getText()); ! } ! public void testToPlainTextString() throws ParserException { ! createParser( ! "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ ! "<HTML>\n"+ ! "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ ! "<META name=\"subject\" content=\"\">\n"+ ! "<!--\n"+ ! " Whats gonna happen now ?\n"+ ! "-->\n"+ ! "<TEST>\n"+ ! "</TEST>\n"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); ! // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Plain Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.toPlainTextString()); ! 
// The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertEquals("Plain Text of the remarkNode #6","\r\n Whats gonna happen now ?\r\n",remarkNode.getText()); ! ! } ! public void testToRawString() throws ParserException { ! createParser( ! "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ ! "<HTML>\n"+ ! "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ ! "<META name=\"subject\" content=\"\">\n"+ ! "<!--\n"+ ! " Whats gonna happen now ?\n"+ ! "-->\n"+ ! "<TEST>\n"+ ! "</TEST>\n"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(8); ! // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertStringEquals("Raw String of the remarkNode #1","<!-- saved from url=(0022)http://internet.e-mail -->",remarkNode.toHtml()); ! // The sixth node should be a HTMLRemarkNode ! assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); ! remarkNode = (RemarkNode)node[5]; ! assertStringEquals("Raw String of the remarkNode #6","<!--\r\n Whats gonna happen now ?\r\n-->",remarkNode.toHtml()); ! } ! ! public void testNonRemarkNode() throws ParserException { ! createParser(" <![endif]>"); ! parseAndAssertNodeCount(2); ! // The first node should be a HTMLRemarkNode ! assertTrue("First node should be a string node",node[0] instanceof StringNode); ! assertTrue("Second node should be a Tag",node[1] instanceof Tag); ! StringNode stringNode = (StringNode)node[0]; ! Tag tag = (Tag)node[1]; ! assertEquals("Text contents"," ",stringNode.getText()); ! assertEquals("Tag Contents","![endif]",tag.getText()); ! ! } ! ! /** ! * This is the simulation of bug report 586756, submitted ! * by John Zook. ! * If all the comment contains is a blank line, it breaks ! * the state ! */ ! 
public void testRemarkNodeWithBlankLine() throws ParserException { ! createParser("<!--\n"+ ! "\n"+ ! "-->"); ! Parser.setLineSeparator("\r\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Expected contents","\r\n",remarkNode.getText()); ! ! } ! ! /** ! * This is the simulation of a bug report submitted ! * by Claude Duguay. ! * If it is a comment with nothing in it, parser crashes ! */ ! public void testRemarkNodeWithNothing() throws ParserException { ! createParser("<!-->"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Expected contents","",remarkNode.getText()); ! ! } ! ! /** ! * Reproduction of bug reported by John Zook [594301] ! * When we have tags like : ! * <!-- <A> --> ! * it doesent get parsed correctly ! */ ! public void testTagWithinRemarkNode() throws ParserException { ! createParser("<!-- \n"+ ! "<A>\n"+ ! "bcd -->"); ! Parser.setLineSeparator("\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertStringEquals("Expected contents"," \n<A>\nbcd ",remarkNode.getText()); ! ! } ! ! /** ! * Bug reported by John Zook [594301], invalid remark nodes are accepted as remark nodes. ! * <<br> ! * -<br> ! * -<br> ! * ssd --><br> ! * This is not supposed to be a remarknode ! */ ! public void testInvalidTag() throws ParserException { ! createParser("<!\n"+ ! "-\n"+ ! "-\n"+ ! "ssd -->"); ! Parser.setLineSeparator("\n"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a Tag but was "+node[0],node[0] instanceof Tag); ! Tag tag = (Tag)node[0]; ! assertStringEquals("Expected contents","!\n"+ ! "-\n"+ ! "-\n"+ ! "ssd --",tag.getText()); ! Parser.setLineSeparator("\r\n"); ! } ! ! /** ! 
* Bug reported by John Zook [594301] ! * If dashes exist in a comment, they dont get added to the comment text ! */ ! public void testDashesInComment() throws ParserException{ ! createParser("<!-- -- -->"); ! parseAndAssertNodeCount(1); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[0],node[0] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Remark Node contents"," -- ",remarkNode.getText()); ! } *************** *** 258,262 **** ParserException { ! createParser( "<HTML>\n" + "<HEAD>\n" --- 258,262 ---- ParserException { ! createParser( "<HTML>\n" + "<HEAD>\n" *************** *** 268,275 **** + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; ! assertEquals("Remark Node contents"," Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp ",remarkNode.getText()); } --- 268,275 ---- + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; ! assertEquals("Remark Node contents"," Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp ",remarkNode.getText()); } *************** *** 281,285 **** ParserException { ! createParser( "<HTML>\n" + "<HEAD>\n" --- 281,285 ---- ParserException { ! createParser( "<HTML>\n" + "<HEAD>\n" *************** *** 291,298 **** + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; ! assertEquals("Remark Node contents"," another -- -- comment ",remarkNode.getText()); } --- 291,298 ---- + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; ! 
assertEquals("Remark Node contents"," another -- -- comment ",remarkNode.getText()); } *************** *** 304,308 **** ParserException { ! createParser( "<HTML>\n" + "<HEAD>\n" --- 304,308 ---- ParserException { ! createParser( "<HTML>\n" + "<HEAD>\n" *************** *** 314,321 **** + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; ! assertEquals("Remark Node contents","",remarkNode.getText()); } --- 314,321 ---- + "</HTML>\n" ); ! parseAndAssertNodeCount(10); ! assertTrue("Node should be a HTMLRemarkNode but was "+node[7],node[7] instanceof RemarkNode); ! RemarkNode remarkNode = (RemarkNode)node[7]; ! assertEquals("Remark Node contents","",remarkNode.getText()); } *************** *** 330,334 **** // HTMLParserException // { ! // createParser( // "<HTML>\n" // + "<HEAD>\n" --- 330,334 ---- // HTMLParserException // { ! // createParser( // "<HTML>\n" // + "<HEAD>\n" *************** *** 340,348 **** // + "</HTML>\n" // ); ! // parseAndAssertNodeCount(10); ! // assertTrue("Node should not be a HTMLRemarkNode",!(node[7] instanceof HTMLRemarkNode)); ! // assertTrue("Node should be a HTMLStringNode but was "+node[7],node[7].getType()==HTMLStringNode.TYPE); ! // HTMLStringNode stringNode = (HTMLStringNode)node[7]; ! // assertEquals("String Node contents","<!- not a comment, just regular old data characters ->\n",stringNode.getText()); // } } --- 340,369 ---- // + "</HTML>\n" // ); ! // parseAndAssertNodeCount(10); ! // assertTrue("Node should not be a HTMLRemarkNode",!(node[7] instanceof HTMLRemarkNode)); ! // assertTrue("Node should be a HTMLStringNode but was "+node[7],node[7].getType()==HTMLStringNode.TYPE); ! // HTMLStringNode stringNode = (HTMLStringNode)node[7]; ! 
// assertEquals("String Node contents","<!- not a comment, just regular old data characters ->\n",stringNode.getText()); // } + + /** + * Test a comment ending with !--. + */ + public void testExclamationComment () + throws + ParserException + { + createParser ( + "<html>\n" + + "<head>\n" + + "<title>foobar</title>\n" + + "</head>\n" + + "<body>\n" + + "<!-- foobar --!>\n" + + "</body>\n" + + "</html>\n" + ); + parseAndAssertNodeCount (10); + } + } |
From: <der...@us...> - 2003-09-01 19:56:05
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv19769 Modified Files: RemarkNodeParser.java Log Message: Workaround for bug #788746 parser crashes on comments like <!-- foobar --!>. No real solution because the codebase assumes remarks end with -->, so this just avoids the crash, but the toHtml() output will output --!-->, which isn't really correct. Added the test case as RemarkNodeParserTest.testExclamationComment(). Index: RemarkNodeParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/RemarkNodeParser.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** RemarkNodeParser.java 24 Aug 2003 21:59:41 -0000 1.27 --- RemarkNodeParser.java 1 Sep 2003 19:55:59 -0000 1.28 *************** *** 30,172 **** public class RemarkNodeParser { ! public final static int REMARK_NODE_BEFORE_PARSING_STATE=0; ! public final static int REMARK_NODE_OPENING_ANGLE_BRACKET_STATE=1; ! public final static int REMARK_NODE_EXCLAMATION_RECEIVED_STATE=2; ! public final static int REMARK_NODE_FIRST_DASH_RECEIVED_STATE=3; ! public final static int REMARK_NODE_ACCEPTING_STATE=4; ! public final static int REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE=5; ! public final static int REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE=6; ! public final static int REMARK_NODE_ACCEPTED_STATE=7; ! public final static int REMARK_NODE_ILLEGAL_STATE=8; ! public final static int REMARK_NODE_FINISHED_PARSING_STATE=2; ! ! /** ! * Locate the remark tag withing the input string, by parsing from the given position ! * @param reader HTML reader to be provided so as to allow reading of next line ! * @param input Input String ! * @param position Position to start parsing from ! */ ! public RemarkNode find(NodeReader reader,String input,int position) ! { ! int state = REMARK_NODE_BEFORE_PARSING_STATE; ! StringBuffer tagContents = new StringBuffer(); ! 
int tagBegin=0; ! int tagEnd=0; ! int i=position; ! int inputLen = input.length(); ! char ch,prevChar=' '; ! while (i < inputLen && state < REMARK_NODE_ACCEPTED_STATE) ! { ! ch = input.charAt(i); ! if (state == REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE) { ! if (ch == '>') ! { ! state=REMARK_NODE_ACCEPTED_STATE; ! tagEnd=i; ! } else if (ch=='-') { ! tagContents.append(prevChar); ! } else ! { ! // Rollback last 2 characters (assumed same) ! state = REMARK_NODE_ACCEPTING_STATE; ! tagContents.append(prevChar); ! tagContents.append(prevChar); ! } ! } ! if (state==REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE) ! { ! if (ch == '-') ! { ! state=REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE; ! } else ! { ! // Rollback ! state = REMARK_NODE_ACCEPTING_STATE; ! tagContents.append(prevChar); ! } ! } ! if (state==REMARK_NODE_ACCEPTING_STATE) { ! if (ch == '-') { ! state=REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE; ! } /*else ! if (ch == '<') ! { ! state=REMARK_NODE_ILLEGAL_STATE; ! } */ ! } ! if (state==REMARK_NODE_ACCEPTING_STATE) ! { ! // We can append contents now ! tagContents.append(ch); ! } ! ! if (state==REMARK_NODE_FIRST_DASH_RECEIVED_STATE) ! { ! if (ch == '-') { ! state=REMARK_NODE_ACCEPTING_STATE; ! // Do a lookahead and see if the next char is > ! if (input.length()>i+1 && input.charAt(i+1)=='>') { ! state=REMARK_NODE_ACCEPTED_STATE;tagEnd=i+1; ! } ! } ! else state=REMARK_NODE_ILLEGAL_STATE; ! } ! if (state==REMARK_NODE_EXCLAMATION_RECEIVED_STATE) ! { ! if (ch == '-') ! state=REMARK_NODE_FIRST_DASH_RECEIVED_STATE; ! else if (ch == '>') ! { ! state=REMARK_NODE_ACCEPTED_STATE; ! tagEnd=i; ! } ! else state=REMARK_NODE_ILLEGAL_STATE; ! } ! if (state==REMARK_NODE_OPENING_ANGLE_BRACKET_STATE) ! { ! if (ch == '!') ! state=REMARK_NODE_EXCLAMATION_RECEIVED_STATE; ! else state = REMARK_NODE_ILLEGAL_STATE; // This is not a remark tag ! } ! if (state == REMARK_NODE_BEFORE_PARSING_STATE) ! { ! if (ch=='<') { ! 
// Transition from State 0 to State 1 - Record data till > is encountered ! tagBegin = i; ! state = REMARK_NODE_OPENING_ANGLE_BRACKET_STATE; ! } ! else if (ch!=' ') { ! // Its not a space, hence this is probably a string node, not a remark node ! state = REMARK_NODE_ILLEGAL_STATE; ! } ! } ! // if (state > REMARK_NODE_OPENING_ANGLE_BRACKET_STATE && state < REMARK_NODE_ACCEPTED_STATE && i == input.length() - 1) ! if (state >=REMARK_NODE_ACCEPTING_STATE && state < REMARK_NODE_ACCEPTED_STATE && i == input.length() - 1) ! { ! // We need to continue parsing to the next line ! //input = reader.getNextLine(); ! tagContents.append(Parser.getLineSeparator()); ! do { ! input = reader.getNextLine(); ! } ! while (input!=null && input.length()==0); ! if (input!=null) ! inputLen = input.length(); else inputLen=-1; ! i=-1; ! } ! if (state==REMARK_NODE_ILLEGAL_STATE) ! { ! return null; ! } ! i++; ! prevChar = ch; ! } ! if (state==REMARK_NODE_ACCEPTED_STATE) ! return new RemarkNode(tagBegin,tagEnd,tagContents.toString()); ! else ! return null; ! } } --- 30,176 ---- public class RemarkNodeParser { ! public final static int REMARK_NODE_BEFORE_PARSING_STATE=0; ! public final static int REMARK_NODE_OPENING_ANGLE_BRACKET_STATE=1; ! public final static int REMARK_NODE_EXCLAMATION_RECEIVED_STATE=2; ! public final static int REMARK_NODE_FIRST_DASH_RECEIVED_STATE=3; ! public final static int REMARK_NODE_ACCEPTING_STATE=4; ! public final static int REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE=5; ! public final static int REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE=6; ! public final static int REMARK_NODE_ACCEPTED_STATE=7; ! public final static int REMARK_NODE_ILLEGAL_STATE=8; ! public final static int REMARK_NODE_FINISHED_PARSING_STATE=2; ! ! /** ! * Locate the remark tag withing the input string, by parsing from the given position ! * @param reader HTML reader to be provided so as to allow reading of next line ! * @param input Input String ! 
* @param position Position to start parsing from ! */ ! public RemarkNode find(NodeReader reader,String input,int position) ! { ! int state = REMARK_NODE_BEFORE_PARSING_STATE; ! StringBuffer tagContents = new StringBuffer(); ! int tagBegin=0; ! int tagEnd=0; ! int i=position; ! int inputLen = input.length(); ! char ch,prevChar=' '; ! while (i < inputLen && state < REMARK_NODE_ACCEPTED_STATE) ! { ! ch = input.charAt(i); ! if (state == REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE) { ! if (ch == '>') ! { ! state=REMARK_NODE_ACCEPTED_STATE; ! tagEnd=i; ! } else if (ch=='-') { ! tagContents.append(prevChar); ! } else if (ch=='!') { ! tagContents.append(prevChar); ! tagContents.append(prevChar); ! tagContents.append(ch); ! } else ! { ! // Rollback last 2 characters (assumed same) ! state = REMARK_NODE_ACCEPTING_STATE; ! tagContents.append(prevChar); ! tagContents.append(prevChar); ! } ! } ! if (state==REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE) ! { ! if (ch == '-') ! { ! state=REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE; ! } else ! { ! // Rollback ! state = REMARK_NODE_ACCEPTING_STATE; ! tagContents.append(prevChar); ! } ! } ! if (state==REMARK_NODE_ACCEPTING_STATE) { ! if (ch == '-') { ! state=REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE; ! } /*else ! if (ch == '<') ! { ! state=REMARK_NODE_ILLEGAL_STATE; ! } */ ! } ! if (state==REMARK_NODE_ACCEPTING_STATE) ! { ! // We can append contents now ! tagContents.append(ch); ! } ! ! if (state==REMARK_NODE_FIRST_DASH_RECEIVED_STATE) ! { ! if (ch == '-') { ! state=REMARK_NODE_ACCEPTING_STATE; ! // Do a lookahead and see if the next char is > ! if (input.length()>i+1 && input.charAt(i+1)=='>') { ! state=REMARK_NODE_ACCEPTED_STATE;tagEnd=i+1; ! } ! } ! else state=REMARK_NODE_ILLEGAL_STATE; ! } ! if (state==REMARK_NODE_EXCLAMATION_RECEIVED_STATE) ! { ! if (ch == '-') ! state=REMARK_NODE_FIRST_DASH_RECEIVED_STATE; ! else if (ch == '>') ! { ! state=REMARK_NODE_ACCEPTED_STATE; ! tagEnd=i; ! } ! 
else state=REMARK_NODE_ILLEGAL_STATE; ! } ! if (state==REMARK_NODE_OPENING_ANGLE_BRACKET_STATE) ! { ! if (ch == '!') ! state=REMARK_NODE_EXCLAMATION_RECEIVED_STATE; ! else state = REMARK_NODE_ILLEGAL_STATE; // This is not a remark tag ! } ! if (state == REMARK_NODE_BEFORE_PARSING_STATE) ! { ! if (ch=='<') { ! // Transition from State 0 to State 1 - Record data till > is encountered ! tagBegin = i; ! state = REMARK_NODE_OPENING_ANGLE_BRACKET_STATE; ! } ! else if (ch!=' ') { ! // Its not a space, hence this is probably a string node, not a remark node ! state = REMARK_NODE_ILLEGAL_STATE; ! } ! } ! // if (state > REMARK_NODE_OPENING_ANGLE_BRACKET_STATE && state < REMARK_NODE_ACCEPTED_STATE && i == input.length() - 1) ! if (state >=REMARK_NODE_ACCEPTING_STATE && state < REMARK_NODE_ACCEPTED_STATE && i == input.length() - 1) ! { ! // We need to continue parsing to the next line ! //input = reader.getNextLine(); ! tagContents.append(Parser.getLineSeparator()); ! do { ! input = reader.getNextLine(); ! } ! while (input!=null && input.length()==0); ! if (input!=null) ! inputLen = input.length(); else inputLen=-1; ! i=-1; ! } ! if (state==REMARK_NODE_ILLEGAL_STATE) ! { ! return null; ! } ! i++; ! prevChar = ch; ! } ! if (state==REMARK_NODE_ACCEPTED_STATE) ! return new RemarkNode(tagBegin,tagEnd,tagContents.toString()); ! else ! return null; ! } } |
From: <der...@us...> - 2003-09-01 19:20:38
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv13104 Modified Files: Parser.java Log Message: Fixed bug #798553 setInputHtml does not set text. Added ! (not) to the guard. Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.57 retrieving revision 1.58 diff -C2 -d -r1.57 -r1.58 *** Parser.java 1 Sep 2003 13:53:46 -0000 1.57 --- Parser.java 1 Sep 2003 19:20:35 -0000 1.58 *************** *** 1166,1176 **** } ! /** Initializes the parser with the given input HTML String. * @param inputHTML the input HTML that is to be parsed. */ ! public void setInputHTML(String inputHTML) { ! if ("".equals(inputHTML)) { ! reader = new NodeReader(new StringReader(inputHTML),""); ! } } --- 1166,1177 ---- } ! /** ! * Initializes the parser with the given input HTML String. * @param inputHTML the input HTML that is to be parsed. */ ! public void setInputHTML (String inputHTML) ! { ! if (!"".equals (inputHTML)) ! reader = new NodeReader (new StringReader (inputHTML), ""); } |
From: <der...@us...> - 2003-09-01 19:12:00
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1:/tmp/cvs-serv11603/tags Modified Files: AppletTag.java Log Message: Fixed bug #798554 Applet Tag does not update codebase data. Rewrote the AppletTag class to honour setting codebase, archive, applet class and applet params. Added 4 new test cases in AppletTagTest. Index: AppletTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/AppletTag.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** AppletTag.java 24 Aug 2003 21:59:42 -0000 1.24 --- AppletTag.java 1 Sep 2003 19:11:56 -0000 1.25 *************** *** 37,158 **** import org.htmlparser.util.NodeList; import org.htmlparser.util.SimpleNodeIterator; /** ! * HTMLAppletTag represents an <Applet> tag */ public class AppletTag extends CompositeTag { ! private java.lang.String codeBase; ! private java.lang.String archive; ! private java.lang.String appletClass; ! private Hashtable appletParams; ! /** ! * HTMLAppletTag constructor comment. ! * @param tagData The data for this tag. ! * @param compositeTagData The data for this composite tag. ! */ ! public AppletTag(TagData tagData,CompositeTagData compositeTagData) ! { ! super(tagData,compositeTagData); ! this.appletClass = compositeTagData.getStartTag().getAttribute("CODE"); ! this.codeBase = compositeTagData.getStartTag().getAttribute("CODEBASE"); ! this.archive = compositeTagData.getStartTag().getAttribute("ARCHIVE"); ! NodeList children = compositeTagData.getChildren(); ! appletParams = new Hashtable(); ! createAppletParamsTable(children); ! } ! public void createAppletParamsTable(NodeList children) { ! for (int i=0;i<children.size();i++) { ! Node node = children.elementAt(i); ! if (node instanceof Tag) { ! Tag tag = (Tag)node; ! if (tag.getTagName().equals("PARAM")) { ! String paramName = tag.getAttribute("NAME"); ! 
if (paramName!=null && paramName.length()!=0) ! { ! String paramValue = tag.getAttribute("VALUE"); ! appletParams.put(paramName,paramValue); ! } ! } ! } ! } ! } ! ! public java.lang.String getAppletClass() { ! return appletClass; ! } ! ! public Hashtable getAppletParams() { ! return appletParams; ! } ! ! public java.lang.String getArchive() { ! return archive; ! } ! ! public java.lang.String getCodeBase() { ! return codeBase; ! } ! ! public String getAttribute(String key) ! { ! return (String)appletParams.get(key); ! } ! ! public Enumeration getParameterNames() ! { ! return appletParams.keys(); ! } ! ! public void setAppletClass(java.lang.String newAppletClass) { ! appletClass = newAppletClass; ! } ! ! public void setAppletParams(Hashtable newAppletParams) { ! appletParams = newAppletParams; ! } ! ! public void setArchive(java.lang.String newArchive) { ! archive = newArchive; ! } ! ! public void setCodeBase(java.lang.String newCodeBase) { ! codeBase = newCodeBase; ! } ! ! public String toString() ! { ! StringBuffer sb = new StringBuffer(); ! sb.append("Applet Tag\n"); ! sb.append("**********\n"); ! sb.append("Class Name = "+appletClass+"\n"); ! sb.append("Archive = "+archive+"\n"); ! sb.append("Codebase = "+codeBase+"\n"); ! Enumeration params = appletParams.keys(); ! if (params==null) ! sb.append("No Params found.\n"); ! else ! { ! int cnt = 0; ! for (;params.hasMoreElements();) ! { ! String paramName = (String)params.nextElement(); ! String paramValue = (String)appletParams.get(paramName); ! sb.append((cnt++)+": Parameter name = "+paramName+", Parameter value = "+paramValue+"\n"); ! } ! } ! if (children()==null) ! sb.append("No Miscellaneous items\n"); else ! { ! sb.append("Miscellaneous items :\n"); ! for (SimpleNodeIterator e = children();e.hasMoreNodes();) ! { ! sb.append(((Tag)e.nextNode()).toString()); ! } ! } ! sb.append("End of Applet Tag\n"); ! sb.append("*****************\n"); ! return sb.toString(); ! 
} } --- 37,281 ---- import org.htmlparser.util.NodeList; import org.htmlparser.util.SimpleNodeIterator; + /** ! * AppletTag represents an <Applet> tag. ! * It extends a basic tag by providing accessors to the class, codebase, ! * archive and parameters. */ public class AppletTag extends CompositeTag { ! /** ! * Create a new AppletTag with the dats given. ! * @param tagData The data for this tag. ! * @param compositeTagData The data for this composite tag. ! */ ! public AppletTag (TagData tagData,CompositeTagData compositeTagData) ! { ! super(tagData,compositeTagData); ! } ! /** ! * Extract the applet <code>PARAM</code> tags from the child list. ! * @return The list of applet parameters (keys and values are String objects). ! */ ! public Hashtable createAppletParamsTable () ! { ! NodeList kids; ! Node node; ! Tag tag; ! String paramName; ! String paramValue; ! Hashtable ret; ! ret = new Hashtable (); ! kids = getChildren (); ! for (int i = 0; i < kids.size (); i++) ! { ! node = children.elementAt(i); ! if (node instanceof Tag) ! { ! tag = (Tag)node; ! if (tag.getTagName().equals ("PARAM")) ! { ! paramName = tag.getAttribute ("NAME"); ! if (null != paramName && 0 != paramName.length ()) ! { ! paramValue = tag.getAttribute ("VALUE"); ! ret.put (paramName,paramValue); ! } ! } ! } ! } ! ! return (ret); ! } ! ! /** ! * Get the class name of the applet. ! * @return The value of the <code>CODE</code> attribute. ! */ ! public String getAppletClass () ! { ! return (getAttribute ("CODE")); ! } ! ! /** ! * Get the applet parameters. ! * @return The list of parameter values (keys and values are String objects). ! */ ! public Hashtable getAppletParams () ! { ! return (createAppletParamsTable ()); ! } ! ! /** ! * Get the jar file of the applet. ! * @return The value of the <code>ARCHIVE</code> attribute, or <code>null</code> if it wasn't specified. ! */ ! public String getArchive() ! { ! return (getAttribute ("ARCHIVE")); ! } ! ! /** ! * Get the code base of the applet. ! 
* @return The value of the <code>CODEBASE</code> attribute, or <code>null</code> if it wasn't specified. ! */ ! public String getCodeBase() ! { ! return (getAttribute ("CODEBASE")); ! } ! ! /** ! * Get the <code>PARAM<code> tag with the given name. ! * <em>NOTE: This was called (erroneously) getAttribute() in previous versions.</em> ! * @param key The applet parameter name to get. ! * @return The value of the parameter or <code>null</code> if there is no parameter of that name. ! */ ! public String getParameter (String key) ! { ! return ((String)(getAppletParams ().get (key))); ! } ! ! /** ! * Get an enumeration over the (String) parameter names. ! * @return An enumeration of the <code>PARAM<code> tag <code>NAME<code> attributes. ! */ ! public Enumeration getParameterNames () ! { ! return (getAppletParams ().keys ()); ! } ! ! /** ! * Set the <code>CODE<code> attribute. ! * @param The new applet class. ! */ ! public void setAppletClass (String newAppletClass) ! { ! setAttribute ("CODE", newAppletClass); ! } ! ! /** ! * Set the enclosed <code>PARM<code> children. ! * @param The new parameters. ! */ ! public void setAppletParams (Hashtable newAppletParams) ! { ! NodeList kids; ! Node node; ! Tag tag; ! String paramName; ! String paramValue; ! String s; ! TagData tagData; ! ! kids = getChildren (); ! // erase appletParams from kids ! for (int i = 0; i < kids.size (); ) ! { ! node = kids.elementAt (i); ! if (node instanceof Tag) ! if (((Tag)node).getTagName ().equals ("PARAM")) ! kids.remove (i); ! else ! i++; ! else ! i++; ! } ! ! // add newAppletParams to kids ! for (Enumeration e = newAppletParams.keys (); e.hasMoreElements (); ) ! { ! paramName = (String)e.nextElement (); ! paramValue = (String)newAppletParams.get (paramName); ! s = "PARAM VALUE=\"" + paramValue + "\" NAME=\"" + paramName + "\""; ! tagData = new TagData (0, 0, 0, 0, s, s, "", false); // what, no URL? ! kids.add (new Tag (tagData)); ! } ! ! //set kids as new children ! setChildren (kids); ! } ! ! 
/** ! * Set the <code>ARCHIVE<code> attribute. ! * @param The new archive file. ! */ ! public void setArchive (String newArchive) ! { ! setAttribute ("ARCHIVE", newArchive); ! } ! ! /** ! * Set the <code>CODEBASE<code> attribute. ! * @param The new applet code base. ! */ ! public void setCodeBase (String newCodeBase) ! { ! setAttribute ("CODEBASE", newCodeBase); ! } ! ! /** ! * Output a string representing this applet tag. ! * @return A string showing the contents of the applet tag. ! */ ! public String toString () ! { ! Hashtable parameters; ! Enumeration params; ! String paramName; ! String paramValue; ! boolean found; ! Node node; ! StringBuffer ret; ! ! ret = new StringBuffer(500); ! ret.append ("Applet Tag\n"); ! ret.append ("**********\n"); ! ret.append ("Class Name = "); ! ret.append (getAppletClass ()); ! ret.append ("\n"); ! ret.append ("Archive = "); ! ret.append (getArchive ()); ! ret.append ("\n"); ! ret.append ("Codebase = "); ! ret.append (getCodeBase ()); ! ret.append ("\n"); ! parameters = getAppletParams (); ! params = parameters.keys (); ! if (null == params) ! ret.append ("No Params found.\n"); ! else ! for (int cnt = 0; params.hasMoreElements (); cnt++) ! { ! paramName = (String)params.nextElement (); ! paramValue = (String)parameters.get (paramName); ! ret.append (cnt); ! ret.append (": Parameter name = "); ! ret.append (paramName); ! ret.append (", Parameter value = "); ! ret.append (paramValue); ! ret.append ("\n"); ! } ! found = false; ! for (SimpleNodeIterator e = children (); e.hasMoreNodes ();) ! { ! node = e.nextNode (); ! if (node instanceof Tag) ! if (((Tag)node).getTagName ().equals ("PARAM")) ! continue; ! if (!found) ! ret.append ("Miscellaneous items :\n"); ! else ! ret.append (" "); ! found = true; ! ret.append (node.toString ()); ! } ! if (found) ! ret.append ("\n"); ! ret.append ("End of Applet Tag\n"); ! ret.append ("*****************\n"); ! ! return (ret.toString ()); ! } } |
From: <der...@us...> - 2003-09-01 19:12:00
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv11603/tests/tagTests Modified Files: AppletTagTest.java Log Message: Fixed bug #798554 Applet Tag does not update codebase data. Rewrote the AppletTag class to honour setting codebase, archive, applet class and applet params. Added 4 new test cases in AppletTagTest. Index: AppletTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/AppletTagTest.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** AppletTagTest.java 24 Aug 2003 21:59:44 -0000 1.24 --- AppletTagTest.java 1 Sep 2003 19:11:57 -0000 1.25 *************** *** 37,70 **** public class AppletTagTest extends ParserTestCase { ! public AppletTagTest(String name) { ! super(name); ! } ! public void testToHTML() throws ParserException { ! String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; ! Hashtable paramsMap = new Hashtable(); ! String testHTML = new String("<APPLET CODE=Myclass.class ARCHIVE=test.jar CODEBASE=www.kizna.com>\n"); ! for (int i = 0;i<paramsData.length;i++) ! { ! testHTML+="<PARAM NAME=\""+paramsData[i][0]+"\" VALUE=\""+paramsData[i][1]+"\">\n"; ! paramsMap.put(paramsData[i][0],paramsData[i][1]); ! } ! testHTML+= ! "</APPLET>\n"+ ! "</HTML>"; ! createParser(testHTML); ! // Register the image scanner ! parser.registerScanners(); ! parseAndAssertNodeCount(2); ! assertTrue("Node should be an applet tag",node[0] instanceof AppletTag); ! // Check the data in the applet tag ! AppletTag appletTag = (AppletTag)node[0]; ! String expectedRawString = ! "<APPLET CODE=\"Myclass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"test.jar\">\r\n"+ ! "<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ ! "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ ! "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ ! "</APPLET>"; ! 
assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); ! } } --- 37,187 ---- public class AppletTagTest extends ParserTestCase { ! public AppletTagTest(String name) { ! super(name); ! } ! public void testToHTML() throws ParserException { ! String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; ! Hashtable paramsMap = new Hashtable(); ! String testHTML = new String("<APPLET CODE=Myclass.class ARCHIVE=test.jar CODEBASE=www.kizna.com>\n"); ! for (int i = 0;i<paramsData.length;i++) ! { ! testHTML+="<PARAM NAME=\""+paramsData[i][0]+"\" VALUE=\""+paramsData[i][1]+"\">\n"; ! paramsMap.put(paramsData[i][0],paramsData[i][1]); ! } ! testHTML+= ! "</APPLET>\n"+ ! "</HTML>"; ! createParser(testHTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(2); ! assertTrue("Node should be an applet tag",node[0] instanceof AppletTag); ! // Check the data in the applet tag ! AppletTag appletTag = (AppletTag)node[0]; ! String expectedRawString = ! "<APPLET CODE=\"Myclass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"test.jar\">\r\n"+ ! "<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ ! "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ ! "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ ! "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); ! } ! ! public void testChangeCodebase() throws ParserException { ! String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; ! Hashtable paramsMap = new Hashtable(); ! String testHTML = new String("<APPLET CODE=Myclass.class ARCHIVE=test.jar CODEBASE=www.kizna.com>\n"); ! for (int i = 0;i<paramsData.length;i++) ! { ! testHTML+="<PARAM NAME=\""+paramsData[i][0]+"\" VALUE=\""+paramsData[i][1]+"\">\n"; ! paramsMap.put(paramsData[i][0],paramsData[i][1]); ! } ! testHTML+= ! "</APPLET>\n"+ ! "</HTML>"; ! createParser(testHTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(2); ! assertTrue("Node should be an applet tag",node[0] instanceof AppletTag); ! 
AppletTag appletTag = (AppletTag)node[0]; ! appletTag.setCodeBase ("htmlparser.sourceforge.net"); ! // Check the data in the applet tag ! String expectedRawString = ! "<APPLET CODE=\"Myclass.class\" CODEBASE=\"htmlparser.sourceforge.net\" ARCHIVE=\"test.jar\">\r\n"+ ! "<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ ! "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ ! "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ ! "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); ! } ! ! public void testChangeArchive() throws ParserException { ! String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; ! Hashtable paramsMap = new Hashtable(); ! String testHTML = new String("<APPLET CODE=Myclass.class ARCHIVE=test.jar CODEBASE=www.kizna.com>\n"); ! for (int i = 0;i<paramsData.length;i++) ! { ! testHTML+="<PARAM NAME=\""+paramsData[i][0]+"\" VALUE=\""+paramsData[i][1]+"\">\n"; ! paramsMap.put(paramsData[i][0],paramsData[i][1]); ! } ! testHTML+= ! "</APPLET>\n"+ ! "</HTML>"; ! createParser(testHTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(2); ! assertTrue("Node should be an applet tag",node[0] instanceof AppletTag); ! AppletTag appletTag = (AppletTag)node[0]; ! appletTag.setArchive ("htmlparser.jar"); ! // Check the data in the applet tag ! String expectedRawString = ! "<APPLET CODE=\"Myclass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"htmlparser.jar\">\r\n"+ ! "<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ ! "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ ! "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ ! "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); ! } ! ! public void testChangeAppletClass() throws ParserException { ! String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; ! Hashtable paramsMap = new Hashtable(); ! String testHTML = new String("<APPLET CODE=Myclass.class ARCHIVE=test.jar CODEBASE=www.kizna.com>\n"); ! for (int i = 0;i<paramsData.length;i++) ! { ! 
testHTML+="<PARAM NAME=\""+paramsData[i][0]+"\" VALUE=\""+paramsData[i][1]+"\">\n"; ! paramsMap.put(paramsData[i][0],paramsData[i][1]); ! } ! testHTML+= ! "</APPLET>\n"+ ! "</HTML>"; ! createParser(testHTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(2); ! assertTrue("Node should be an applet tag",node[0] instanceof AppletTag); ! AppletTag appletTag = (AppletTag)node[0]; ! appletTag.setAppletClass ("MyOtherClass.class"); ! // Check the data in the applet tag ! String expectedRawString = ! "<APPLET CODE=\"MyOtherClass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"test.jar\">\r\n"+ ! "<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ ! "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ ! "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ ! "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); ! } ! ! public void testChangeAppletParams() throws ParserException { ! String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; ! Hashtable paramsMap = new Hashtable(); ! String testHTML = new String("<APPLET CODE=Myclass.class ARCHIVE=test.jar CODEBASE=www.kizna.com>\n"); ! for (int i = 0;i<paramsData.length;i++) ! { ! testHTML+="<PARAM NAME=\""+paramsData[i][0]+"\" VALUE=\""+paramsData[i][1]+"\">\n"; ! paramsMap.put(paramsData[i][0],paramsData[i][1]); ! } ! testHTML+= ! "</APPLET>\n"+ ! "</HTML>"; ! createParser(testHTML); ! parser.registerScanners(); ! parseAndAssertNodeCount(2); ! assertTrue("Node should be an applet tag",node[0] instanceof AppletTag); ! AppletTag appletTag = (AppletTag)node[0]; ! paramsMap = new Hashtable(); ! String [][] newparamsData = {{"First","One"},{"Second","Two"},{"Third","3"}}; ! for (int i = 0;i<paramsData.length;i++) ! { ! paramsMap.put(newparamsData[i][0],newparamsData[i][1]); ! } ! appletTag.setAppletParams (paramsMap); ! // Check the data in the applet tag ! String expectedRawString = ! "<APPLET CODE=\"Myclass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"test.jar\">\r\n"+ ! 
"<PARAM VALUE=\"Two\" NAME=\"Second\">"+ ! "<PARAM VALUE=\"One\" NAME=\"First\">"+ ! "<PARAM VALUE=\"3\" NAME=\"Third\">"+ ! "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); ! } } |
From: <der...@us...> - 2003-09-01 13:53:52
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv18016 Modified Files: Parser.java Log Message: Fix bug #798552 Sample for node iterator incorrect DocComment changes only. Tabs converted to spaces. Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.56 retrieving revision 1.57 diff -C2 -d -r1.56 -r1.57 *** Parser.java 24 Aug 2003 21:59:41 -0000 1.56 --- Parser.java 1 Sep 2003 13:53:46 -0000 1.57 *************** *** 96,102 **** * // In this example, we are registering all the common scanners * parser.registerScanners(); ! * for (NodeIterator i = parser.elements();e.hasMoreNodes();) { ! * Node node = i.nextNode(); ! * node.print(); * } * </pre> Below is some sample code to parse Yahoo.com and print only the text --- 96,102 ---- * // In this example, we are registering all the common scanners * parser.registerScanners(); [...1259 lines suppressed...] ! return parser; ! } ! /** ! * @return String lineSeparator that will be used in toHTML() ! */ ! public static String getLineSeparator() { ! return lineSeparator; ! } ! public StringNodeFactory getStringNodeFactory() { ! if (stringNodeFactory == null) ! stringNodeFactory = new StringNodeFactory(); ! return stringNodeFactory; ! } ! ! public void setStringNodeFactory(StringNodeFactory stringNodeFactory) { ! this.stringNodeFactory = stringNodeFactory; ! } } |