Re: [Htmlparser-user] stack overflow in toHtml() after removing attributes
Brought to you by:
derrickoswald
|
From: Derrick O. <Der...@Ro...> - 2006-04-03 00:33:46
|
I don't see anything obviously wrong with it...
...can you confirm that it doesn't overflow if you don't remove the attributes?
dust wrote:
>Hello,
>
>
>Am I doing something wrong in the attached code?
>
>It generates a stack overflow error when run with the
>default URL found in main.
>
>
>
>Exception in thread "main" java.lang.StackOverflowError
> at java.lang.StringBuffer.append(Unknown Source)
> at org.htmlparser.lexer.InputStreamSource.getCharacters(InputStreamSource.java:641)
> at org.htmlparser.lexer.Page.getText(Page.java:1021)
> at org.htmlparser.lexer.PageAttribute.getRawValue(PageAttribute.java:384)
> at org.htmlparser.Attribute.toString(Attribute.java:730)
> at org.htmlparser.nodes.TagNode.toHtml(TagNode.java:686)
> at org.htmlparser.tags.CompositeTag.toHtml(CompositeTag.java:177)
> at org.htmlparser.tags.CompositeTag.putEndTagInto(CompositeTag.java:167)
> at org.htmlparser.tags.CompositeTag.toHtml(CompositeTag.java:182)
> at org.htmlparser.tags.CompositeTag.putEndTagInto(CompositeTag.java:167)
> at org.htmlparser.tags.CompositeTag.toHtml(CompositeTag.java:182)
> at org.htmlparser.tags.CompositeTag.putEndTagInto(CompositeTag.java:167)
> at org.htmlparser.tags.CompositeTag.toHtml(CompositeTag.java:182)
> at org.htmlparser.tags.CompositeTag.putEndTagInto(CompositeTag.java:167)
> at org.htmlparser.tags.CompositeTag.toHtml(CompositeTag.java:182)
>
>etc,
>
>--
>
>
>------------------------------------------------------------------------
>
>import java.util.HashSet;
>import java.util.Set;
>import java.util.Vector;
>
>import org.htmlparser.Attribute;
>import org.htmlparser.Node;
>import org.htmlparser.Parser;
>import org.htmlparser.Tag;
>import org.htmlparser.util.NodeList;
>import org.htmlparser.util.ParserException;
>import org.htmlparser.util.SimpleNodeIterator;
>
>public class HtmlParser
>{
> Parser parser;
>
> public HtmlParser (String link) throws ParserException
> {
> parser = new Parser (link);
> }
>
> public static void main (String[] args) throws ParserException
> {
> String link="http://mips.gsf.de/projects/fungi/fungi_db.html";
> if(args.length>0)
> link=args[0];
>
> HtmlParser htmlParser = new HtmlParser (link);
> String html = htmlParser.parse();
> System.out.println(html);
> }
>
> private String parse() throws ParserException {
>
> NodeList list = parser.parse(null);
>
> recurse(list);
> System.err.println("done, trying toHtml()");
> return list.toHtml();
> }
>
> private NodeList recurse(NodeList list) {
> if(list==null)
> return null;
> Node node;
> SimpleNodeIterator iterator = list.elements();
> while(iterator.hasMoreNodes())
> {
> node = iterator.nextNode();
> if(node==null)
> break;
> if(node instanceof Tag)
> {
> Tag tag = (Tag)node;
> removeAttributes(tag);
> recurse(node.getChildren());
> }
> }
> return null;
> }
>
> static private void removeAttributes(Tag tag) {
> String[] allowedAttrs = {""};
> Set allowed = new HashSet();
> for(int i=0;i<allowedAttrs.length;i++)
> allowed.add(allowedAttrs[i]);
>
> allowed.add(tag.getRawTagName());
> allowed.add("/"+tag.getRawTagName());
>
> Vector attrs = tag.getAttributesEx();
> for(int i=0;i<attrs.size();i++)
> {
> Attribute attr = (Attribute)attrs.get(i);
> if(attr.getName()==null)
> continue;
> if(!allowed.contains(attr.getName()))
> {
> tag.removeAttribute(attr.getName());
> System.out.println("Removed attr: "+attr.getName());
> }
> }
> }
>}
>
>
|