[Htmlparser-user] HTML parser bug with closing tag
Brought to you by:
derrickoswald
From: Karsten O. <wid...@t-...> - 2007-09-12 09:19:04
|
Hello, HTMLParser does not work as expected. If XML-conforming tags like <br/> are closed immediately, the following happens when an attribute is added: <br /id="test"> I would instead expect this: <br id="test"/> The attached test can be used to demonstrate the problem. Regards, Karsten import org.htmlparser.Node; import org.htmlparser.Parser; import org.htmlparser.nodes.TagNode; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; import org.junit.Test; public class HTMLParserBug { private final String invalid = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">" + "<html>" + "<head>" + "<meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-8859-1\">" + "</head>" + "<body>" + "Text" + "<br/>" + "Text" + "</body>" + "</html>"; @Test public void testClosingTag() { try { Parser parser = Parser.createParser(invalid, "ISO-8859-1"); NodeIterator it = parser.elements(); processNode(it); } catch (ParserException e) { e.printStackTrace(); } } private static void processNode(NodeIterator it) throws ParserException { while (it.hasMoreNodes()) { Node node = it.nextNode(); System.out.println(node); if (node instanceof TagNode) { ((TagNode) node).setAttribute("id", "test"); System.out.println(node); NodeList list = ((TagNode) node).getChildren(); if (list != null) { processNode(list.elements()); } } } } } |