Hello,
HTMLParser does not work as expected.
If an XML-conforming tag such as <br/> is closed immediately (self-closing),
the following output is produced when an attribute is added to it:
<br /id="test">
I would instead expect this: <br id="test"/>
The attached test can be used to reproduce the problem.
Regards,
Karsten
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.junit.Test;
public class HTMLParserBug {
private final String invalid = "<!DOCTYPE html PUBLIC \"-//W3C//DTD
HTML 4.01 Transitional//EN\">" + "<html>" + "<head>"
+ "<meta
http-equiv=\"content-type\" content=\"text/html; charset=ISO-8859-1\">"
+ "</head>" + "<body>"
+ "Text" + "<br/>" + "Text" +
"</body>" + "</html>";
@Test
public void testClosingTag() {
try {
Parser parser = Parser.createParser(invalid, "ISO-8859-1");
NodeIterator it = parser.elements();
processNode(it);
} catch (ParserException e) {
e.printStackTrace();
}
}
private static void processNode(NodeIterator it) throws
ParserException {
while (it.hasMoreNodes()) {
Node node = it.nextNode();
System.out.println(node);
if (node instanceof TagNode) {
((TagNode) node).setAttribute("id", "test");
System.out.println(node);
NodeList list = ((TagNode) node).getChildren();
if (list != null) {
processNode(list.elements());
}
}
}
}
}
|