Re: [Htmlparser-user] HTML parser bug with closing tag
Brought to you by:
derrickoswald
From: Derrick O. <der...@ro...> - 2007-09-12 12:14:20
|
This has been fixed in the trunk version of the subversion repository, but not yet released as a package, sorry.
See bug #1761484 tag.setAttribute() not compatible with <tag/>

----- Original Message ----
From: Karsten Ohme <wid...@t-...>
To: htm...@li...urceforge.net
Sent: Wednesday, September 12, 2007 5:15:16 AM
Subject: [Htmlparser-user] HTML parser bug with closing tag

Hello,

HTMPParser does not work like expected.
If some XML conforming tags like <br/> are closed immediately, the following happens if an attribute is added:

<br /id="test">

I would instead expect this: <br id="test"/>

The attached test can be used for showing the problem.

Regards,
Karsten

import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.junit.Test;


public class HTMLParserBug {

    private final String invalid = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">" + "<html>" + "<head>"
            + "<meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-8859-1\">" + "</head>" + "<body>"
            + "Text" + "<br/>" + "Text" + "</body>" + "</html>";

    @Test
    public void testClosingTag() {
        try {
            Parser parser = Parser.createParser(invalid, "ISO-8859-1");
            NodeIterator it = parser.elements();
            processNode(it);
        } catch (ParserException e) {
            e.printStackTrace();
        }
    }

    private static void processNode(NodeIterator it) throws ParserException {
        while (it.hasMoreNodes()) {
            Node node = it.nextNode();
            System.out.println(node);
            if (node instanceof TagNode) {
                ((TagNode) node).setAttribute("id", "test");
                System.out.println(node);
                NodeList list = ((TagNode) node).getChildren();
                if (list != null) {
                    processNode(list.elements());
                }
            }
        }
    }

}


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Htmlparser-user mailing list
H...@li...urceforge.net
https://lists.sourceforge.net/lists/listinfo/htmlparser-user

|