// This code doesn't compile; the compiler complains "The constructor TextNode() is
// undefined". I took this example from the documentation and thought it was the
// way to override text nodes.
// My goal is to override TextNode so that I can process the text and turn a bare
// URL such as http://link.com into a real link: <a href="link.com">link.com</a>
// Any ideas?
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.Text;
import org.htmlparser.lexer.Page;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.PrototypicalNodeFactory;
import org.htmlparser.tags.*;
import org.htmlparser.nodes.TextNode;
public static void main (String[] args) throws ParserException
{
String html =
"<html><body>\n<script>alert('hi');</script><select id=\"da\"></select>" +
"<p><a
href=\"http://googlelink.com\">123456</a><br/>" +
"<h1>hello</h1><a href=cnn.com></a>\n" +
"http://google.com</br>" +
"<b>https://cnn.com/?test=3&2=d</b></p>\n" +
"<table><tr><td>http://table.com</td></tr></table>" +
"<a href=\"a.html\">123</a>\n<a
href=\"http://www.alreadylinkified.com/\">http://www.alreadylinkified.com</a
>\n</body></html>";
PrototypicalNodeFactory factory = new
PrototypicalNodeFactory();
factory.setTextPrototype (new TextNode () {
public String toPlainTextString()
{
return (org.htmlparser.util.Translate.decode
(super.toPlainTextString ()));
}
});
Parser parser = new Parser(html);
parser.setNodeFactory(factory);
NodeList all = parser.parse(null);
System.out.println( all.toHtml());
}
}
|