No end tag is found for the FIELDSET tag, although it is present in the HTML
Brought to you by:
derrickoswald
// Parser instance; assigned in the constructor and fed the input in parse().
private Parser parser;
// Node filter; assigned in the constructor and applied in parse().
private NodeFilter filter;
public HtmlparserTagNameEventer() {
parser = new Parser();
filter = new NotFilter(new OrFilter(
new TagNameFilter[] {
new TagNameFilter("DOCTYPE"),
new TagNameFilter("!DOCTYPE"),
new TagNameFilter("script"),
}
));
}
/**
 * Parses {@code content} and walks the resulting nodes with a visitor that
 * checks whether open and end tags are balanced, printing a trace of the
 * open-tag stack to standard output.
 *
 * <p>Open tags that report an end tag are pushed on a main stack; open tags
 * that report no end tag are pushed on a separate "problem" stack. Each end
 * tag must match the top of the main stack; any other situation prints a
 * diagnostic and terminates the JVM via {@code System.exit(1)}.
 *
 * @param content HTML text handed to the parser
 * @param url     not used by this method
 * @throws ParserException if the underlying parser calls throw it
 */
public void parse(final String content, String url) throws ParserException {
    parser.setInputHTML(content);
    // NOTE(review): the same filter is applied twice (parse(filter) and then
    // extractAllNodesThatMatch(filter)); the second call looks redundant — confirm
    // against the HTML Parser API before removing.
    NodeList nl = parser.parse(filter).extractAllNodesThatMatch(filter);
    nl.visitAllNodesWith(new NodeVisitor() {
        // Open tags that have an end tag, innermost on top.
        private final Stack<Tag> stack = new Stack<Tag>();
        // Open, non-self-closed tags for which getEndTag() returned null.
        private final Stack<Tag> stackProblemed = new Stack<Tag>();

        @Override
        public void beginParsing() {
            stack.clear();
            stackProblemed.clear();
            onBefore();
        }

        @Override
        public void finishedParsing() {
            onAfterParse();
        }

        @Override
        public void visitTag(Tag tag) {
            if (tag.getTagName().equalsIgnoreCase("fieldset")) {
                // for debug; terminated with a newline so successive records do not run together
                System.err.printf("%s [%s]%s%s%n", tag.getTagName(),
                        (tag.isEndTag() ? "close" : "open"),
                        (tag.isEmptyXmlTag() ? " [selfclosed]" : ""),
                        (tag.getEndTag() != null
                                ? String.format(" [has end tag: %s]", tag.getEndTag().getTagName())
                                : ""));
            }
            // End tags and self-closed tags are not tracked on either stack.
            if (tag.isEndTag() || tag.isEmptyXmlTag()) {
                return;
            }
            if (tag.getEndTag() != null) {
                System.out.printf("<%s>%n", tag.getTagName());
                stack.push(tag);
                printStack(stack);
            } else {
                stackProblemed.push(tag);
            }
        }

        @Override
        public void visitEndTag(Tag tag) {
            final String name = tag.getTagName();
            System.out.printf("</%s>%n", name);

            // NOTE(review): every failure path below calls System.exit(1), which
            // terminates the whole JVM; kept because this is diagnostic code.
            if (stack.isEmpty()) {
                System.err.printf("problem situation [empty stack] </%s>%n", name);
                System.exit(1);
                return;
            }
            // Tag names are compared case-insensitively on both stacks.
            if (stack.peek().getTagName().equalsIgnoreCase(name)) {
                stack.pop();
                printStack(stack);
                return;
            }
            if (!stackProblemed.isEmpty()
                    && stackProblemed.peek().getTagName().equalsIgnoreCase(name)) {
                Tag popped = stackProblemed.pop();
                System.out.printf("error: tag is not on main stack <%s> popped%n", popped.getTagName());
                System.exit(1);
            } else {
                System.err.printf("problem situation [tag not match] </%s>%n", name);
                System.exit(1);
            }
        }

        /** Prints the tag names on {@code tags}, bottom to top, joined by "->". */
        private void printStack(Stack<Tag> tags) {
            StringBuilder sb = new StringBuilder();
            for (Tag open : tags) {
                if (sb.length() > 0) {
                    sb.append("->");
                }
                sb.append(open.getTagName());
            }
            System.out.println(sb.toString());
        }
    });
}
/*
After running parse() you will see that when
(content is read from file in attachment)
*/