Menu

#297 No end tag was found for the FIELDSET tag, although it is present in the HTML

v1.6
open
nobody
endtag (1)
3
2014-12-07
2013-07-03
No
/** Parser instance; assigned in the constructor and given its input in {@code parse}. */
private Parser parser;

/** Node filter used when collecting nodes in {@code parse}; assigned in the constructor. */
private NodeFilter filter;

public HtmlparserTagNameEventer() {
    parser = new Parser();
    filter = new NotFilter(new OrFilter(
            new TagNameFilter[] {
                    new TagNameFilter("DOCTYPE"),
                    new TagNameFilter("!DOCTYPE"),
                    new TagNameFilter("script"),
            }
            ));
}

/**
 * Parses {@code content} and walks the resulting nodes with a visitor that traces
 * every opening and closing tag and checks that closing tags match the most recently
 * opened tag.
 *
 * <p>Opening tags that report an end tag ({@code getEndTag() != null}) are pushed on a
 * main stack; opening tags that report none are pushed on a separate "problem" stack.
 * On a closing tag the main stack is checked first. Any mismatch is reported and the
 * JVM is terminated via {@code System.exit(1)}.
 *
 * <p>NOTE(review): the reported symptom is that {@code <fieldset>} lands on the problem
 * stack although {@code </fieldset>} exists in the input. Presumably the library only
 * pairs end tags for tag classes it treats as composite - confirm against the
 * HTMLParser documentation.
 *
 * @param content HTML text handed to the parser
 * @param url     not used by this method
 * @throws ParserException propagated from the parser / node-list calls
 */
public void parse(final String content, String url) throws ParserException {
    parser.setInputHTML(content);
    NodeList nl = parser.parse(filter).extractAllNodesThatMatch(filter);
    nl.visitAllNodesWith(new NodeVisitor() {
        /** Open tags for which the library reported a paired end tag. */
        private final Stack<Tag> stack = new Stack<Tag>();
        /** Open, non-self-closed tags for which {@code getEndTag()} returned null. */
        private final Stack<Tag> stackProblemed = new Stack<Tag>();

        @Override
        public void beginParsing() {
            // Reset state so the visitor starts each traversal clean.
            stack.clear();
            stackProblemed.clear();
            onBefore();
        }

        @Override
        public void finishedParsing() {
            onAfterParse();
        }

        @Override
        public void visitTag(Tag tag) {
            if (tag.getTagName().equalsIgnoreCase("fieldset")) {
                // for debug; terminated with a newline so successive records do not run together
                System.err.printf("%s [%s]%s%s%n", tag.getTagName(),
                        (tag.isEndTag() ? "close" : "open"),
                        (tag.isEmptyXmlTag() ? " [selfclosed]" : ""),
                        (tag.getEndTag() != null ? String.format(" [has end tag: %s]", tag.getEndTag().getTagName()) : ""));
            }
            // Closing tags and self-closed tags never go on either stack.
            if (tag.isEndTag() || tag.isEmptyXmlTag()) {
                return;
            }
            if (tag.getEndTag() != null) {
                System.out.printf("<%s>%n", tag.getTagName());
                stack.push(tag);
                printStack(stack);
            } else {
                stackProblemed.push(tag);
            }
        }

        @Override
        public void visitEndTag(Tag tag) {
            System.out.printf("</%s>%n", tag.getTagName());
            if (stack.size() > 0) {
                // peek() already yields the Tag; compare names case-insensitively in both branches.
                if (stack.peek().getTagName().equalsIgnoreCase(tag.getTagName())) {
                    stack.pop();
                    printStack(stack);
                } else {
                    if (stackProblemed.size() > 0 && stackProblemed.peek().getTagName().equalsIgnoreCase(tag.getTagName())) {
                        Tag ppped = stackProblemed.pop();
                        System.out.printf("error: tag is not on main stack <%s> popped%n", ppped.getTagName());
                        System.exit(1);
                        //System.out.printf(" warning: problem tag <%s> popped%n", tag.getTagName());
                    } else {
                        System.err.printf("problem situation [tag not match] </%s>%n", tag.getTagName());
                        System.exit(1);
                    }
                }
            } else {
                System.err.printf("problem situation [empty stack] </%s>%n", tag.getTagName());
                System.exit(1);
            }
        }

        /** Prints the tag names on {@code stack}, bottom to top, joined by {@code "->"}. */
        private void printStack(Stack<Tag> stack) {
            StringBuilder sb = new StringBuilder();
            int k = 0;
            for (Tag tag : stack) {
                sb.append(k > 0 ? "->" : "").append(tag.getTagName());
                k++;
            }
            System.out.println(sb.toString());
        }
    });
}

/*
After running parse() you will see that, when the <fieldset> tag is visited, the condition Tag.getEndTag() == null indicates that it has no end tag (</fieldset>), even though this tag is present in the HTML, as you can see, and moreover is encountered as a closing tag in visitEndTag(Tag tag).
What I do not understand: does the condition tag.getEndTag() == null mean that the tag has no closing tag, or not? The Javadoc says that it does...

(content is read from file in attachment)
*/

1 Attachments

Discussion


Log in to post a comment.