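[Htmlparser-cvs] htmlparser/src/org/htmlparser/visitors HtmlPage.java,1.35,1.36 NodeVisitor.java,1.30,1.31 TagFindingVisitor.java,1.36,1.37 TextExtractingVisitor.java,1.34,1.35 UrlModifyingVisitor.java,1.33,1.34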
Brought to you by:
derrickoswald
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors In directory sc8-pr-cvs1:/tmp/cvs-serv11047/visitors Modified Files: HtmlPage.java NodeVisitor.java TagFindingVisitor.java TextExtractingVisitor.java UrlModifyingVisitor.java Log Message: Fixed up the broken visitor logic. Added some docos on NodeVisitor. Index: HtmlPage.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/HtmlPage.java,v retrieving revision 1.35 retrieving revision 1.36 diff -C2 -d -r1.35 -r1.36 *** HtmlPage.java 28 Sep 2003 15:34:00 -0000 1.35 --- HtmlPage.java 28 Sep 2003 19:30:04 -0000 1.36 *************** *** 64,85 **** public void visitTag(Tag tag) { ! if (tag.isEndTag ()) ! { if (isBodyTag(tag)) ! bodyTagBegin = false; ! addTagToBodyIfApplicable(tag); } ! else ! { ! addTagToBodyIfApplicable(tag); ! if (isTable(tag)) { ! tables.add(tag); ! } ! else { ! if (isBodyTag(tag)) ! bodyTagBegin = true; ! } ! } } --- 64,83 ---- public void visitTag(Tag tag) { ! addTagToBodyIfApplicable(tag); ! ! if (isTable(tag)) { ! tables.add(tag); ! } ! else { if (isBodyTag(tag)) ! bodyTagBegin = true; } ! } ! public void visitEndTag(Tag tag) ! { ! if (isBodyTag(tag)) ! bodyTagBegin = false; ! addTagToBodyIfApplicable(tag); } Index: NodeVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/NodeVisitor.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** NodeVisitor.java 28 Sep 2003 15:34:00 -0000 1.30 --- NodeVisitor.java 28 Sep 2003 19:30:04 -0000 1.31 *************** *** 36,90 **** import org.htmlparser.tags.TitleTag; ! public abstract class NodeVisitor { ! private boolean recurseChildren; ! private boolean recurseSelf; ! ! public NodeVisitor() { ! this(true); } ! ! public NodeVisitor(boolean recurseChildren) { ! this.recurseChildren = recurseChildren; ! 
this.recurseSelf = true; } ! ! public NodeVisitor(boolean recurseChildren,boolean recurseSelf) { ! this.recurseChildren = recurseChildren; ! this.recurseSelf = recurseSelf; } ! public void visitTag(Tag tag) { ! } ! public void visitStringNode(StringNode stringNode) { } ! ! public void visitLinkTag(LinkTag linkTag) { } ! ! public void visitImageTag(ImageTag imageTag) { } ! ! public void visitTitleTag(TitleTag titleTag) { ! } ! public void visitRemarkNode(RemarkNode remarkNode) { ! } ! public boolean shouldRecurseChildren() { ! return recurseChildren; } ! ! public boolean shouldRecurseSelf() { ! return recurseSelf; } ! /** ! * Override this method if you wish to do special ! * processing upon completion of parsing ! */ ! public void finishedParsing() { } } --- 36,154 ---- import org.htmlparser.tags.TitleTag; ! /** ! * The base class for the 'Visitor' pattern. ! * Classes that wish to use <code>visitAllNodesWith()</code> will subclass ! * this class and provide implementations for methods they are interested in ! * processing.<p> ! * The operation of <code>visitAllNodesWith()</code> is to call ! * <code>beginParsing()</code>, then <code>visitXXX()</code> according to the ! * types of nodes encountered in depth-first order and finally ! * <code>finishedParsing()</code>.<p> ! * There are currently three specialized <code>visitXXX()</code> calls for ! * titles, images and links. These call their specialized visit, and then ! * perform the generic processing. ! * Typical code to print all the link tags: ! * <pre> ! * import org.htmlparser.Parser; ! * import org.htmlparser.tags.LinkTag; ! * import org.htmlparser.util.ParserException; ! * import org.htmlparser.visitors.NodeVisitor; ! * ! * public class Visitor extends NodeVisitor ! * { ! * public Visitor () ! * { ! * } ! * public void visitLinkTag (LinkTag linkTag) ! * { ! * System.out.println (linkTag); ! * } ! * public static void main (String[] args) throws ParserException ! * { ! 
* Parser parser = new Parser ("http://cbc.ca"); ! * parser.registerScanners (); ! * Visitor visitor = new Visitor (); ! * parser.visitAllNodesWith (visitor); ! * } ! * } ! * </pre> ! */ ! public abstract class NodeVisitor ! { ! private boolean mRecurseChildren; ! private boolean mRecurseSelf; ! ! public NodeVisitor () ! { ! this (true); } ! ! public NodeVisitor (boolean recurseChildren) ! { ! this (recurseChildren, true); } ! ! public NodeVisitor (boolean recurseChildren, boolean recurseSelf) ! { ! mRecurseChildren = recurseChildren; ! mRecurseSelf = recurseSelf; } ! /** ! * Override this method if you wish to do special ! * processing prior to the start of parsing. ! */ ! public void beginParsing () ! { } ! public void visitTag (Tag tag) ! { ! } ! ! public void visitEndTag (Tag tag) ! { ! } ! ! public void visitStringNode (StringNode stringNode) ! { } ! ! public void visitRemarkNode (RemarkNode remarkNode) ! { ! } ! ! /** ! * Override this method if you wish to do special ! * processing upon completion of parsing. ! */ ! public void finishedParsing () ! { } ! public void visitLinkTag (LinkTag linkTag) ! { } ! ! public void visitImageTag (ImageTag imageTag) ! { ! } ! ! public void visitTitleTag (TitleTag titleTag) ! { ! } ! public boolean shouldRecurseChildren () ! { ! return (mRecurseChildren); ! } ! ! public boolean shouldRecurseSelf () ! { ! 
return (mRecurseSelf); } } Index: TagFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/TagFindingVisitor.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** TagFindingVisitor.java 28 Sep 2003 15:34:00 -0000 1.36 --- TagFindingVisitor.java 28 Sep 2003 19:30:04 -0000 1.37 *************** *** 69,86 **** public void visitTag(Tag tag) { - if (tag.isEndTag ()) - { - if (!endTagCheck) return; - for (int i=0;i<tagsToBeFound.length;i++) - if (tag.getTagName().substring (1).equalsIgnoreCase(tagsToBeFound[i])) - { - endTagCount[i]++; - endTags[i].add(tag); - } - } for (int i=0;i<tagsToBeFound.length;i++) if (tag.getTagName().equalsIgnoreCase(tagsToBeFound[i])) { count[i]++; tags[i].add(tag); } } --- 69,87 ---- public void visitTag(Tag tag) { for (int i=0;i<tagsToBeFound.length;i++) if (tag.getTagName().equalsIgnoreCase(tagsToBeFound[i])) { count[i]++; tags[i].add(tag); + } + } + + public void visitEndTag(Tag tag) + { + if (!endTagCheck) return; + for (int i=0;i<tagsToBeFound.length;i++) + if (tag.getTagName().equalsIgnoreCase(tagsToBeFound[i])) + { + endTagCount[i]++; + endTags[i].add(tag); } } Index: TextExtractingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/TextExtractingVisitor.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** TextExtractingVisitor.java 28 Sep 2003 15:34:00 -0000 1.34 --- TextExtractingVisitor.java 28 Sep 2003 19:30:04 -0000 1.35 *************** *** 67,74 **** } - public void visitTitleTag(TitleTag titleTag) { - textAccumulator.append(titleTag.getTitle ()); - } - private String replaceNonBreakingSpaceWithOrdinarySpace(String text) { return text.replace('\u00a0',' '); --- 67,70 ---- *************** *** 77,90 **** public void visitTag(Tag tag) { ! if (tag.isEndTag ()) ! { ! 
if (isPreTag(tag)) ! preTagBeingProcessed = false; ! } ! else ! { ! if (isPreTag(tag)) ! preTagBeingProcessed = true; ! } } --- 73,84 ---- public void visitTag(Tag tag) { ! if (isPreTag(tag)) ! preTagBeingProcessed = true; ! } ! ! public void visitEndTag(Tag tag) ! { ! if (isPreTag(tag)) ! preTagBeingProcessed = false; } Index: UrlModifyingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/UrlModifyingVisitor.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** UrlModifyingVisitor.java 28 Sep 2003 15:34:00 -0000 1.33 --- UrlModifyingVisitor.java 28 Sep 2003 19:30:05 -0000 1.34 *************** *** 62,66 **** public void visitImageTag(ImageTag imageTag) { imageTag.setImageURL(linkPrefix + imageTag.getImageURL()); - modifiedResult.append(imageTag.toHtml()); } --- 62,65 ---- *************** *** 71,78 **** public void visitTag(Tag tag) { ! if (tag.isEndTag ()) ! modifiedResult.append(tag.toHtml()); ! else ! modifiedResult.append(tag.toHtml()); } --- 70,79 ---- public void visitTag(Tag tag) { ! modifiedResult.append(tag.toHtml()); ! } ! ! public void visitEndTag(Tag tag) ! { ! modifiedResult.append(tag.toHtml()); } |