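[Htmlparser-cvs] htmlparser/src/org/htmlparser/visitors HtmlPage.java,1.42,1.43 LinkFindingVisitor.java,1.35,1.36 NodeVisitor.java,1.36,1.37 ObjectFindingVisitor.java,1.39,1.40 TagFindingVisitor.java,1.41,1.42 TextExtractingVisitor.java,1.40,1.41 UrlModifyingVisitor.java,1.43,1.44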
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2004-05-24 00:38:34
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31556/visitors Modified Files: HtmlPage.java LinkFindingVisitor.java NodeVisitor.java ObjectFindingVisitor.java TagFindingVisitor.java TextExtractingVisitor.java UrlModifyingVisitor.java Log Message: Part two of a multiphase refactoring. Part one added the Tag interface. This submission eliminates some of the duplication between the lexer.nodes package and the htmlparser package by removing the tag specific signatures, visitTitleTag, visitLinkTag and visitImageTag, from the NodeVisitor class. This allows the lexer to return htmlparser level classes for StringNode and RemarkNode. The TagNode is still present in the lexer.nodes package, but will move next. This means that classes derived from NodeVisitor *will not* work using the above signatures; instead a check for tag class (or name) should be performed in visitTag. A document will be added to the visitors package with comprehensive porting instructions. Index: LinkFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/LinkFindingVisitor.java,v retrieving revision 1.35 retrieving revision 1.36 diff -C2 -d -r1.35 -r1.36 *** LinkFindingVisitor.java 25 Jan 2004 21:33:14 -0000 1.35 --- LinkFindingVisitor.java 24 May 2004 00:38:19 -0000 1.36 *************** *** 28,34 **** import java.util.Locale; - import org.htmlparser.tags.LinkTag; public class LinkFindingVisitor extends NodeVisitor { --- 28,35 ---- import java.util.Locale; import org.htmlparser.tags.LinkTag; + import org.htmlparser.Tag; + public class LinkFindingVisitor extends NodeVisitor { *************** *** 49,56 **** } ! public void visitLinkTag(LinkTag linkTag) { ! if (-1 != linkTag.getLinkText ().toUpperCase (locale).indexOf (linkTextToFind)) ! count++; } --- 50,58 ---- } ! public void visitTag(Tag tag) { ! if (tag instanceof LinkTag) ! 
if (-1 != ((LinkTag)tag).getLinkText ().toUpperCase (locale).indexOf (linkTextToFind)) ! count++; } Index: TagFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/TagFindingVisitor.java,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** TagFindingVisitor.java 2 Jan 2004 16:24:58 -0000 1.41 --- TagFindingVisitor.java 24 May 2004 00:38:19 -0000 1.42 *************** *** 28,32 **** import org.htmlparser.Node; ! import org.htmlparser.tags.Tag; import org.htmlparser.util.NodeList; --- 28,32 ---- import org.htmlparser.Node; ! import org.htmlparser.Tag; import org.htmlparser.util.NodeList; Index: HtmlPage.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/HtmlPage.java,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** HtmlPage.java 14 Jan 2004 02:53:47 -0000 1.42 --- HtmlPage.java 24 May 2004 00:38:19 -0000 1.43 *************** *** 30,34 **** import org.htmlparser.tags.BodyTag; import org.htmlparser.tags.TableTag; ! import org.htmlparser.tags.Tag; import org.htmlparser.tags.TitleTag; import org.htmlparser.util.NodeList; --- 30,34 ---- import org.htmlparser.tags.BodyTag; import org.htmlparser.tags.TableTag; ! 
import org.htmlparser.Tag; import org.htmlparser.tags.TitleTag; import org.htmlparser.util.NodeList; *************** *** 60,63 **** --- 60,65 ---- else if (isBodyTag(tag)) nodesInBody = tag.getChildren (); + else if (isTitleTag(tag)) + title = ((TitleTag)tag).getTitle(); } *************** *** 72,75 **** --- 74,82 ---- } + private boolean isTitleTag(Tag tag) + { + return (tag instanceof TitleTag); + } + public NodeList getBody() { return nodesInBody; *************** *** 82,89 **** return tableArr; } - - public void visitTitleTag(TitleTag titleTag) - { - title = titleTag.getTitle(); - } } --- 89,91 ---- Index: ObjectFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/ObjectFindingVisitor.java,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** ObjectFindingVisitor.java 2 Jan 2004 16:24:58 -0000 1.39 --- ObjectFindingVisitor.java 24 May 2004 00:38:19 -0000 1.40 *************** *** 28,32 **** import org.htmlparser.Node; ! import org.htmlparser.tags.Tag; import org.htmlparser.util.NodeList; --- 28,32 ---- import org.htmlparser.Node; ! import org.htmlparser.Tag; import org.htmlparser.util.NodeList; Index: NodeVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/NodeVisitor.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** NodeVisitor.java 2 Jan 2004 16:24:58 -0000 1.36 --- NodeVisitor.java 24 May 2004 00:38:19 -0000 1.37 *************** *** 29,36 **** import org.htmlparser.RemarkNode; import org.htmlparser.StringNode; ! import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.ImageTag; ! import org.htmlparser.tags.LinkTag; ! import org.htmlparser.tags.TitleTag; /** --- 29,33 ---- import org.htmlparser.RemarkNode; import org.htmlparser.StringNode; ! 
import org.htmlparser.Tag; /** *************** *** 43,49 **** * types of nodes encountered in depth-first order and finally * <code>finishedParsing()</code>.<p> - * There are currently three specialized <code>visitXXX()</code> calls for - * titles, images and links. Thes call their specialized visit, and then - * perform the generic processing. * Typical code to print all the link tags: * <pre> --- 40,43 ---- *************** *** 58,64 **** * { * } ! * public void visitLinkTag (LinkTag linkTag) * { ! * System.out.println (linkTag); * } * public static void main (String[] args) throws ParserException --- 52,59 ---- * { * } ! * public void visitTag (Tag tag) * { ! * if (tag instanceof LinkTag) ! * System.out.println (tag); * } * public static void main (String[] args) throws ParserException *************** *** 75,79 **** private boolean mRecurseChildren; private boolean mRecurseSelf; ! public NodeVisitor () { --- 70,77 ---- private boolean mRecurseChildren; private boolean mRecurseSelf; ! ! /** ! * Creates a node visitor that recurses itself and it's children. ! */ public NodeVisitor () { *************** *** 81,84 **** --- 79,88 ---- } + /** + * Creates a node visitor that recurses itself and it's children + * only if <code>recurseChildren</code> is <code>true</code>. + * @param recurseChildren If <code>true</code>, the visitor will + * visit children, otherwise only the top level nodes are recursed. + */ public NodeVisitor (boolean recurseChildren) { *************** *** 86,89 **** --- 90,102 ---- } + /** + * Creates a node visitor that recurses itself only if + * <code>recurseSelf</code> is <code>true</code> and it's children + * only if <code>recurseChildren</code> is <code>true</code>. + * @param recurseChildren If <code>true</code>, the visitor will + * visit children, otherwise only the top level nodes are recursed. + * @param recurseSelf If <code>true</code>, the visitor will + * visit the top level node. 
+ */ public NodeVisitor (boolean recurseChildren, boolean recurseSelf) { *************** *** 100,122 **** } public void visitTag (Tag tag) { - } public void visitEndTag (Tag tag) { - } ! public void visitStringNode (StringNode stringNode) { } ! public void visitRemarkNode (RemarkNode remarkNode) { - } ! /** * Override this method if you wish to do special --- 113,148 ---- } + /** + * Called for each <code>Tag</code> visited. + * @param tag The tag being visited. + */ public void visitTag (Tag tag) { } + /** + * Called for each <code>Tag</code> visited that is an end tag. + * @param tag The end tag being visited. + */ public void visitEndTag (Tag tag) { } ! /** ! * Called for each <code>StringNode</code> visited. ! * @param string The string node being visited. ! */ ! public void visitStringNode (StringNode string) { } ! /** ! * Called for each <code>RemarkNode</code> visited. ! * @param remark The remark node being visited. ! */ ! public void visitRemarkNode (RemarkNode remark) { } ! /** * Override this method if you wish to do special *************** *** 127,143 **** } ! public void visitLinkTag (LinkTag linkTag) ! { ! } ! ! public void visitImageTag (ImageTag imageTag) ! { ! } ! ! public void visitTitleTag (TitleTag titleTag) ! { ! ! } ! public boolean shouldRecurseChildren () { --- 153,160 ---- } ! /** ! * Depth traversal predicate. ! * @return <code>true</code> if children are to be visited. ! */ public boolean shouldRecurseChildren () { *************** *** 145,148 **** --- 162,169 ---- } + /** + * Self traversal predicate. + * @return <code>true</code> if a node itself is to be visited. 
+ */ public boolean shouldRecurseSelf () { Index: TextExtractingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/TextExtractingVisitor.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** TextExtractingVisitor.java 14 Jan 2004 02:53:47 -0000 1.40 --- TextExtractingVisitor.java 24 May 2004 00:38:19 -0000 1.41 *************** *** 28,32 **** import org.htmlparser.StringNode; ! import org.htmlparser.tags.Tag; import org.htmlparser.util.Translate; --- 28,32 ---- import org.htmlparser.StringNode; ! import org.htmlparser.Tag; import org.htmlparser.util.Translate; Index: UrlModifyingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/UrlModifyingVisitor.java,v retrieving revision 1.43 retrieving revision 1.44 diff -C2 -d -r1.43 -r1.44 *** UrlModifyingVisitor.java 2 Jan 2004 16:24:58 -0000 1.43 --- UrlModifyingVisitor.java 24 May 2004 00:38:19 -0000 1.44 *************** *** 34,38 **** import org.htmlparser.tags.ImageTag; import org.htmlparser.tags.LinkTag; ! import org.htmlparser.tags.Tag; public class UrlModifyingVisitor extends NodeVisitor { --- 34,38 ---- import org.htmlparser.tags.ImageTag; import org.htmlparser.tags.LinkTag; ! import org.htmlparser.Tag; public class UrlModifyingVisitor extends NodeVisitor { *************** *** 48,59 **** } - public void visitLinkTag(LinkTag linkTag) { - linkTag.setLink(linkPrefix + linkTag.getLink()); - } - - public void visitImageTag(ImageTag imageTag) { - imageTag.setImageURL(linkPrefix + imageTag.getImageURL()); - } - public void visitRemarkNode (RemarkNode remarkNode) { --- 48,51 ---- *************** *** 67,71 **** public void visitTag(Tag tag) ! 
{ // process only those nodes that won't be processed by an end tag, // nodes without parents or parents without an end tag, since // the complete processing of all children should happen before --- 59,68 ---- public void visitTag(Tag tag) ! { ! if (tag instanceof LinkTag) ! ((LinkTag)tag).setLink(linkPrefix + ((LinkTag)tag).getLink()); ! else if (tag instanceof ImageTag) ! ((ImageTag)tag).setImageURL(linkPrefix + ((ImageTag)tag).getImageURL()); ! // process only those nodes that won't be processed by an end tag, // nodes without parents or parents without an end tag, since // the complete processing of all children should happen before |