[Htmlparser-cvs] htmlparser/src/org/htmlparser/visitors HtmlPage.java,1.30,1.31 LinkFindingVisitor.j
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-09-03 23:38:03
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors In directory sc8-pr-cvs1:/tmp/cvs-serv31228/visitors Modified Files: HtmlPage.java LinkFindingVisitor.java NodeVisitor.java ObjectFindingVisitor.java StringFindingVisitor.java TagFindingVisitor.java TextExtractingVisitor.java UrlModifyingVisitor.java Log Message: Change tabs to spaces in all source files. Index: HtmlPage.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/HtmlPage.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** HtmlPage.java 24 Aug 2003 21:59:45 -0000 1.30 --- HtmlPage.java 3 Sep 2003 23:36:22 -0000 1.31 *************** *** 41,121 **** public class HtmlPage extends NodeVisitor { ! private String title; ! private NodeList nodesInBody; ! private NodeList tables; ! private boolean bodyTagBegin; ! ! public HtmlPage(Parser parser) { ! super(false); ! parser.registerScanners(); ! parser.addScanner(new TableScanner(parser)); ! nodesInBody = new NodeList(); ! tables = new NodeList(); ! bodyTagBegin = false; ! } ! ! public String getTitle() { ! return title; ! } ! public void setTitle(String title) { ! this.title = title; ! } ! public void visitTag(Tag tag) { ! addTagToBodyIfApplicable(tag); ! ! if (isTable(tag)) { ! tables.add(tag); ! } ! else { ! if (isBodyTag(tag)) ! bodyTagBegin = true; ! } ! } ! private boolean isTable(Tag tag) { ! return tag instanceof TableTag; ! } ! private void addTagToBodyIfApplicable(Node node) { ! if (bodyTagBegin) ! nodesInBody.add(node); ! } ! public void visitEndTag(EndTag endTag) { ! if (isBodyTag(endTag)) ! bodyTagBegin = false; ! addTagToBodyIfApplicable(endTag); ! } ! public void visitRemarkNode(RemarkNode remarkNode) { ! addTagToBodyIfApplicable(remarkNode); ! } ! public void visitStringNode(StringNode stringNode) { ! addTagToBodyIfApplicable(stringNode); ! } ! ! private boolean isBodyTag(Tag tag) { ! return tag.getTagName().equals("BODY"); ! } ! ! public NodeList getBody() { ! return nodesInBody; ! } ! ! public TableTag [] getTables() { ! TableTag [] tableArr = new TableTag[tables.size()]; ! for (int i=0;i<tables.size();i++) ! tableArr[i] = (TableTag)tables.elementAt(i); ! return tableArr; ! } ! public void visitTitleTag(TitleTag titleTag) { ! title = titleTag.getTitle(); ! } } --- 41,121 ---- public class HtmlPage extends NodeVisitor { ! private String title; ! private NodeList nodesInBody; ! private NodeList tables; ! private boolean bodyTagBegin; ! ! public HtmlPage(Parser parser) { ! super(false); ! parser.registerScanners(); ! parser.addScanner(new TableScanner(parser)); ! nodesInBody = new NodeList(); ! tables = new NodeList(); ! bodyTagBegin = false; ! } ! ! public String getTitle() { ! return title; ! } ! public void setTitle(String title) { ! this.title = title; ! } ! public void visitTag(Tag tag) { ! addTagToBodyIfApplicable(tag); ! ! if (isTable(tag)) { ! tables.add(tag); ! } ! else { ! if (isBodyTag(tag)) ! bodyTagBegin = true; ! } ! } ! private boolean isTable(Tag tag) { ! return tag instanceof TableTag; ! } ! private void addTagToBodyIfApplicable(Node node) { ! if (bodyTagBegin) ! nodesInBody.add(node); ! } ! public void visitEndTag(EndTag endTag) { ! if (isBodyTag(endTag)) ! bodyTagBegin = false; ! addTagToBodyIfApplicable(endTag); ! } ! public void visitRemarkNode(RemarkNode remarkNode) { ! addTagToBodyIfApplicable(remarkNode); ! } ! public void visitStringNode(StringNode stringNode) { ! addTagToBodyIfApplicable(stringNode); ! } ! ! private boolean isBodyTag(Tag tag) { ! return tag.getTagName().equals("BODY"); ! } ! ! public NodeList getBody() { ! return nodesInBody; ! } ! ! public TableTag [] getTables() { ! TableTag [] tableArr = new TableTag[tables.size()]; ! for (int i=0;i<tables.size();i++) ! tableArr[i] = (TableTag)tables.elementAt(i); ! return tableArr; ! } ! public void visitTitleTag(TitleTag titleTag) { ! title = titleTag.getTitle(); ! } } Index: LinkFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/LinkFindingVisitor.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** LinkFindingVisitor.java 24 Aug 2003 21:59:45 -0000 1.25 --- LinkFindingVisitor.java 3 Sep 2003 23:36:22 -0000 1.26 *************** *** 32,58 **** public class LinkFindingVisitor extends NodeVisitor { ! private String linkTextToFind; ! private boolean linkTagFound = false; ! private int count = 0; ! ! public LinkFindingVisitor(String linkTextToFind) { ! this.linkTextToFind = linkTextToFind.toUpperCase(); ! } ! public void visitLinkTag(LinkTag linkTag) { ! System.out.println("Matching with "+linkTag.getLinkText()); ! if (linkTag.getLinkText().toUpperCase().indexOf(linkTextToFind)!=-1) { ! linkTagFound = true; ! count++; ! } ! } ! ! public boolean linkTextFound() { ! return linkTagFound; ! } ! ! public int getCount() { ! return count; ! } } --- 32,58 ---- public class LinkFindingVisitor extends NodeVisitor { ! private String linkTextToFind; ! private boolean linkTagFound = false; ! private int count = 0; ! ! public LinkFindingVisitor(String linkTextToFind) { ! this.linkTextToFind = linkTextToFind.toUpperCase(); ! } ! public void visitLinkTag(LinkTag linkTag) { ! System.out.println("Matching with "+linkTag.getLinkText()); ! if (linkTag.getLinkText().toUpperCase().indexOf(linkTextToFind)!=-1) { ! linkTagFound = true; ! count++; ! } ! } ! ! public boolean linkTextFound() { ! return linkTagFound; ! } ! ! public int getCount() { ! return count; ! } } Index: NodeVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/NodeVisitor.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** NodeVisitor.java 24 Aug 2003 21:59:45 -0000 1.25 --- NodeVisitor.java 3 Sep 2003 23:36:22 -0000 1.26 *************** *** 38,95 **** public abstract class NodeVisitor { ! private boolean recurseChildren; ! private boolean recurseSelf; ! ! public NodeVisitor() { ! this(true); ! } ! ! public NodeVisitor(boolean recurseChildren) { ! this.recurseChildren = recurseChildren; ! this.recurseSelf = true; ! } ! ! public NodeVisitor(boolean recurseChildren,boolean recurseSelf) { ! this.recurseChildren = recurseChildren; ! this.recurseSelf = recurseSelf; ! } ! public void visitTag(Tag tag) { ! ! } ! public void visitStringNode(StringNode stringNode) { ! } ! ! public void visitLinkTag(LinkTag linkTag) { ! } ! ! public void visitImageTag(ImageTag imageTag) { ! } ! ! public void visitEndTag(EndTag endTag) { ! ! } ! ! public void visitTitleTag(TitleTag titleTag) { ! ! } ! public void visitRemarkNode(RemarkNode remarkNode) { ! ! } ! ! public boolean shouldRecurseChildren() { ! return recurseChildren; ! } ! ! public boolean shouldRecurseSelf() { ! return recurseSelf; ! } ! /** ! * Override this method if you wish to do special ! * processing upon completion of parsing ! */ ! public void finishedParsing() { ! } } --- 38,95 ---- public abstract class NodeVisitor { ! private boolean recurseChildren; ! private boolean recurseSelf; ! ! public NodeVisitor() { ! this(true); ! } ! ! public NodeVisitor(boolean recurseChildren) { ! this.recurseChildren = recurseChildren; ! this.recurseSelf = true; ! } ! ! public NodeVisitor(boolean recurseChildren,boolean recurseSelf) { ! this.recurseChildren = recurseChildren; ! this.recurseSelf = recurseSelf; ! } ! public void visitTag(Tag tag) { ! ! } ! public void visitStringNode(StringNode stringNode) { ! } ! ! public void visitLinkTag(LinkTag linkTag) { ! } ! ! public void visitImageTag(ImageTag imageTag) { ! } ! ! public void visitEndTag(EndTag endTag) { ! ! } ! ! public void visitTitleTag(TitleTag titleTag) { ! ! } ! public void visitRemarkNode(RemarkNode remarkNode) { ! ! } ! ! public boolean shouldRecurseChildren() { ! return recurseChildren; ! } ! ! public boolean shouldRecurseSelf() { ! return recurseSelf; ! } ! /** ! * Override this method if you wish to do special ! * processing upon completion of parsing ! */ ! public void finishedParsing() { ! } } Index: ObjectFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/ObjectFindingVisitor.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** ObjectFindingVisitor.java 24 Aug 2003 21:59:45 -0000 1.30 --- ObjectFindingVisitor.java 3 Sep 2003 23:36:22 -0000 1.31 *************** *** 36,66 **** public class ObjectFindingVisitor extends NodeVisitor { ! private Class classTypeToFind; ! private int count = 0; ! private NodeList tags; ! ! public ObjectFindingVisitor(Class classTypeToFind) { ! this(classTypeToFind,false); ! } ! ! public ObjectFindingVisitor(Class classTypeToFind,boolean recurse) { ! super(recurse); ! this.classTypeToFind = classTypeToFind; ! this.tags = new NodeList(); ! } ! ! public int getCount() { ! return count; ! } ! public void visitTag(Tag tag) { ! if (tag.getClass().getName().equals(classTypeToFind.getName())) { ! count++; ! tags.add(tag); ! } ! } ! public Node[] getTags() { ! return tags.toNodeArray(); ! } } --- 36,66 ---- public class ObjectFindingVisitor extends NodeVisitor { ! private Class classTypeToFind; ! private int count = 0; ! private NodeList tags; ! ! public ObjectFindingVisitor(Class classTypeToFind) { ! this(classTypeToFind,false); ! } ! ! public ObjectFindingVisitor(Class classTypeToFind,boolean recurse) { ! super(recurse); ! this.classTypeToFind = classTypeToFind; ! this.tags = new NodeList(); ! } ! ! public int getCount() { ! return count; ! } ! public void visitTag(Tag tag) { ! if (tag.getClass().getName().equals(classTypeToFind.getName())) { ! count++; ! tags.add(tag); ! } ! } ! public Node[] getTags() { ! return tags.toNodeArray(); ! } } Index: StringFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/StringFindingVisitor.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** StringFindingVisitor.java 24 Aug 2003 21:59:45 -0000 1.30 --- StringFindingVisitor.java 3 Sep 2003 23:36:22 -0000 1.31 *************** *** 34,75 **** public class StringFindingVisitor extends NodeVisitor { ! private boolean stringFound = false; ! private String stringToFind; ! private int foundCount; ! private boolean multipleSearchesWithinStrings; ! ! public StringFindingVisitor(String stringToFind) { ! this.stringToFind = stringToFind.toUpperCase(); ! foundCount = 0; ! multipleSearchesWithinStrings = false; ! } ! ! public void doMultipleSearchesWithinStrings() { ! multipleSearchesWithinStrings = true; ! } ! ! public void visitStringNode(StringNode stringNode) { ! String stringToBeSearched = stringNode.getText().toUpperCase(); ! if (!multipleSearchesWithinStrings && ! stringToBeSearched.indexOf(stringToFind) != -1) { ! stringFound = true; ! foundCount++; ! } else if (multipleSearchesWithinStrings) { ! int index = -1; ! do { ! index = stringToBeSearched.indexOf(stringToFind, index+1); ! if (index!=-1) ! foundCount++; ! } while (index != -1); ! } ! } ! ! public boolean stringWasFound() { ! return stringFound; ! } ! ! public int stringFoundCount() { ! return foundCount; ! } } --- 34,75 ---- public class StringFindingVisitor extends NodeVisitor { ! private boolean stringFound = false; ! private String stringToFind; ! private int foundCount; ! private boolean multipleSearchesWithinStrings; ! ! public StringFindingVisitor(String stringToFind) { ! this.stringToFind = stringToFind.toUpperCase(); ! foundCount = 0; ! multipleSearchesWithinStrings = false; ! } ! ! public void doMultipleSearchesWithinStrings() { ! multipleSearchesWithinStrings = true; ! } ! ! public void visitStringNode(StringNode stringNode) { ! String stringToBeSearched = stringNode.getText().toUpperCase(); ! if (!multipleSearchesWithinStrings && ! stringToBeSearched.indexOf(stringToFind) != -1) { ! stringFound = true; ! foundCount++; ! } else if (multipleSearchesWithinStrings) { ! int index = -1; ! do { ! index = stringToBeSearched.indexOf(stringToFind, index+1); ! if (index!=-1) ! foundCount++; ! } while (index != -1); ! } ! } ! ! public boolean stringWasFound() { ! return stringFound; ! } ! ! public int stringFoundCount() { ! return foundCount; ! } } Index: TagFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/TagFindingVisitor.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** TagFindingVisitor.java 24 Aug 2003 21:59:45 -0000 1.31 --- TagFindingVisitor.java 3 Sep 2003 23:36:22 -0000 1.32 *************** *** 37,95 **** public class TagFindingVisitor extends NodeVisitor { ! private String [] tagsToBeFound; ! private int count []; ! private int endTagCount []; ! private NodeList [] tags; ! private NodeList [] endTags; ! private boolean endTagCheck; ! ! public TagFindingVisitor(String [] tagsToBeFound) { ! this(tagsToBeFound,false); ! } ! public TagFindingVisitor(String [] tagsToBeFound, boolean endTagCheck) { ! this.tagsToBeFound = tagsToBeFound; ! this.tags = new NodeList[tagsToBeFound.length]; ! if (endTagCheck) { ! endTags = new NodeList[tagsToBeFound.length]; ! endTagCount = new int[tagsToBeFound.length]; ! } ! for (int i=0;i<tagsToBeFound.length;i++) { ! tags[i] = new NodeList(); ! if (endTagCheck) ! endTags[i] = new NodeList(); ! } ! this.count = new int[tagsToBeFound.length]; ! this.endTagCheck = endTagCheck; ! } ! ! public int getTagCount(int index) { ! return count[index]; ! } ! public void visitTag(Tag tag) { ! for (int i=0;i<tagsToBeFound.length;i++) ! if (tag.getTagName().equalsIgnoreCase(tagsToBeFound[i])) { ! count[i]++; ! tags[i].add(tag); ! } ! } ! public Node [] getTags(int index) { ! return tags[index].toNodeArray(); ! } ! public void visitEndTag(EndTag endTag) { ! if (!endTagCheck) return; ! for (int i=0;i<tagsToBeFound.length;i++) ! if (endTag.getTagName().equalsIgnoreCase(tagsToBeFound[i])) { ! endTagCount[i]++; ! endTags[i].add(endTag); ! } ! } ! ! public int getEndTagCount(int index) { ! return endTagCount[index]; ! } ! } --- 37,95 ---- public class TagFindingVisitor extends NodeVisitor { ! private String [] tagsToBeFound; ! private int count []; ! private int endTagCount []; ! private NodeList [] tags; ! private NodeList [] endTags; ! private boolean endTagCheck; ! ! public TagFindingVisitor(String [] tagsToBeFound) { ! this(tagsToBeFound,false); ! } ! public TagFindingVisitor(String [] tagsToBeFound, boolean endTagCheck) { ! this.tagsToBeFound = tagsToBeFound; ! this.tags = new NodeList[tagsToBeFound.length]; ! if (endTagCheck) { ! endTags = new NodeList[tagsToBeFound.length]; ! endTagCount = new int[tagsToBeFound.length]; ! } ! for (int i=0;i<tagsToBeFound.length;i++) { ! tags[i] = new NodeList(); ! if (endTagCheck) ! endTags[i] = new NodeList(); ! } ! this.count = new int[tagsToBeFound.length]; ! this.endTagCheck = endTagCheck; ! } ! ! public int getTagCount(int index) { ! return count[index]; ! } ! public void visitTag(Tag tag) { ! for (int i=0;i<tagsToBeFound.length;i++) ! if (tag.getTagName().equalsIgnoreCase(tagsToBeFound[i])) { ! count[i]++; ! tags[i].add(tag); ! } ! } ! public Node [] getTags(int index) { ! return tags[index].toNodeArray(); ! } ! public void visitEndTag(EndTag endTag) { ! if (!endTagCheck) return; ! for (int i=0;i<tagsToBeFound.length;i++) ! if (endTag.getTagName().equalsIgnoreCase(tagsToBeFound[i])) { ! endTagCount[i]++; ! endTags[i].add(endTag); ! } ! } ! ! public int getEndTagCount(int index) { ! return endTagCount[index]; ! } ! } Index: TextExtractingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/TextExtractingVisitor.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** TextExtractingVisitor.java 24 Aug 2003 21:59:45 -0000 1.29 --- TextExtractingVisitor.java 3 Sep 2003 23:36:22 -0000 1.30 *************** *** 47,92 **** */ public class TextExtractingVisitor extends NodeVisitor { ! private StringBuffer textAccumulator; ! private boolean preTagBeingProcessed; ! ! public TextExtractingVisitor() { ! textAccumulator = new StringBuffer(); ! preTagBeingProcessed = false; ! } ! public String getExtractedText() { ! return textAccumulator.toString(); ! } ! public void visitStringNode(StringNode stringNode) { ! String text = stringNode.getText(); ! if (!preTagBeingProcessed) { ! text = Translate.decode(text); ! text = replaceNonBreakingSpaceWithOrdinarySpace(text); ! } ! textAccumulator.append(text); ! } ! public void visitTitleTag(TitleTag titleTag) { ! textAccumulator.append(titleTag.getTitle ()); ! } ! private String replaceNonBreakingSpaceWithOrdinarySpace(String text) { ! return text.replace('\u00a0',' '); ! } ! public void visitEndTag(EndTag endTag) { ! if (isPreTag(endTag)) ! preTagBeingProcessed = false; ! } ! public void visitTag(Tag tag) { ! if (isPreTag(tag)) ! preTagBeingProcessed = true; ! } ! private boolean isPreTag(Tag tag) { ! return tag.getTagName().equals("PRE"); ! } } --- 47,92 ---- */ public class TextExtractingVisitor extends NodeVisitor { ! private StringBuffer textAccumulator; ! private boolean preTagBeingProcessed; ! ! public TextExtractingVisitor() { ! textAccumulator = new StringBuffer(); ! preTagBeingProcessed = false; ! } ! public String getExtractedText() { ! return textAccumulator.toString(); ! } ! public void visitStringNode(StringNode stringNode) { ! String text = stringNode.getText(); ! if (!preTagBeingProcessed) { ! text = Translate.decode(text); ! text = replaceNonBreakingSpaceWithOrdinarySpace(text); ! } ! textAccumulator.append(text); ! } ! public void visitTitleTag(TitleTag titleTag) { ! textAccumulator.append(titleTag.getTitle ()); ! } ! private String replaceNonBreakingSpaceWithOrdinarySpace(String text) { ! return text.replace('\u00a0',' '); ! } ! public void visitEndTag(EndTag endTag) { ! if (isPreTag(endTag)) ! preTagBeingProcessed = false; ! } ! public void visitTag(Tag tag) { ! if (isPreTag(tag)) ! preTagBeingProcessed = true; ! } ! private boolean isPreTag(Tag tag) { ! return tag.getTagName().equals("PRE"); ! } } Index: UrlModifyingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/UrlModifyingVisitor.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** UrlModifyingVisitor.java 24 Aug 2003 21:59:45 -0000 1.28 --- UrlModifyingVisitor.java 3 Sep 2003 23:36:22 -0000 1.29 *************** *** 39,83 **** public class UrlModifyingVisitor extends NodeVisitor { ! private String linkPrefix; ! private StringBuffer modifiedResult; ! private Parser parser; ! ! public UrlModifyingVisitor(Parser parser, String linkPrefix) { ! super(true,false); ! this.parser = parser; ! LinkScanner linkScanner = new LinkScanner(); ! parser.addScanner(linkScanner); ! parser.addScanner( ! linkScanner.createImageScanner( ! ImageTag.IMAGE_TAG_FILTER ! ) ! ); ! this.linkPrefix =linkPrefix; ! modifiedResult = new StringBuffer(); ! } ! ! public void visitLinkTag(LinkTag linkTag) { ! linkTag.setLink(linkPrefix + linkTag.getLink()); ! } ! public void visitImageTag(ImageTag imageTag) { ! imageTag.setImageURL(linkPrefix + imageTag.getImageURL()); ! modifiedResult.append(imageTag.toHtml()); ! } ! ! public void visitEndTag(EndTag endTag) { ! modifiedResult.append(endTag.toHtml()); ! } ! public void visitStringNode(StringNode stringNode) { ! modifiedResult.append(stringNode.toHtml()); ! } ! public void visitTag(Tag tag) { ! modifiedResult.append(tag.toHtml()); ! } ! ! public String getModifiedResult() { ! return modifiedResult.toString(); ! } } --- 39,83 ---- public class UrlModifyingVisitor extends NodeVisitor { ! private String linkPrefix; ! private StringBuffer modifiedResult; ! private Parser parser; ! ! public UrlModifyingVisitor(Parser parser, String linkPrefix) { ! super(true,false); ! this.parser = parser; ! LinkScanner linkScanner = new LinkScanner(); ! parser.addScanner(linkScanner); ! parser.addScanner( ! linkScanner.createImageScanner( ! ImageTag.IMAGE_TAG_FILTER ! ) ! ); ! this.linkPrefix =linkPrefix; ! modifiedResult = new StringBuffer(); ! } ! ! public void visitLinkTag(LinkTag linkTag) { ! linkTag.setLink(linkPrefix + linkTag.getLink()); ! } ! public void visitImageTag(ImageTag imageTag) { ! imageTag.setImageURL(linkPrefix + imageTag.getImageURL()); ! modifiedResult.append(imageTag.toHtml()); ! } ! ! public void visitEndTag(EndTag endTag) { ! modifiedResult.append(endTag.toHtml()); ! } ! public void visitStringNode(StringNode stringNode) { ! modifiedResult.append(stringNode.toHtml()); ! } ! public void visitTag(Tag tag) { ! modifiedResult.append(tag.toHtml()); ! } ! ! public String getModifiedResult() { ! return modifiedResult.toString(); ! } } |