[Htmlparser-cvs] htmlparser/src/org/htmlparser AbstractNode.java,1.14,1.15 Parser.java,1.61,1.62 Rem
Brought to you by:
derrickoswald
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv30684 Modified Files: AbstractNode.java Parser.java RemarkNode.java StringNode.java Removed Files: NodeReader.java RemarkNodeParser.java Log Message: Lexer Integration Removed old Parser classes. Removed EndTag; this class was replaced by a call to the new isEndTag() method on the Tag class. The StringNode, RemarkNode and tags.Tag classes now derive from their lexeme counterparts in lexer.nodes instead of the other way around. The beginnings of a node factory interface are included. This was added so the lexer could return 'visitable' nodes to the parser. The parser acts as its own node factory, as does the Lexer. The node count for parsing goes up in most cases because every whitespace (i.e. newline) now counts as a StringNode. This has whacked out a lot of the tests that were expecting fewer nodes or a certain type of node at a particular index. Attributes now maintain their order and case. The count of attributes also went up because whitespace is maintained within tags too. The storage in a Vector means the element 0 Attribute is actually the name of the tag, rather than having the $TAGNAME entry in a Hashtable. Index: AbstractNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/AbstractNode.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** AbstractNode.java 22 Sep 2003 02:39:58 -0000 1.14 --- AbstractNode.java 28 Sep 2003 15:33:57 -0000 1.15 *************** *** 156,163 **** * deep the links are embedded. */ ! public void collectInto(NodeList collectionList, Class nodeType) { ! if (nodeType.getName().equals(this.getClass().getName())) { collectionList.add(this); - } } --- 156,163 ---- * deep the links are embedded. */ ! public void collectInto(NodeList collectionList, Class nodeType) ! { ! 
if (nodeType.getName().equals(this.getClass().getName())) collectionList.add(this); } Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.61 retrieving revision 1.62 diff -C2 -d -r1.61 -r1.62 *** Parser.java 22 Sep 2003 02:39:58 -0000 1.61 --- Parser.java 28 Sep 2003 15:33:57 -0000 1.62 *************** *** 44,50 **** import java.util.Hashtable; import java.util.Map; import org.htmlparser.parserHelper.ParserHelper; - import org.htmlparser.parserHelper.TagParser; import org.htmlparser.scanners.AppletScanner; import org.htmlparser.scanners.BodyScanner; --- 44,56 ---- import java.util.Hashtable; import java.util.Map; [...999 lines suppressed...] + name = ret.getTagName (); + scanner = (TagScanner)scanners.get (name); + save = getPreviousOpenScanner (); + if ((null != scanner) && scanner.evaluate (ret.getText (), save)) + { + setPreviousOpenScanner (scanner); + try + { + ret = scanner.createScannedNode (ret, lexer.getPage ().getUrl (), lexer); + } + finally + { + setPreviousOpenScanner (save); + } + } + } + + return (ret); } } Index: RemarkNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/RemarkNode.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** RemarkNode.java 22 Sep 2003 02:39:58 -0000 1.31 --- RemarkNode.java 28 Sep 2003 15:33:57 -0000 1.32 *************** *** 30,33 **** --- 30,34 ---- package org.htmlparser; + import org.htmlparser.lexer.Page; import org.htmlparser.util.NodeList; import org.htmlparser.visitors.NodeVisitor; *************** *** 36,73 **** * The remark tag is identified and represented by this class. */ ! public class RemarkNode extends AbstractNode { public final static String REMARK_NODE_FILTER="-r"; ! /** ! * Tag contents will have the contents of the comment tag. ! */ ! String tagContents; ! 
/** ! * The HTMLRemarkTag is constructed by providing the beginning posn, ending posn ! * and the tag contents. ! * @param nodeBegin beginning position of the tag ! * @param nodeEnd ending position of the tag ! * @param tagContents contents of the remark tag ! */ ! public RemarkNode(int nodeBegin, int nodeEnd, String tagContents) ! { ! super(nodeBegin,nodeEnd); ! this.tagContents = tagContents; ! } /** ! * Returns the text contents of the comment tag. */ ! public String getText() { ! return tagContents; ! } ! public String toPlainTextString() { ! return tagContents; ! } ! public String toHtml() { ! return "<!--"+tagContents+"-->"; } /** * Print the contents of the remark tag. --- 37,75 ---- * The remark tag is identified and represented by this class. */ ! public class RemarkNode ! extends ! org.htmlparser.lexer.nodes.RemarkNode { public final static String REMARK_NODE_FILTER="-r"; ! // /** ! // * Tag contents will have the contents of the comment tag. ! // */ ! // String tagContents; ! // ! // /** ! // * The HTMLRemarkTag is constructed by providing the beginning posn, ending posn ! // * and the tag contents. ! // * @param nodeBegin beginning position of the tag ! // * @param nodeEnd ending position of the tag ! // * @param tagContents contents of the remark tag ! // */ ! // public RemarkNode(int nodeBegin, int nodeEnd, String tagContents) ! // { ! // super(nodeBegin,nodeEnd); ! // this.tagContents = tagContents; ! // } /** ! * Constructor takes in the text string, beginning and ending posns. ! * @param page The page this string is on. ! * @param start The beginning position of the string. ! * @param end The ending positiong of the string. */ ! public RemarkNode (Page page, int start, int end) { ! super (page, start, end); } + /** * Print the contents of the remark tag. *************** *** 75,79 **** public String toString() { ! 
return "Comment Tag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd()+"\n"; } --- 77,81 ---- public String toString() { ! return "Comment Tag : "+getText()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd()+"\n"; } *************** *** 82,87 **** } ! public void accept(Object visitor) { ! ((NodeVisitor)visitor).visitRemarkNode(this); } --- 84,90 ---- } ! public void accept(Object visitor) ! { ! ((NodeVisitor)visitor).visitRemarkNode (this); } Index: StringNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNode.java,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** StringNode.java 22 Sep 2003 02:39:58 -0000 1.39 --- StringNode.java 28 Sep 2003 15:33:57 -0000 1.40 *************** *** 30,33 **** --- 30,34 ---- package org.htmlparser; + import org.htmlparser.lexer.Page; import org.htmlparser.util.NodeList; import org.htmlparser.visitors.NodeVisitor; *************** *** 36,48 **** * Normal text in the html document is identified and represented by this class. */ ! public class StringNode extends AbstractNode { public static final String STRING_FILTER="-string"; ! /** ! * The text of the string. ! */ ! protected StringBuffer textBuffer; ! /** * Constructor takes in the text string, beginning and ending posns. --- 37,51 ---- * Normal text in the html document is identified and represented by this class. */ ! public class StringNode ! extends ! org.htmlparser.lexer.nodes.StringNode { public static final String STRING_FILTER="-string"; ! // /** ! // * The text of the string. ! // */ ! // protected StringBuffer textBuffer; ! // /** * Constructor takes in the text string, beginning and ending posns. *************** *** 53,81 **** public StringNode (StringBuffer text, int textBegin,int textEnd) { ! super(textBegin,textEnd); ! this.textBuffer = text; } /** ! * Returns the text of the string line ! */ ! 
public String getText() { ! return textBuffer.toString(); ! } ! /** ! * Sets the string contents of the node. ! * @param text The new text for the node. */ ! public void setText(String text) { ! textBuffer = new StringBuffer (text); ! } ! ! public String toPlainTextString() { ! return textBuffer.toString(); ! } ! ! public String toHtml() { ! return textBuffer.toString(); } --- 56,71 ---- public StringNode (StringBuffer text, int textBegin,int textEnd) { ! super(new Page (text.toString ()), textBegin,textEnd); } /** ! * Constructor takes in the text string, beginning and ending posns. ! * @param page The page this string is on. ! * @param start The beginning position of the string. ! * @param end The ending positiong of the string. */ ! public StringNode (Page page, int start, int end) { ! super (page, start, end); } *************** *** 88,93 **** } ! public void accept(Object visitor) { ! ((NodeVisitor)visitor).visitStringNode(this); } } --- 78,84 ---- } ! public void accept(Object visitor) ! { ! ((NodeVisitor)visitor).visitStringNode (this); } } --- NodeReader.java DELETED --- --- RemarkNodeParser.java DELETED --- |