[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer/nodes Attribute.java,1.14,1.15 RemarkNode.java,
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-12-07 23:42:13
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv16537/lexer/nodes Modified Files: Attribute.java RemarkNode.java StringNode.java TagNode.java Log Message: Remove most of the scanners. The only scanners left are ones that really do something different (script and jsp). Instead of registering a scanner to enable returning a specific tag you now add a tag to a PrototypicalNodeFactory. All known tags are 'registered' by default in a new Parser which is similar to having called the old 'registerDOMScanners()', so tags are fully nested. This is different behaviour, and specifically, you will need to recurse into returned nodes to get at what you want. I've tried to adjust the applications accordingly, but worked examples are still scarce. If you want to return only some of the derived tags while keeping most as generic tags, there are various constructors and manipulators on the factory. See the javadocs and examples in the tests package. Nearly all the old scanner tests are folded into the tag tests. toString() has been revamped. This means that the default Parser mainline now returns an indented listing of tags, making it easy to see the structure of a page. The downside is the text of the page had to have newlines, tabs etc. turned into escape sequences. But if you were really interested in content you would be using toHtml() or toPlainTextString(). 
Index: Attribute.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/Attribute.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** Attribute.java 9 Nov 2003 17:07:09 -0000 1.14 --- Attribute.java 7 Dec 2003 23:41:40 -0000 1.15 *************** *** 33,36 **** --- 33,37 ---- package org.htmlparser.lexer.nodes; + import java.io.Serializable; import org.htmlparser.lexer.Page; import org.htmlparser.util.Translate; *************** *** 198,201 **** --- 199,204 ---- */ public class Attribute + implements + Serializable { /** Index: RemarkNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/RemarkNode.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** RemarkNode.java 9 Nov 2003 17:07:09 -0000 1.13 --- RemarkNode.java 7 Dec 2003 23:41:40 -0000 1.14 *************** *** 34,37 **** --- 34,38 ---- import org.htmlparser.lexer.Page; import org.htmlparser.util.NodeList; + import org.htmlparser.util.ParserException; /** *************** *** 81,84 **** --- 82,86 ---- return (mPage.getText (getStartPosition (), getEndPosition ())); } + /** * Print the contents of the remark tag. *************** *** 86,95 **** public String toString() { Cursor start; Cursor end; ! start = new Cursor (getPage (), getStartPosition ()); ! end = new Cursor (getPage (), getEndPosition ()); ! return ("Rem (" + start.toString () + "," + end.toString () + "): " + getText ()); } --- 88,140 ---- public String toString() { + int startpos; + int endpos; Cursor start; Cursor end; + char c; + StringBuffer ret; ! startpos = getStartPosition (); ! endpos = getEndPosition (); ! ret = new StringBuffer (endpos - startpos + 20); ! start = new Cursor (getPage (), startpos); ! end = new Cursor (getPage (), endpos); ! ret.append ("Rem ("); ! ret.append (start); ! 
ret.append (","); ! ret.append (end); ! ret.append ("): "); ! while (start.getPosition () < endpos) ! { ! try ! { ! c = mPage.getCharacter (start); ! switch (c) ! { ! case '\t': ! ret.append ("\\t"); ! break; ! case '\n': ! ret.append ("\\n"); ! break; ! case '\r': ! ret.append ("\\r"); ! break; ! default: ! ret.append (c); ! } ! } ! catch (ParserException pe) ! { ! // not really expected, but we'return only doing toString, so ignore ! } ! if (77 <= ret.length ()) ! { ! ret.append ("..."); ! break; ! } ! } ! ! return (ret.toString ()); } Index: StringNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/StringNode.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** StringNode.java 9 Nov 2003 17:07:09 -0000 1.14 --- StringNode.java 7 Dec 2003 23:41:40 -0000 1.15 *************** *** 55,59 **** /** ! * Returns the text of the string line */ public String getText () --- 55,59 ---- /** ! * Returns the text of the string line. */ public String getText () *************** *** 93,104 **** } public String toString () { Cursor start; Cursor end; ! start = new Cursor (getPage (), getStartPosition ()); ! end = new Cursor (getPage (), getEndPosition ()); ! return ("Txt (" + start.toString () + "," + end.toString () + "): " + getText ()); } --- 93,154 ---- } + /** + * Express this string node as a printable string + * This is suitable for display in a debugger or output to a printout. + * Control characters are replaced by their equivalent escape + * sequence and contents is truncated to 80 characters. + * @return A string representation of the string node. + */ public String toString () { + int startpos; + int endpos; Cursor start; Cursor end; + char c; + StringBuffer ret; ! startpos = getStartPosition (); ! endpos = getEndPosition (); ! ret = new StringBuffer (endpos - startpos + 20); ! start = new Cursor (getPage (), startpos); ! 
end = new Cursor (getPage (), endpos); ! ret.append ("Txt ("); ! ret.append (start); ! ret.append (","); ! ret.append (end); ! ret.append ("): "); ! while (start.getPosition () < endpos) ! { ! try ! { ! c = mPage.getCharacter (start); ! switch (c) ! { ! case '\t': ! ret.append ("\\t"); ! break; ! case '\n': ! ret.append ("\\n"); ! break; ! case '\r': ! ret.append ("\\r"); ! break; ! default: ! ret.append (c); ! } ! } ! catch (ParserException pe) ! { ! // not really expected, but we'return only doing toString, so ignore ! } ! if (77 <= ret.length ()) ! { ! ret.append ("..."); ! break; ! } ! } ! ! return (ret.toString ()); } Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** TagNode.java 9 Nov 2003 17:07:09 -0000 1.24 --- TagNode.java 7 Dec 2003 23:41:40 -0000 1.25 *************** *** 637,644 **** --- 637,648 ---- public String toString () { + String text; String type; Cursor start; Cursor end; + StringBuffer ret; + text = getText (); + ret = new StringBuffer (20 + text.length ()); if (isEndTag ()) type = "End"; *************** *** 647,651 **** start = new Cursor (getPage (), getStartPosition ()); end = new Cursor (getPage (), getEndPosition ()); ! return (type + " (" + start.toString () + "," + end.toString () + "): " + getText ()); } --- 651,670 ---- start = new Cursor (getPage (), getStartPosition ()); end = new Cursor (getPage (), getEndPosition ()); ! ret.append (type); ! ret.append (" ("); ! ret.append (start); ! ret.append (","); ! ret.append (end); ! ret.append ("): "); ! if (80 < ret.length () + text.length ()) ! { ! text = text.substring (0, 77 - ret.length ()); ! ret.append (text); ! ret.append ("..."); ! } ! else ! ret.append (text); ! ! return (ret.toString ()); } |