[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer/nodes RemarkNode.java,1.16,1.17 StringNode.java
Brought to you by:
derrickoswald
From: <der...@us...> - 2004-02-29 13:10:25
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25994/lexer/nodes Modified Files: RemarkNode.java StringNode.java Log Message: Fix bug #900128 RemarkNode.setText() does not set Text Add override setText() to StringNode and RemarkNode. Add unit tests to exercise the new code. Remove remaining XX_FILTER constants. Index: RemarkNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/RemarkNode.java,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** RemarkNode.java 2 Jan 2004 16:24:53 -0000 1.16 --- RemarkNode.java 29 Feb 2004 12:52:20 -0000 1.17 *************** *** 38,56 **** public class RemarkNode extends AbstractNode { ! public final static String REMARK_NODE_FILTER="-r"; /** ! * Constructor takes in the text string, beginning and ending posns. ! * @param page The page this string is on. ! * @param start The beginning position of the string. ! * @param end The ending positiong of the string. */ public RemarkNode (Page page, int start, int end) { super (page, start, end); } /** * Returns the text contents of the comment tag. */ public String getText() --- 38,72 ---- public class RemarkNode extends AbstractNode { ! /** ! * The contents of the remark node, or override text. ! */ ! protected String mText; /** ! * Constructor takes in the text string. ! * @param text The string node text. For correct generation of HTML, this ! * should not contain representations of tags (unless they are balanced). ! */ ! public RemarkNode (String text) ! { ! super (null, 0, 0); ! setText (text); ! } ! ! /** ! * Constructor takes in the page and beginning and ending posns. ! * @param page The page this remark is on. ! * @param start The beginning position of the remark. ! * @param end The ending positiong of the remark. 
*/ public RemarkNode (Page page, int start, int end) { super (page, start, end); + mText = null; } /** * Returns the text contents of the comment tag. + * @return The contents of the text inside the comment delimiters. */ public String getText() *************** *** 60,73 **** String ret; ! start = getStartPosition () + 4; ! end = getEndPosition () - 3; ! if (start >= end) ! ret = ""; else ! ret = mPage.getText (start, end); return (ret); } public String toPlainTextString() { --- 76,108 ---- String ret; ! if (null == mText) ! { ! start = getStartPosition () + 4; // <!-- ! end = getEndPosition () - 3; // --> ! if (start >= end) ! ret = ""; ! else ! ret = mPage.getText (start, end); ! } else ! ret = mText; return (ret); } + /** + * Sets the string contents of the node. + * If the text has the remark delimiters (<!-- -->), these are stripped off. + * @param text The new text for the node. + */ + public void setText (String text) + { + mText = text; + if (text.startsWith ("<!--") && text.endsWith ("-->")) + mText = text.substring (4, text.length () - 3); + nodeBegin = 0; + nodeEnd = mText.length (); + } + public String toPlainTextString() { *************** *** 77,85 **** public String toHtml() { ! return (mPage.getText (getStartPosition (), getEndPosition ())); } /** * Print the contents of the remark tag. */ public String toString() --- 112,138 ---- public String toHtml() { ! StringBuffer buffer; ! String ret; ! ! if (null == mText) ! ret = mPage.getText (getStartPosition (), getEndPosition ()); ! else ! { ! buffer = new StringBuffer (mText.length () + 7); ! buffer.append ("<!--"); ! buffer.append (mText); ! buffer.append ("-->"); ! ret = buffer.toString (); ! } ! ! return (ret); } /** * Print the contents of the remark tag. + * This is suitable for display in a debugger or output to a printout. + * Control characters are replaced by their equivalent escape + * sequence and contents is truncated to 80 characters. + * @return A string representation of the remark node. 
*/ public String toString() *************** *** 95,110 **** endpos = getEndPosition (); ret = new StringBuffer (endpos - startpos + 20); ! start = new Cursor (getPage (), startpos); ! end = new Cursor (getPage (), endpos); ! ret.append ("Rem ("); ! ret.append (start); ! ret.append (","); ! ret.append (end); ! ret.append ("): "); ! while (start.getPosition () < endpos) { ! try { ! c = mPage.getCharacter (start); switch (c) { --- 148,203 ---- endpos = getEndPosition (); ret = new StringBuffer (endpos - startpos + 20); ! if (null == mText) { ! start = new Cursor (getPage (), startpos); ! end = new Cursor (getPage (), endpos); ! ret.append ("Rem ("); ! ret.append (start); ! ret.append (","); ! ret.append (end); ! ret.append ("): "); ! start.setPosition (startpos + 4); // <!-- ! endpos -= 3; // --> ! while (start.getPosition () < endpos) { ! try ! { ! c = mPage.getCharacter (start); ! switch (c) ! { ! case '\t': ! ret.append ("\\t"); ! break; ! case '\n': ! ret.append ("\\n"); ! break; ! case '\r': ! ret.append ("\\r"); ! break; ! default: ! ret.append (c); ! } ! } ! catch (ParserException pe) ! { ! // not really expected, but we're only doing toString, so ignore ! } ! if (77 <= ret.length ()) ! { ! ret.append ("..."); ! break; ! } ! } ! } ! else ! { ! ret.append ("Rem ("); ! ret.append (startpos); ! ret.append (","); ! ret.append (endpos); ! ret.append ("): "); ! while (startpos < endpos) ! { ! c = mText.charAt (startpos); switch (c) { *************** *** 121,133 **** ret.append (c); } ! } ! catch (ParserException pe) ! { ! // not really expected, but we'return only doing toString, so ignore ! } ! if (77 <= ret.length ()) ! { ! ret.append ("..."); ! break; } } --- 214,223 ---- ret.append (c); } ! if (77 <= ret.length ()) ! { ! ret.append ("..."); ! break; ! } ! 
startpos++; } } Index: StringNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/StringNode.java,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** StringNode.java 2 Jan 2004 16:24:53 -0000 1.17 --- StringNode.java 29 Feb 2004 12:52:20 -0000 1.18 *************** *** 38,45 **** public class StringNode extends AbstractNode { ! public static final String STRING_FILTER = "-string"; /** ! * Constructor takes in the text string, beginning and ending posns. * @param page The page this string is on. * @param start The beginning position of the string. --- 38,59 ---- public class StringNode extends AbstractNode { ! /** ! * The contents of the string node, or override text. ! */ ! protected String mText; /** ! * Constructor takes in the text string. ! * @param text The string node text. For correct generation of HTML, this ! * should not contain representations of tags (unless they are balanced). ! */ ! public StringNode (String text) ! { ! super (null, 0, 0); ! setText (text); ! } ! ! /** ! * Constructor takes in the page and beginning and ending posns. * @param page The page this string is on. * @param start The beginning position of the string. *************** *** 49,52 **** --- 63,67 ---- { super (page, start, end); + mText = null; } *************** *** 65,81 **** public void setText (String text) { ! mPage = new Page (text); nodeBegin = 0; ! nodeEnd = text.length (); ! // TODO: this really needs work ! try ! { ! Cursor cursor = new Cursor (mPage, nodeBegin); ! for (int i = nodeBegin; i < nodeEnd; i++) ! mPage.getCharacter (cursor); ! } ! catch (ParserException pe) ! { ! } } --- 80,86 ---- public void setText (String text) { ! mText = text; nodeBegin = 0; ! nodeEnd = mText.length (); } *************** *** 87,91 **** public String toHtml () { ! 
return (mPage.getText (getStartPosition (), getEndPosition ())); } --- 92,102 ---- public String toHtml () { ! String ret; ! ! ret = mText; ! if (null == ret) ! ret = mPage.getText (getStartPosition (), getEndPosition ()); ! ! return (ret); } *************** *** 109,124 **** endpos = getEndPosition (); ret = new StringBuffer (endpos - startpos + 20); ! start = new Cursor (getPage (), startpos); ! end = new Cursor (getPage (), endpos); ! ret.append ("Txt ("); ! ret.append (start); ! ret.append (","); ! ret.append (end); ! ret.append ("): "); ! while (start.getPosition () < endpos) { ! try { ! c = mPage.getCharacter (start); switch (c) { --- 120,173 ---- endpos = getEndPosition (); ret = new StringBuffer (endpos - startpos + 20); ! if (null == mText) { ! start = new Cursor (getPage (), startpos); ! end = new Cursor (getPage (), endpos); ! ret.append ("Txt ("); ! ret.append (start); ! ret.append (","); ! ret.append (end); ! ret.append ("): "); ! while (start.getPosition () < endpos) { ! try ! { ! c = mPage.getCharacter (start); ! switch (c) ! { ! case '\t': ! ret.append ("\\t"); ! break; ! case '\n': ! ret.append ("\\n"); ! break; ! case '\r': ! ret.append ("\\r"); ! break; ! default: ! ret.append (c); ! } ! } ! catch (ParserException pe) ! { ! // not really expected, but we're only doing toString, so ignore ! } ! if (77 <= ret.length ()) ! { ! ret.append ("..."); ! break; ! } ! } ! } ! else ! { ! ret.append ("Txt ("); ! ret.append (startpos); ! ret.append (","); ! ret.append (endpos); ! ret.append ("): "); ! while (startpos < endpos) ! { ! c = mText.charAt (startpos); switch (c) { *************** *** 135,147 **** ret.append (c); } ! } ! catch (ParserException pe) ! { ! // not really expected, but we'return only doing toString, so ignore ! } ! if (77 <= ret.length ()) ! { ! ret.append ("..."); ! break; } } --- 184,193 ---- ret.append (c); } ! if (77 <= ret.length ()) ! { ! ret.append ("..."); ! break; ! } ! startpos++; } } |