[Htmlparser-cvs] htmlparser/src/org/htmlparser/beans StringBean.java,1.44,1.45
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2006-05-30 01:07:18
|
Update of //cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans In directory sc8-pr-cvs5.sourceforge.net:/tmp/cvs-serv32405/beans Modified Files: StringBean.java Log Message: fix bug#1496863 StringBean collapse() adds extra whitespace Keep collapsing state machine state as member variable. Index: StringBean.java =================================================================== RCS file: //cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/StringBean.java,v retrieving revision 1.44 retrieving revision 1.45 diff -C2 -d -r1.44 -r1.45 *** StringBean.java 15 May 2005 11:49:03 -0000 1.44 --- StringBean.java 30 May 2006 01:07:14 -0000 1.45 *************** *** 149,152 **** --- 149,157 ---- /** + * The state of the collapse processing state machine. + */ + protected int mCollapseState; + + /** * The buffer text is stored in while traversing the HTML. */ *************** *** 189,192 **** --- 194,198 ---- mReplaceSpace = true; mCollapse = true; + mCollapseState = 0; mBuffer = new StringBuffer (4096); mIsScript = false; *************** *** 213,216 **** --- 219,223 ---- length - NEWLINE_SIZE, length).equals (NEWLINE)))) mBuffer.append (NEWLINE); + mCollapseState = 0; } *************** *** 238,243 **** { int chars; - int length; - int state; char character; --- 245,248 ---- *************** *** 245,255 **** if (0 != chars) { - length = buffer.length (); - state = ((0 == length) - || (buffer.charAt (length - 1) == ' ') - || ((NEWLINE_SIZE <= length) - && buffer.substring ( - length - NEWLINE_SIZE, length).equals (NEWLINE))) - ? 0 : 1; for (int i = 0; i < chars; i++) { --- 250,253 ---- *************** *** 265,275 **** case '\r': case '\n': ! if (0 != state) ! state = 1; break; default: ! if (1 == state) buffer.append (' '); ! state = 2; buffer.append (character); } --- 263,273 ---- case '\r': case '\n': ! if (0 != mCollapseState) ! mCollapseState = 1; break; default: ! if (1 == mCollapseState) buffer.append (' '); ! 
mCollapseState = 2; buffer.append (character); } *************** *** 289,292 **** --- 287,291 ---- String ret; + mCollapseState = 0; mParser.visitAllNodesWith (this); ret = mBuffer.toString (); *************** *** 319,322 **** --- 318,322 ---- protected void setStrings () { + mCollapseState = 0; if (null != getURL ()) try *************** *** 341,344 **** --- 341,345 ---- mParser.reset (); mBuffer = new StringBuffer (4096); + mCollapseState = 0; mParser.visitAllNodesWith (this); updateStrings (mBuffer.toString ()); *************** *** 558,561 **** --- 559,565 ---- * If the setting is changed after the URL has been set, the text from the * URL will be reacquired, which is possibly expensive. + * The internal state of the collapse state machine can be reset with + * code like this: + * <code>setCollapse (getCollapse ());</code> * @param collapse If <code>true</code>, sequences of whitespace * will be reduced to a single space. *************** *** 563,566 **** --- 567,571 ---- public void setCollapse (boolean collapse) { + mCollapseState = 0; boolean oldValue = mCollapse; if (oldValue != collapse) |