Thread: [Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer Lexer.java,1.40,1.41 InputStreamSource.java,1.7,1.8
Brought to you by:
derrickoswald
|
From: Derrick O. <der...@us...> - 2005-09-19 02:35:13
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30508/src/org/htmlparser/lexer Modified Files: Lexer.java InputStreamSource.java Log Message: Apply patch #1247128 Bug Fix: #1227213 Particular SCRIPT tags close too late from Keiron McCammon. Index: InputStreamSource.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/InputStreamSource.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** InputStreamSource.java 15 May 2005 11:49:04 -0000 1.7 --- InputStreamSource.java 19 Sep 2005 02:35:05 -0000 1.8 *************** *** 47,51 **** /** * An initial buffer size. ! * Has a default value of {@value}. */ public static int BUFFER_SIZE = 16384; --- 47,51 ---- /** * An initial buffer size. ! * Has a default value of {16384}. */ public static int BUFFER_SIZE = 16384; Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** Lexer.java 20 Jun 2005 01:56:32 -0000 1.40 --- Lexer.java 19 Sep 2005 02:35:05 -0000 1.41 *************** *** 1182,1185 **** --- 1182,1186 ---- char ch; int end; + boolean comment; start = mCursor.getPosition (); *************** *** 1187,1190 **** --- 1188,1193 ---- done = false; quote = 0; + comment = false; + while (!done) { *************** *** 1199,1203 **** break; case '\'': ! if (quotesmart) if (0 == quote) quote = '\''; // enter quoted state --- 1202,1206 ---- break; case '\'': ! if (quotesmart && !comment) if (0 == quote) quote = '\''; // enter quoted state *************** *** 1206,1210 **** break; case '"': ! if (quotesmart) if (0 == quote) quote = '"'; // enter quoted state --- 1209,1213 ---- break; case '"': ! 
if (quotesmart && !comment) if (0 == quote) quote = '"'; // enter quoted state *************** *** 1232,1240 **** done = true; else if ('/' == ch) ! { ! do ! ch = mPage.getCharacter (mCursor); ! while ((Page.EOF != ch) && ('\n' != ch)); ! } else if ('*' == ch) { --- 1235,1239 ---- done = true; else if ('/' == ch) ! comment = true; else if ('*' == ch) { *************** *** 1254,1257 **** --- 1253,1259 ---- } break; + case '\n': + comment = false; + break; case '<': if (quotesmart) *************** *** 1276,1279 **** --- 1278,1298 ---- state = 2; break; + case '!': + ch = mPage.getCharacter (mCursor); + if (Page.EOF == ch) + done = true; + else if ('-' == ch) + { + ch = mPage.getCharacter (mCursor); + if (Page.EOF == ch) + done = true; + else if ('-' == ch) + state = 3; + else + state = 0; + } + else + state = 0; + break; default: state = 0; *************** *** 1282,1285 **** --- 1301,1305 ---- break; case 2: // </ + comment = false; if (Page.EOF == ch) done = true; *************** *** 1295,1298 **** --- 1315,1337 ---- state = 0; break; + case 3: // <! + comment = false; + if (Page.EOF == ch) + done = true; + else if ('-' == ch) + { + ch = mPage.getCharacter (mCursor); + if (Page.EOF == ch) + done = true; + else if ('-' == ch) + { + ch = mPage.getCharacter (mCursor); + if (Page.EOF == ch) + done = true; + else if ('>' == ch) + state = 0; + } + } + break; default: throw new IllegalStateException ("how the fuck did we get in state " + state); |