[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer Lexer.java,1.27,1.27.2.1
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2004-05-22 20:10:41
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29429/src/org/htmlparser/lexer

Modified Files:
      Tag: v1_41
    Lexer.java
Log Message:
Fix bug #919738 Text has not been extracted correctly using StringBean
and (duplicate) bug #936392 ScriptTag visitor fails for comments with '
by handling single and multiline ecmascript comments in the Lexer class
when called with quotesmart true.

Index: Lexer.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v
retrieving revision 1.27
retrieving revision 1.27.2.1
diff -C2 -d -r1.27 -r1.27.2.1
*** Lexer.java 18 Feb 2004 12:34:04 -0000 1.27
--- Lexer.java 22 May 2004 20:10:31 -0000 1.27.2.1
***************
*** 303,306 ****
--- 303,307 ----
                  break;
              default:
+                 probe.retreat (); // string needs to see leading foreslash
                  ret = parseString (probe, quotesmart);
                  break;
***************
*** 412,415 ****
--- 413,445 ----
              else if (quotesmart && (ch == quote))
                  quote = 0; // exit quoted state
+             else if (quotesmart && (0 == quote) && (ch == '/'))
+             {
+                 // handle multiline and double slash comments (with a quote) in script like:
+                 // I can't handle single quotations.
+                 ch = mPage.getCharacter (cursor);
+                 if (0 == ch)
+                     done = true;
+                 else if ('/' == ch)
+                 {
+                     do
+                         ch = mPage.getCharacter (cursor);
+                     while ((ch != 0) && (ch != '\n'));
+                 }
+                 else if ('*' == ch)
+                 {
+                     do
+                     {
+                         do
+                             ch = mPage.getCharacter (cursor);
+                         while ((ch != 0) && (ch != '*'));
+                         ch = mPage.getCharacter (cursor);
+                         if (ch == '*')
+                             cursor.retreat ();
+                     }
+                     while ((ch != 0) && (ch != '/'));
+                 }
+                 else
+                     cursor.retreat ();
+             }
              else if ((0 == quote) && ('<' == ch))
              {
|