Update of //cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer
In directory sc8-pr-cvs5.sourceforge.net:/tmp/cvs-serv27338/lexer
Modified Files:
Lexer.java
Log Message:
fix bug #1345049 HTMLParser should not terminate a comment with --->
add static STRICT_REMARKS to Lexer class, which when true follows the specification for remarks
Index: Lexer.java
===================================================================
RCS file: //cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v
retrieving revision 1.45
retrieving revision 1.46
diff -C2 -d -r1.45 -r1.46
*** Lexer.java 14 Apr 2006 22:18:47 -0000 1.45
--- Lexer.java 27 May 2006 14:02:27 -0000 1.46
***************
*** 93,96 ****
--- 93,106 ----
/**
+ * Process remarks strictly flag.
+ * If <code>true</code>, remarks are not terminated by ---&gt;
+ * or --!&gt;, i.e. more than two dashes. If <code>false</code>,
+ * a more lax (and closer to typical browser handling) remark parsing
+ * is used.
+ * Default <code>{@value}</code>.
+ */
+ public static boolean STRICT_REMARKS = true;
+
+ /**
* The page lexemes are retrieved from.
*/
***************
*** 1201,1208 ****
* This method uses a state machine with the following states:
* <ol>
! * <li>state 0 - prior to the first open delimiter</li>
! * <li>state 1 - prior to the second open delimiter</li>
! * <li>state 2 - prior to the first closing delimiter</li>
! * <li>state 3 - prior to the second closing delimiter</li>
* <li>state 4 - prior to the terminating ></li>
* </ol>
--- 1211,1218 ----
* This method uses a state machine with the following states:
* <ol>
! * <li>state 0 - prior to the first open delimiter (first dash)</li>
! * <li>state 1 - prior to the second open delimiter (second dash)</li>
! * <li>state 2 - prior to the first closing delimiter (first dash)</li>
! * <li>state 3 - prior to the second closing delimiter (second dash)</li>
* <li>state 4 - prior to the terminating ></li>
* </ol>
***************
*** 1275,1284 ****
if ('>' == ch)
done = true;
! else if (('!' == ch) || ('-' == ch) || Character.isWhitespace (ch))
{
// stay in state 4
}
else
! state = 2;
break;
default:
--- 1285,1301 ----
if ('>' == ch)
done = true;
! else if (Character.isWhitespace (ch))
{
// stay in state 4
}
else
! if (!STRICT_REMARKS && (('-' == ch) || ('!' == ch)))
! {
! // stay in state 4
! }
! else
! // bug #1345049 HTMLParser should not terminate a comment with --->
! // should maybe issue a warning mentioning STRICT_REMARKS
! state = 2;
break;
default:
|