Thread: [Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer Lexer.java,1.24,1.25
Brought to you by:
derrickoswald
From: <der...@pr...> - 2004-01-27 16:20:24
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3286/lexer Modified Files: Lexer.java Log Message: Fix bug #880283 Character ">" erroneously inserted by Lexer. Some jsp tags are now handled in a separate jsp parse in the lexer. Jsp tags embedded as attributes are still not handled. Refer to bug #772700 Jsp Tags are not parsed correctly when in quoted attributes, which is now reversed (i.e. in quotes are OK, outside of quotes causes problems), but this points out a deficiency in the data structure holding tag contents (attribute lists) that doesn't provide for tags within attributes. Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** Lexer.java 2 Jan 2004 16:24:53 -0000 1.24 --- Lexer.java 24 Jan 2004 17:13:43 -0000 1.25 *************** *** 267,270 **** --- 267,275 ---- if (0 == ch) ret = makeString (probe); + else if ('%' == ch) + { + probe.retreat (); + ret = parseJsp (probe); + } else if ('/' == ch || '%' == ch || Character.isLetter (ch)) { *************** *** 974,977 **** --- 979,1128 ---- } + /** + * Parse a java server page node. + * Scan characters until "%>" is encountered, or the input stream is + * exhausted, in which case <code>null</code> is returned. + * @param cursor The position at which to start scanning. 
+ */ + protected Node parseJsp (Cursor cursor) + throws + ParserException + { + boolean done; + char ch; + int state; + Vector attributes; + int code; + Node ret; + + done = false; + state = 0; + code = 0; + attributes = new Vector (); + // <%xyz%> + // 012223d + // <%=xyz%> + // 0122223d + // <%@xyz%> + // 0122223d + while (!done) + { + ch = mPage.getCharacter (cursor); + switch (state) + { + case 0: // prior to the percent + switch (ch) + { + case '%': // <% + state = 1; + break; + // case 0: // <\0 + // case '>': // <> + default: + done = true; + break; + } + break; + case 1: // prior to the optional qualifier + switch (ch) + { + case 0: // <%\0 + case '>': // <%> + done = true; + break; + case '=': // <%= + case '@': // <%@ + code = cursor.getPosition (); + attributes.addElement (new PageAttribute (mPage, mCursor.getPosition () + 1, code, -1, -1, (char)0)); + state = 2; + break; + default: // <%x + code = cursor.getPosition () - 1; + attributes.addElement (new PageAttribute (mPage, mCursor.getPosition () + 1, code, -1, -1, (char)0)); + state = 2; + break; + } + break; + case 2: // prior to the closing percent + switch (ch) + { + case 0: // <%x\0 + case '>': // <%x> + done = true; + break; + case '\'': + case '"':// <%???" 
+ state = ch; + break; + case '%': // <%???% + state = 3; + break; + default: // <%???x + break; + } + break; + case 3: + switch (ch) + { + case 0: // <%x??%\0 + done = true; + break; + case '>': + state = 4; + done = true; + break; + default: // <%???%x + state = 2; + break; + } + break; + case '"': + switch (ch) + { + case 0: // <%x??"\0 + done = true; + break; + case '"': + state = 2; + break; + default: // <%???'??x + break; + } + break; + case '\'': + switch (ch) + { + case 0: // <%x??'\0 + done = true; + break; + case '\'': + state = 2; + break; + default: // <%???"??x + break; + } + break; + default: + throw new IllegalStateException ("how the fuck did we get in state " + state); + } + } + + if (4 == state) // normal exit + { + if (0 != code) + { + state = cursor.getPosition () - 2; // reuse state + attributes.addElement (new PageAttribute (mPage, code, state, -1, -1, (char)0)); + attributes.addElement (new PageAttribute (mPage, state, state + 1, -1, -1, (char)0)); + } + else + throw new IllegalStateException ("jsp with no code!"); + } + else + return (parseString (cursor, true)); // hmmm, true? + + return (makeTag (cursor, attributes)); + } + // // NodeFactory interface |