Update of /cvsroot/squirrel-sql/mavenize/thirdparty-non-maven/ostermiller-syntax/src/main/java/com/Ostermiller/Syntax/Lexer
In directory sfp-cvsdas-3.v30.ch3.sourceforge.com:/tmp/cvs-serv32334/thirdparty-non-maven/ostermiller-syntax/src/main/java/com/Ostermiller/Syntax/Lexer
Added Files:
JavaToken.java SQLLexer.java PlainLexer.java SQLToken.java
LatexLexer.java HTMLLexer1.java JavaScriptLexer.java
JavaScriptToken.java CToken.java package.html Lexer.java
HTMLLexer.java PropertiesToken.java CLexer.java
HTMLToken1.java Token.java PlainToken.java LatexToken.java
JavaLexer.java PropertiesLexer.java HTMLToken.java
Log Message:
Source for thirdparty dependency. Maven central requires a valid source code repository for artifacts that it hosts. This project has none, so we host it here for the time being.
--- NEW FILE: PropertiesToken.java ---
/*
* This file is part of a syntax highlighting package
* Copyright (C) 2002 Stephen Ostermiller
* http://ostermiller.org/contact.pl?regarding=Syntax+Highlighting
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* See COPYING.TXT for details.
*/
package com.Ostermiller.Syntax.Lexer;
/**
* A PropertiesToken is a token that is returned by a lexer that is lexing a Java
* Properties file. It has several attributes describing the token:
* The type of token, the text of the token, the line number on which it
* occurred, the number of characters into the input at which it started, and
* similarly, the number of characters into the input at which it ended. <br>
*/
public class PropertiesToken extends Token {
    // Token IDs. The bits above the low byte (ID >> 8) select the category
    // tested by the is...() methods below.

    // Category 0x1: separators (see isSeparator()).
    public static final int EQUAL = 0x102;
    public static final int COLON = 0x103;
    public static final int LINE_CONTINUE = 0x104;

    // Category 0x5: names (see isName()).
    public static final int NAME = 0x500;

    // Category 0x6: values (see isValue()).
    public static final int VALUE = 0x600;

    // Category 0xD: comments (see isComment()).
    public static final int COMMENT = 0xD00;

    // Category 0xE: white space (see isWhiteSpace()).
    public static final int WHITE_SPACE = 0xE00;
    public static final int MID_TOKEN_WHITE_SPACE = 0xE01;

    // A token never changes after construction, so every field is final.
    private final int ID;
    private final String contents;
    private final int lineNumber;
    private final int charBegin;
    private final int charEnd;
    private final int state;

    /**
     * Create a new token whose tokenizer state is Token.UNDEFINED_STATE.
     * The constructor is typically called by the lexer
     *
     * @param ID the id number of the token
     * @param contents A string representing the text of the token
     * @param lineNumber the line number of the input on which this token started
     * @param charBegin the offset into the input in characters at which this token started
     * @param charEnd the offset into the input in characters at which this token ended
     * @throws NullPointerException if contents is null
     */
    public PropertiesToken(int ID, String contents, int lineNumber, int charBegin, int charEnd) {
        this(ID, contents, lineNumber, charBegin, charEnd, Token.UNDEFINED_STATE);
    }

    /**
     * Create a new token.
     * The constructor is typically called by the lexer
     *
     * @param ID the id number of the token
     * @param contents A string representing the text of the token
     * @param lineNumber the line number of the input on which this token started
     * @param charBegin the offset into the input in characters at which this token started
     * @param charEnd the offset into the input in characters at which this token ended
     * @param state the state the tokenizer is in after returning this token.
     * @throws NullPointerException if contents is null
     */
    public PropertiesToken(int ID, String contents, int lineNumber, int charBegin, int charEnd, int state) {
        if (contents == null) {
            // Reject null up front so a broken token is never constructed.
            throw new NullPointerException("contents");
        }
        this.ID = ID;
        // Strings are immutable, so the reference is stored without copying.
        this.contents = contents;
        this.lineNumber = lineNumber;
        this.charBegin = charBegin;
        this.charEnd = charEnd;
        this.state = state;
    }

    /**
     * Get an integer representing the state the tokenizer is in after
     * returning this token.
     * Those who are interested in incremental tokenizing for performance
     * reasons will want to use this method to figure out where the tokenizer
     * may be restarted. The tokenizer starts in Token.INITIAL_STATE, so
     * any time that it reports that it has returned to this state, the
     * tokenizer may be restarted from there.
     *
     * @return the tokenizer state recorded when this token was created
     */
    public int getState() {
        return state;
    }

    /**
     * get the ID number of this token
     *
     * @return the id number of the token
     */
    public int getID() {
        return ID;
    }

    /**
     * get the contents of this token
     *
     * @return A string representing the text of the token
     */
    public String getContents() {
        // Safe to hand out directly: String is immutable.
        return contents;
    }

    /**
     * get the line number of the input on which this token started
     *
     * @return the line number of the input on which this token started
     */
    public int getLineNumber() {
        return lineNumber;
    }

    /**
     * get the offset into the input in characters at which this token started
     *
     * @return the offset into the input in characters at which this token started
     */
    public int getCharBegin() {
        return charBegin;
    }

    /**
     * get the offset into the input in characters at which this token ended
     *
     * @return the offset into the input in characters at which this token ended
     */
    public int getCharEnd() {
        return charEnd;
    }

    /**
     * Checks this token to see if it is a separator
     * (an ID in the 0x1 category, such as EQUAL, COLON or LINE_CONTINUE).
     *
     * @return true if this token is a separator, false otherwise
     */
    public boolean isSeparator() {
        return (ID >> 8) == 0x1;
    }

    /**
     * Checks this token to see if it is a name of a name value pair.
     *
     * @return true if this token is a name, false otherwise
     */
    public boolean isName() {
        return (ID >> 8) == 0x5;
    }

    /**
     * Checks this token to see if it is a value of a name value pair.
     *
     * @return true if this token is a value, false otherwise
     */
    public boolean isValue() {
        return (ID >> 8) == 0x6;
    }

    /**
     * Checks this token to see if it is a comment.
     *
     * @return true if this token is a comment, false otherwise
     */
    public boolean isComment() {
        return (ID >> 8) == 0xD;
    }

    /**
     * Checks this token to see if it is White Space.
     * Usually tabs, line breaks, form feed, spaces, etc.
     *
     * @return true if this token is White Space, false otherwise
     */
    public boolean isWhiteSpace() {
        return (ID >> 8) == 0xE;
    }

    /**
     * Checks this token to see if it is an Error, i.e. its ID is in the
     * 0xF category. This class itself declares no error IDs.
     *
     * @return true if this token is an Error, false otherwise
     */
    public boolean isError() {
        return (ID >> 8) == 0xF;
    }

    /**
     * A description of this token. The description should
     * be appropriate for syntax highlighting. For example
     * "comment" is returned for a comment.
     *
     * @return a description of this token; "unknown" if the ID belongs to
     *         none of the categories this class recognizes.
     */
    public String getDescription() {
        if (isSeparator()) {
            return "separator";
        }
        if (isName()) {
            return "name";
        }
        if (isValue()) {
            return "value";
        }
        if (isComment()) {
            return "comment";
        }
        if (isWhiteSpace()) {
            return "whitespace";
        }
        if (isError()) {
            return "error";
        }
        return "unknown";
    }

    /**
     * get a String that explains the error, if this token is an error.
     * This class has no error messages of its own, so the result is
     * always the empty string, never null.
     *
     * @return the empty string
     */
    public String errorString() {
        return "";
    }

    /**
     * get a representation of this token as a human readable string.
     * The format of this string is subject to change and should only be used
     * for debugging purposes.
     *
     * @return a string representation of this token
     */
    public String toString() {
        return "Token #" + Integer.toHexString(ID) + ": " + getDescription() + " Line "
                + lineNumber + " from " + charBegin + " to " + charEnd + " : " + contents;
    }
}
--- NEW FILE: PlainToken.java ---
/*
* This file is part of a syntax highlighting package
* Copyright (C) 2002 Stephen Ostermiller
* http://ostermiller.org/contact.pl?regarding=Syntax+Highlighting
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* See COPYING.TXT for details.
*/
package com.Ostermiller.Syntax.Lexer;
/**
* A PlainToken is a token that is returned by a lexer that is lexing a plain
* text file. It has several attributes describing the token:
* The type of token, the text of the token, the line number on which it
* occurred, the number of characters into the input at which it started, and
* similarly, the number of characters into the input at which it ended.
*/
public class PlainToken extends Token {
    // Token IDs. The bits above the low byte (ID >> 8) select the category
    // tested by isText() (0x2) and isWhiteSpace() (0xE).
    public static final int TEXT = 0x200;
    public static final int WHITE_SPACE = 0xE00;

    // A token never changes after construction, so every field is final.
    private final int ID;
    private final String contents;
    private final int lineNumber;
    private final int charBegin;
    private final int charEnd;
    private final int state;

    /**
     * Create a new token whose tokenizer state is Token.UNDEFINED_STATE.
     * The constructor is typically called by the lexer
     *
     * @param ID the id number of the token
     * @param contents A string representing the text of the token
     * @param lineNumber the line number of the input on which this token started
     * @param charBegin the offset into the input in characters at which this token started
     * @param charEnd the offset into the input in characters at which this token ended
     * @throws NullPointerException if contents is null
     */
    public PlainToken(int ID, String contents, int lineNumber, int charBegin, int charEnd) {
        this(ID, contents, lineNumber, charBegin, charEnd, Token.UNDEFINED_STATE);
    }

    /**
     * Create a new token.
     * The constructor is typically called by the lexer
     *
     * @param ID the id number of the token
     * @param contents A string representing the text of the token
     * @param lineNumber the line number of the input on which this token started
     * @param charBegin the offset into the input in characters at which this token started
     * @param charEnd the offset into the input in characters at which this token ended
     * @param state the state the tokenizer is in after returning this token.
     * @throws NullPointerException if contents is null
     */
    public PlainToken(int ID, String contents, int lineNumber, int charBegin, int charEnd, int state) {
        if (contents == null) {
            // Reject null up front so a broken token is never constructed.
            throw new NullPointerException("contents");
        }
        this.ID = ID;
        // Strings are immutable, so the reference is stored without copying.
        this.contents = contents;
        this.lineNumber = lineNumber;
        this.charBegin = charBegin;
        this.charEnd = charEnd;
        this.state = state;
    }

    /**
     * Get an integer representing the state the tokenizer is in after
     * returning this token.
     * Those who are interested in incremental tokenizing for performance
     * reasons will want to use this method to figure out where the tokenizer
     * may be restarted. The tokenizer starts in Token.INITIAL_STATE, so
     * any time that it reports that it has returned to this state, the
     * tokenizer may be restarted from there.
     *
     * @return the tokenizer state recorded when this token was created
     */
    public int getState() {
        return state;
    }

    /**
     * get the ID number of this token
     *
     * @return the id number of the token
     */
    public int getID() {
        return ID;
    }

    /**
     * get the contents of this token
     *
     * @return A string representing the text of the token
     */
    public String getContents() {
        // Safe to hand out directly: String is immutable.
        return contents;
    }

    /**
     * get the line number of the input on which this token started
     *
     * @return the line number of the input on which this token started
     */
    public int getLineNumber() {
        return lineNumber;
    }

    /**
     * get the offset into the input in characters at which this token started
     *
     * @return the offset into the input in characters at which this token started
     */
    public int getCharBegin() {
        return charBegin;
    }

    /**
     * get the offset into the input in characters at which this token ended
     *
     * @return the offset into the input in characters at which this token ended
     */
    public int getCharEnd() {
        return charEnd;
    }

    /**
     * Checks this token to see if it is text.
     *
     * @return true if this token is text, false otherwise
     */
    public boolean isText() {
        return (ID >> 8) == 0x2;
    }

    /**
     * Checks this token to see if it is White Space.
     * Usually tabs, line breaks, form feed, spaces, etc.
     *
     * @return true if this token is White Space, false otherwise
     */
    public boolean isWhiteSpace() {
        return (ID >> 8) == 0xE;
    }

    /**
     * Checks this token to see if it is an Error.
     * A plain token is never reported as an error, whatever its ID.
     *
     * @return false, always
     */
    public boolean isError() {
        return false;
    }

    /**
     * Checks this token to see if it is a comment.
     * A plain token is never reported as a comment, whatever its ID.
     *
     * @return false, always
     */
    public boolean isComment() {
        return false;
    }

    /**
     * A description of this token. The description should
     * be appropriate for syntax highlighting. For example
     * "text" is returned for a text token.
     *
     * @return "text", "whitespace", or "unknown" when the ID is in
     *         neither of those categories.
     */
    public String getDescription() {
        if (isText()) {
            return "text";
        }
        if (isWhiteSpace()) {
            return "whitespace";
        }
        return "unknown";
    }

    /**
     * get a String that explains the error, if this token is an error.
     * Plain tokens are never errors, and the result is always the
     * empty string, never null.
     *
     * @return the empty string
     */
    public String errorString() {
        return "";
    }

    /**
     * get a representation of this token as a human readable string.
     * The format of this string is subject to change and should only be used
     * for debugging purposes.
     *
     * @return a string representation of this token
     */
    public String toString() {
        return "Token #" + Integer.toHexString(ID) + ": " + getDescription() + " Line "
                + lineNumber + " from " + charBegin + " to " + charEnd + " : " + contents;
    }
}
--- NEW FILE: JavaLexer.java ---
/* The following code was generated by JFlex 1.4.3 on 10/9/09 6:11 PM */
/* JavaLexer.java is a generated file. You probably want to
* edit JavaLexer.lex to make changes. Use JFlex to generate it.
* To generate JavaLexer.java
* Install <a href="http://jflex.de/">JFlex</a> v1.3.2 or later.
* Once JFlex is in your classpath run<br>
* <code>java JFlex.Main JavaLexer.lex</code><br>
* You will then have a file called JavaLexer.java
*/
/*
* This file is part of a <a href="http://ostermiller.org/syntax/">syntax
* highlighting</a> package.
* Copyright (C) 1999-2002 Stephen Ostermiller
* http://ostermiller.org/contact.pl?regarding=Syntax+Highlighting
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
[...2077 lines suppressed...]
{ lastToken = JavaToken.IDENTIFIER;
String text = yytext();
JavaToken t = (new JavaToken(lastToken,text,yyline,yychar,yychar+text.length(),nextState));
return (t);
}
case 244: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
zzAtEOF = true;
return null;
}
else {
zzScanError(ZZ_NO_MATCH);
}
}
}
}
}
--- NEW FILE: CToken.java ---
/*
* This file is part of a syntax highlighting package
* Copyright (C) 1999, 2000 Stephen Ostermiller
* http://ostermiller.org/contact.pl?regarding=Syntax+Highlighting
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* See COPYING.TXT for details.
*/
package com.Ostermiller.Syntax.Lexer;
/**
* A CToken is a token that is returned by a lexer that is lexing a C
* source file. It has several attributes describing the token:
* The type of token, the text of the token, the line number on which it
* occurred, the number of characters into the input at which it started, and
* similarly, the number of characters into the input at which it ended. <br>
*/
public class CToken extends Token {
    // Token IDs. The bits above the low byte (ID >> 8) select the category
    // tested by the is...() methods below.

    // Category 0x1: reserved words (see isReservedWord()).
    public static final int RESERVED_WORD_ABSTRACT = 0x101;
    public static final int RESERVED_WORD_AUTO = 0x102;
    public static final int RESERVED_WORD_BREAK = 0x103;
    public static final int RESERVED_WORD_CASE = 0x104;
    public static final int RESERVED_WORD_CONST = 0x105;
    public static final int RESERVED_WORD_CONTINUE = 0x106;
    public static final int RESERVED_WORD_DEFAULT = 0x107;
    public static final int RESERVED_WORD_DO = 0x108;
    public static final int RESERVED_WORD_ELSE = 0x109;
    public static final int RESERVED_WORD_ENUM = 0x10A;
    public static final int RESERVED_WORD_EXTERN = 0x10B;
    public static final int RESERVED_WORD_FOR = 0x10C;
    public static final int RESERVED_WORD_GOTO = 0x10D;
    public static final int RESERVED_WORD_IF = 0x10E;
    public static final int RESERVED_WORD_REGISTER = 0x10F;
    public static final int RESERVED_WORD_RETURN = 0x110;
    public static final int RESERVED_WORD_SIZEOF = 0x111;
    public static final int RESERVED_WORD_STATIC = 0x112;
    public static final int RESERVED_WORD_STRUCT = 0x113;
    public static final int RESERVED_WORD_SWITCH = 0x114;
    public static final int RESERVED_WORD_TYPEDEF = 0x115;
    public static final int RESERVED_WORD_UNION = 0x116;
    public static final int RESERVED_WORD_VOLATILE = 0x117;
    public static final int RESERVED_WORD_WHILE = 0x118;
    public static final int RESERVED_WORD_CATCH = 0x119;
    public static final int RESERVED_WORD_CLASS = 0x11A;
    public static final int RESERVED_WORD_CONST_CAST = 0x11B;
    public static final int RESERVED_WORD_DELETE = 0x11C;
    public static final int RESERVED_WORD_DYNAMIC_CAST = 0x11D;
    public static final int RESERVED_WORD_FRIEND = 0x11E;
    public static final int RESERVED_WORD_INLINE = 0x11F;
    public static final int RESERVED_WORD_MUTABLE = 0x120;
    public static final int RESERVED_WORD_NAMESPACE = 0x121;
    public static final int RESERVED_WORD_NEW = 0x122;
    public static final int RESERVED_WORD_OPERATOR = 0x123;
    public static final int RESERVED_WORD_OVERLOAD = 0x124;
    public static final int RESERVED_WORD_PRIVATE = 0x125;
    public static final int RESERVED_WORD_PROTECTED = 0x126;
    public static final int RESERVED_WORD_PUBLIC = 0x127;
    public static final int RESERVED_WORD_REINTERPRET_CAST = 0x128;
    public static final int RESERVED_WORD_STATIC_CAST = 0x129;
    public static final int RESERVED_WORD_TEMPLATE = 0x12A;
    public static final int RESERVED_WORD_THIS = 0x12B;
    public static final int RESERVED_WORD_TRY = 0x12C;
    public static final int RESERVED_WORD_VIRTUAL = 0x12D;
    public static final int RESERVED_WORD_BOOL = 0x12E;
    public static final int RESERVED_WORD_CHAR = 0x12F;
    public static final int RESERVED_WORD_DOUBLE = 0x130;
    public static final int RESERVED_WORD_FLOAT = 0x131;
    public static final int RESERVED_WORD_INT = 0x132;
    public static final int RESERVED_WORD_LONG = 0x133;
    public static final int RESERVED_WORD_SHORT = 0x134;
    public static final int RESERVED_WORD_SIGNED = 0x135;
    public static final int RESERVED_WORD_UNSIGNED = 0x136;
    public static final int RESERVED_WORD_VOID = 0x137;
    public static final int RESERVED_WORD_ASM = 0x138;
    public static final int RESERVED_WORD_TYPENAME = 0x139;
    public static final int RESERVED_WORD_EXPLICIT = 0x13A;
    public static final int RESERVED_WORD_USING = 0x13B;
    public static final int RESERVED_WORD_THROW = 0x13C;
    public static final int RESERVED_WORD_WCHAR_T = 0x13D;
    public static final int RESERVED_WORD_TYPEID = 0x13E;

    // Category 0x2: identifiers (see isIdentifier()).
    public static final int IDENTIFIER = 0x200;

    // Category 0x3: literals (see isLiteral()).
    public static final int LITERAL_BOOLEAN = 0x300;
    public static final int LITERAL_INTEGER_DECIMAL = 0x310;
    public static final int LITERAL_INTEGER_OCTAL = 0x311;
    public static final int LITERAL_INTEGER_HEXIDECIMAL = 0x312;
    public static final int LITERAL_LONG_DECIMAL = 0x320;
    public static final int LITERAL_LONG_OCTAL = 0x321;
    public static final int LITERAL_LONG_HEXIDECIMAL = 0x322;
    public static final int LITERAL_FLOATING_POINT = 0x330;
    public static final int LITERAL_DOUBLE = 0x340;
    public static final int LITERAL_CHARACTER = 0x350;
    public static final int LITERAL_STRING = 0x360;
    public static final int LITERAL_NULL = 0x370;

    // Category 0x4: separators (see isSeparator()).
    public static final int SEPARATOR_LPAREN = 0x400;
    public static final int SEPARATOR_RPAREN = 0x401;
    public static final int SEPARATOR_LBRACE = 0x410;
    public static final int SEPARATOR_RBRACE = 0x411;
    public static final int SEPARATOR_LBRACKET = 0x420;
    public static final int SEPARATOR_RBRACKET = 0x421;
    public static final int SEPARATOR_SEMICOLON = 0x430;
    public static final int SEPARATOR_COMMA = 0x440;
    public static final int SEPARATOR_PERIOD = 0x450;
    public static final int SEPARATOR_ARROW = 0x460;

    // Category 0x5: operators (see isOperator()).
    public static final int OPERATOR_GREATER_THAN = 0x500;
    public static final int OPERATOR_LESS_THAN = 0x501;
    public static final int OPERATOR_LESS_THAN_OR_EQUAL = 0x502;
    public static final int OPERATOR_GREATER_THAN_OR_EQUAL = 0x503;
    public static final int OPERATOR_EQUAL = 0x504;
    public static final int OPERATOR_NOT_EQUAL = 0x505;
    public static final int OPERATOR_LOGICAL_NOT = 0x510;
    public static final int OPERATOR_LOGICAL_AND = 0x511;
    public static final int OPERATOR_LOGICAL_OR = 0x512;
    public static final int OPERATOR_ADD = 0x520;
    public static final int OPERATOR_SUBTRACT = 0x521;
    public static final int OPERATOR_MULTIPLY = 0x522;
    public static final int OPERATOR_DIVIDE = 0x523;
    public static final int OPERATOR_MOD = 0x524;
    public static final int OPERATOR_BITWISE_COMPLIMENT = 0x530;
    public static final int OPERATOR_BITWISE_AND = 0x531;
    public static final int OPERATOR_BITWISE_OR = 0x532;
    public static final int OPERATOR_BITWISE_XOR = 0x533;
    public static final int OPERATOR_SHIFT_LEFT = 0x540;
    public static final int OPERATOR_SHIFT_RIGHT = 0x541;
    public static final int OPERATOR_ASSIGN = 0x550;
    public static final int OPERATOR_ADD_ASSIGN = 0x560;
    public static final int OPERATOR_SUBTRACT_ASSIGN = 0x561;
    public static final int OPERATOR_MULTIPLY_ASSIGN = 0x562;
    public static final int OPERATOR_DIVIDE_ASSIGN = 0x563;
    public static final int OPERATOR_MOD_ASSIGN = 0x564;
    public static final int OPERATOR_BITWISE_AND_ASSIGN = 0x571;
    public static final int OPERATOR_BITWISE_OR_ASSIGN = 0x572;
    public static final int OPERATOR_BITWISE_XOR_ASSIGN = 0x573;
    public static final int OPERATOR_SHIFT_LEFT_ASSIGN = 0x580;
    public static final int OPERATOR_SHIFT_RIGHT_ASSIGN = 0x581;
    public static final int OPERATOR_INCREMENT = 0x590;
    public static final int OPERATOR_DECREMENT = 0x591;
    public static final int OPERATOR_QUESTION = 0x5A0;
    public static final int OPERATOR_COLON = 0x5A1;

    // Category 0xC: preprocessor (see isPreProcessor()).
    public static final int PREPROCESSOR_DIRECTIVE = 0xC00;

    // Category 0xD: comments (see isComment()).
    public static final int COMMENT_TRADITIONAL = 0xD00;
    public static final int COMMENT_END_OF_LINE = 0xD10;
    public static final int COMMENT_DOCUMENTATION = 0xD20;

    // Category 0xE: white space (see isWhiteSpace()).
    public static final int WHITE_SPACE = 0xE00;

    // Category 0xF: errors (see isError() and errorString()).
    public static final int ERROR_IDENTIFIER = 0xF00;
    public static final int ERROR_UNCLOSED_STRING = 0xF10;
    public static final int ERROR_MALFORMED_STRING = 0xF11;
    public static final int ERROR_MALFORMED_UNCLOSED_STRING = 0xF12;
    public static final int ERROR_UNCLOSED_CHARACTER = 0xF20;
    public static final int ERROR_MALFORMED_CHARACTER = 0xF21;
    public static final int ERROR_MALFORMED_UNCLOSED_CHARACTER = 0xF22;
    public static final int ERROR_INTEGER_DECIMIAL_SIZE = 0xF30;
    public static final int ERROR_INTEGER_OCTAL_SIZE = 0xF31;
    public static final int ERROR_INTEGER_HEXIDECIMAL_SIZE = 0xF32;
    public static final int ERROR_LONG_DECIMIAL_SIZE = 0xF33;
    public static final int ERROR_LONG_OCTAL_SIZE = 0xF34;
    public static final int ERROR_LONG_HEXIDECIMAL_SIZE = 0xF35;
    public static final int ERROR_FLOAT_SIZE = 0xF36;
    public static final int ERROR_DOUBLE_SIZE = 0xF37;
    public static final int ERROR_FLOAT = 0xF38;
    public static final int ERROR_UNCLOSED_COMMENT = 0xF40;
    public static final int ERROR_MALFORMED_PREPROCESSOR_DIRECTIVE = 0xF50;

    // A token never changes after construction, so every field is final.
    private final int ID;
    private final String contents;
    private final int lineNumber;
    private final int charBegin;
    private final int charEnd;
    private final int state;

    /**
     * Create a new token whose tokenizer state is Token.UNDEFINED_STATE.
     * The constructor is typically called by the lexer
     *
     * @param ID the id number of the token
     * @param contents A string representing the text of the token
     * @param lineNumber the line number of the input on which this token started
     * @param charBegin the offset into the input in characters at which this token started
     * @param charEnd the offset into the input in characters at which this token ended
     * @throws NullPointerException if contents is null
     */
    public CToken(int ID, String contents, int lineNumber, int charBegin, int charEnd) {
        this(ID, contents, lineNumber, charBegin, charEnd, Token.UNDEFINED_STATE);
    }

    /**
     * Create a new token.
     * The constructor is typically called by the lexer
     *
     * @param ID the id number of the token
     * @param contents A string representing the text of the token
     * @param lineNumber the line number of the input on which this token started
     * @param charBegin the offset into the input in characters at which this token started
     * @param charEnd the offset into the input in characters at which this token ended
     * @param state the state the tokenizer is in after returning this token.
     * @throws NullPointerException if contents is null
     */
    public CToken(int ID, String contents, int lineNumber, int charBegin, int charEnd, int state) {
        if (contents == null) {
            // Reject null up front so a broken token is never constructed.
            throw new NullPointerException("contents");
        }
        this.ID = ID;
        // Strings are immutable, so the reference is stored without copying.
        this.contents = contents;
        this.lineNumber = lineNumber;
        this.charBegin = charBegin;
        this.charEnd = charEnd;
        this.state = state;
    }

    /**
     * Get an integer representing the state the tokenizer is in after
     * returning this token.
     * Those who are interested in incremental tokenizing for performance
     * reasons will want to use this method to figure out where the tokenizer
     * may be restarted. The tokenizer starts in Token.INITIAL_STATE, so
     * any time that it reports that it has returned to this state, the
     * tokenizer may be restarted from there.
     *
     * @return the tokenizer state recorded when this token was created
     */
    public int getState() {
        return state;
    }

    /**
     * get the ID number of this token
     *
     * @return the id number of the token
     */
    public int getID() {
        return ID;
    }

    /**
     * get the contents of this token
     *
     * @return A string representing the text of the token
     */
    public String getContents() {
        // Safe to hand out directly: String is immutable.
        return contents;
    }

    /**
     * get the line number of the input on which this token started
     *
     * @return the line number of the input on which this token started
     */
    public int getLineNumber() {
        return lineNumber;
    }

    /**
     * get the offset into the input in characters at which this token started
     *
     * @return the offset into the input in characters at which this token started
     */
    public int getCharBegin() {
        return charBegin;
    }

    /**
     * get the offset into the input in characters at which this token ended
     *
     * @return the offset into the input in characters at which this token ended
     */
    public int getCharEnd() {
        return charEnd;
    }

    /**
     * Checks this token to see if it is a reserved word.
     *
     * @return true if this token is a reserved word, false otherwise
     */
    public boolean isReservedWord() {
        return (ID >> 8) == 0x1;
    }

    /**
     * Checks this token to see if it is an identifier.
     *
     * @return true if this token is an identifier, false otherwise
     */
    public boolean isIdentifier() {
        return (ID >> 8) == 0x2;
    }

    /**
     * Checks this token to see if it is a literal.
     *
     * @return true if this token is a literal, false otherwise
     */
    public boolean isLiteral() {
        return (ID >> 8) == 0x3;
    }

    /**
     * Checks this token to see if it is a Separator.
     *
     * @return true if this token is a Separator, false otherwise
     */
    public boolean isSeparator() {
        return (ID >> 8) == 0x4;
    }

    /**
     * Checks this token to see if it is an Operator.
     *
     * @return true if this token is an Operator, false otherwise
     */
    public boolean isOperator() {
        return (ID >> 8) == 0x5;
    }

    /**
     * Checks this token to see if it should be handled by the preprocessor.
     *
     * @return true if this token should be handled by the preprocessor, false otherwise
     */
    public boolean isPreProcessor() {
        return (ID >> 8) == 0xC;
    }

    /**
     * Checks this token to see if it is a comment.
     *
     * @return true if this token is a comment, false otherwise
     */
    public boolean isComment() {
        return (ID >> 8) == 0xD;
    }

    /**
     * Checks this token to see if it is White Space.
     * Usually tabs, line breaks, form feed, spaces, etc.
     *
     * @return true if this token is White Space, false otherwise
     */
    public boolean isWhiteSpace() {
        return (ID >> 8) == 0xE;
    }

    /**
     * Checks this token to see if it is an Error.
     * Unfinished comments, numbers that are too big, unclosed strings, etc.
     *
     * @return true if this token is an Error, false otherwise
     */
    public boolean isError() {
        return (ID >> 8) == 0xF;
    }

    /**
     * A description of this token. The description should
     * be appropriate for syntax highlighting. For example
     * "comment" is returned for a comment.
     *
     * @return a description of this token; "unknown" if the ID belongs to
     *         none of the categories this class recognizes.
     */
    public String getDescription() {
        if (isReservedWord()) {
            return "reservedWord";
        }
        if (isIdentifier()) {
            return "identifier";
        }
        if (isLiteral()) {
            return "literal";
        }
        if (isSeparator()) {
            return "separator";
        }
        if (isOperator()) {
            return "operator";
        }
        if (isComment()) {
            return "comment";
        }
        if (isPreProcessor()) {
            return "preprocessor";
        }
        if (isWhiteSpace()) {
            return "whitespace";
        }
        if (isError()) {
            return "error";
        }
        return "unknown";
    }

    /**
     * get a String that explains the error, if this token is an error.
     * The message starts with "Error on line N: " followed by a
     * description chosen from the token ID. An ID in the error category
     * that matches none of the ERROR_ constants yields the prefix alone.
     *
     * @return a String that explains the error, if this token is an error, null otherwise.
     */
    public String errorString() {
        if (!isError()) {
            return null;
        }
        String s = "Error on line " + lineNumber + ": ";
        switch (ID) {
            case ERROR_IDENTIFIER:
                s += "Unrecognized Identifier: " + contents;
                break;
            case ERROR_UNCLOSED_STRING:
                s += "'\"' expected after " + contents;
                break;
            case ERROR_MALFORMED_STRING:
            case ERROR_MALFORMED_UNCLOSED_STRING:
                s += "Illegal character in " + contents;
                break;
            case ERROR_UNCLOSED_CHARACTER:
                s += "\"'\" expected after " + contents;
                break;
            case ERROR_MALFORMED_CHARACTER:
            case ERROR_MALFORMED_UNCLOSED_CHARACTER:
                s += "Illegal character in " + contents;
                break;
            case ERROR_FLOAT:
                s += "Illegal character in " + contents;
                break;
            // Every *_SIZE error is a literal too large for its type. The
            // decimal and octal integer cases belong here with the rest,
            // not with the malformed-literal message above.
            case ERROR_INTEGER_DECIMIAL_SIZE:
            case ERROR_INTEGER_OCTAL_SIZE:
            case ERROR_INTEGER_HEXIDECIMAL_SIZE:
            case ERROR_LONG_DECIMIAL_SIZE:
            case ERROR_LONG_OCTAL_SIZE:
            case ERROR_LONG_HEXIDECIMAL_SIZE:
            case ERROR_FLOAT_SIZE:
            case ERROR_DOUBLE_SIZE:
                s += "Literal out of bounds: " + contents;
                break;
            case ERROR_UNCLOSED_COMMENT:
                s += "*/ expected after " + contents;
                break;
            case ERROR_MALFORMED_PREPROCESSOR_DIRECTIVE:
                s += "Unrecognized preprocessor command " + contents;
                break;
        }
        return s;
    }

    /**
     * get a representation of this token as a human readable string.
     * The format of this string is subject to change and should only be used
     * for debugging purposes.
     *
     * @return a string representation of this token
     */
    public String toString() {
        return "Token #" + Integer.toHexString(ID) + ": " + getDescription() + " Line "
                + lineNumber + " from " + charBegin + " to " + charEnd + " : " + contents;
    }
}
--- NEW FILE: JavaScriptLexer.java ---
/* The following code was generated by JFlex 1.4.3 on 10/9/09 6:11 PM */
/* JavaScriptLexer.java is a generated file. You probably want to
* edit JavaScriptLexer.lex to make changes. Use JFlex to generate it.
* To generate JavaScriptLexer.java
* Install <a href="http://jflex.de/">JFlex</a> v1.3.2 or later.
* Once JFlex is in your classpath run<br>
* <code>java JFlex.Main JavaScriptLexer.lex</code><br>
* You will then have a file called JavaScriptLexer.java
*/
/*
* This file is part of a <a href="http://ostermiller.org/syntax/">syntax
* highlighting</a> package.
* Copyright (C) 1999-2002 Stephen Ostermiller
* http://ostermiller.org/contact.pl?regarding=Syntax+Highlighting
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
[...1953 lines suppressed...]
case 64:
{ lastToken = JavaScriptToken.RESERVED_WORD_IF;
JavaScriptToken t = (new JavaScriptToken(lastToken, yytext(), yyline, yychar, yychar+2, nextState));
return (t);
}
case 246: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
zzAtEOF = true;
return null;
}
else {
zzScanError(ZZ_NO_MATCH);
}
}
}
}
}
--- NEW FILE: Lexer.java ---
/*
* This file is part of a syntax highlighting package
* Copyright (C) 1999-2001 Stephen Ostermiller
* http://ostermiller.org/contact.pl?regarding=Syntax+Highlighting
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* See COPYING.TXT for details.
*/
package com.Ostermiller.Syntax.Lexer;
/**
* A lexer should implement this interface.
*/
public interface Lexer {

  /**
   * Reads the next token from the input.
   *
   * @return the next token; the generated lexers in this package return
   *         <code>null</code> once the end of the input has been reached
   * @throws java.io.IOException if reading from the underlying input fails
   */
  Token getNextToken() throws java.io.IOException;

  /**
   * Closes the current input stream, and resets the scanner to read from a
   * new input stream.
   * All internal variables are reset, and the old input stream cannot be
   * reused (the content of the internal buffer is discarded and lost).
   * The lexical state is set to the initial state.
   * Tokens read after this call report line, character, and column
   * positions that start from the values given here.
   *
   * @param reader   the new input
   * @param yyline   the line number of the first token
   * @param yychar   the position (relative to the start of the stream) of the first token
   * @param yycolumn the position (relative to the line) of the first token
   * @throws java.io.IOException if an I/O error occurs while switching readers
   */
  void reset(java.io.Reader reader, int yyline, int yychar, int yycolumn) throws java.io.IOException;
}
--- NEW FILE: LatexLexer.java ---
/* The following code was generated by JFlex 1.4.3 on 10/9/09 6:11 PM */
/* LatexLexer.java is a generated file. You probably want to
* edit LatexLexer.lex to make changes. Use JFlex to generate it.
* To generate LatexLexer.java
* Install <a href="http://jflex.de/">JFlex</a> v1.3.2 or later.
* Once JFlex is in your classpath run<br>
* <code>java JFlex.Main LatexLexer.lex</code><br>
* You will then have a file called LatexLexer.java
*/
/*
* This file is part of a <a href="http://ostermiller.org/syntax/">syntax
* highlighting</a> package.
* Copyright (C) 2002 Stephen Ostermiller
* http://ostermiller.org/contact.pl?regarding=Syntax+Highlighting
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* See COPYING.TXT for details.
*/
package com.Ostermiller.Syntax.Lexer;
import java.io.*;
/**
*
* LatexLexer is a LaTeX lexer. Created with JFlex. An example of how it is used:
* <CODE>
* <PRE>
* LatexLexer shredder = new LatexLexer(System.in);
* Token t;
* while ((t = shredder.getNextToken()) != null){
* System.out.println(t);
* }
* </PRE>
* </CODE>
*
* @see LatexToken
*/
public class LatexLexer implements Lexer {

  /** This character denotes the end of file */
  public static final int YYEOF = -1;

  /** initial size of the lookahead buffer */
  private static final int ZZ_BUFFERSIZE = 16384;

  /** lexical states */
  public static final int YYINITIAL = 0;

  /**
   * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
   * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
   *                  at the beginning of a line
   * l is of the form l = 2*k, k a non negative integer
   */
  private static final int ZZ_LEXSTATE[] = {
    0, 0
  };

  /**
   * Translates characters to character classes.
   * The table only covers the 256 characters U+0000..U+00FF; the scanner
   * loop treats every character outside that range as class 0.
   */
  private static final char [] ZZ_CMAP = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 1, 2, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 0, 0, 6, 6, 5, 6, 0, 6, 6, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 6, 6,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 6, 6, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  };

  /**
   * Translates DFA states to action switch labels.
   */
  private static final int [] ZZ_ACTION = zzUnpackAction();

  private static final String ZZ_ACTION_PACKED_0 =
    "\1\0\1\1\1\2\1\3\1\2\1\0\1\4\1\1"+
    "\2\3\1\0\1\4";

  /** Builds the 12-entry action table from its run-length packed form. */
  private static int [] zzUnpackAction() {
    int [] result = new int[12];
    int offset = 0;
    offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
    return result;
  }

  /**
   * Expands (count, value) character pairs from <code>packed</code> into
   * <code>result</code>, starting at <code>offset</code>.
   *
   * @return the index just past the last entry written
   */
  private static int zzUnpackAction(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }

  /**
   * Translates a state to a row index in the transition table
   */
  private static final int [] ZZ_ROWMAP = zzUnpackRowMap();

  private static final String ZZ_ROWMAP_PACKED_0 =
    "\0\0\0\7\0\16\0\25\0\34\0\43\0\52\0\61"+
    "\0\70\0\34\0\77\0\16";

  /** Builds the 12-entry row map from its packed form. */
  private static int [] zzUnpackRowMap() {
    int [] result = new int[12];
    int offset = 0;
    offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
    return result;
  }

  /**
   * Combines each pair of characters (high 16 bits, low 16 bits) from
   * <code>packed</code> into one int in <code>result</code>.
   *
   * @return the index just past the last entry written
   */
  private static int zzUnpackRowMap(String packed, int offset, int [] result) {
    int i = 0;  /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int high = packed.charAt(i++) << 16;
      result[j++] = high | packed.charAt(i++);
    }
    return j;
  }

  /**
   * The transition table of the DFA
   */
  private static final int [] ZZ_TRANS = zzUnpackTrans();

  private static final String ZZ_TRANS_PACKED_0 =
    "\4\2\1\3\1\4\1\5\4\2\1\6\2\0\1\7"+
    "\3\0\1\10\2\2\2\4\1\11\1\12\3\4\13\0"+
    "\3\2\1\7\3\0\1\13\2\0\4\2\1\14\2\7"+
    "\3\0\1\12\7\0\3\7";

  /** Builds the 70-entry transition table from its run-length packed form. */
  private static int [] zzUnpackTrans() {
    int [] result = new int[70];
    int offset = 0;
    offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
    return result;
  }

  /**
   * Expands (count, value) character pairs from <code>packed</code> into
   * <code>result</code>. Values are stored off by one in the packed string,
   * so a packed 0 becomes -1 ("no transition").
   *
   * @return the index just past the last entry written
   */
  private static int zzUnpackTrans(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      value--;
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }

  /* error codes */
  private static final int ZZ_UNKNOWN_ERROR = 0;
  private static final int ZZ_NO_MATCH = 1;
  private static final int ZZ_PUSHBACK_2BIG = 2;

  /* error messages for the codes above */
  private static final String ZZ_ERROR_MSG[] = {
    "Unkown internal scanner error",
    "Error: could not match input",
    "Error: pushback value was too large"
  };

  /**
   * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
   */
  private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();

  private static final String ZZ_ATTRIBUTE_PACKED_0 =
    "\1\0\3\1\1\11\1\0\3\1\1\11\1\0\1\1";

  /** Builds the 12-entry attribute table from its run-length packed form. */
  private static int [] zzUnpackAttribute() {
    int [] result = new int[12];
    int offset = 0;
    offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
    return result;
  }

  /**
   * Expands (count, value) character pairs from <code>packed</code> into
   * <code>result</code>, starting at <code>offset</code>.
   *
   * @return the index just past the last entry written
   */
  private static int zzUnpackAttribute(String packed, int offset, int [] result) {
    int i = 0;       /* index in packed string  */
    int j = offset;  /* index in unpacked array */
    int l = packed.length();
    while (i < l) {
      int count = packed.charAt(i++);
      int value = packed.charAt(i++);
      do result[j++] = value; while (--count > 0);
    }
    return j;
  }

  /** the input device */
  private java.io.Reader zzReader;

  /** the current state of the DFA */
  private int zzState;

  /** the current lexical state */
  private int zzLexicalState = YYINITIAL;

  /** this buffer contains the current text to be matched and is
      the source of the yytext() string */
  private char zzBuffer[] = new char[ZZ_BUFFERSIZE];

  /** the textposition at the last accepting state */
  private int zzMarkedPos;

  /** the current text position in the buffer */
  private int zzCurrentPos;

  /** startRead marks the beginning of the yytext() string in the buffer */
  private int zzStartRead;

  /** endRead marks the last character in the buffer, that has been read
      from input */
  private int zzEndRead;

  /** number of newlines encountered up to the start of the matched text */
  private int yyline;

  /** the number of characters up to the start of the matched text */
  private int yychar;

  /**
   * the number of characters from the last newline up to the start of the
   * matched text
   */
  private int yycolumn;

  /**
   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
   */
  private boolean zzAtBOL = true;

  /** zzAtEOF == true <=> the scanner is at the EOF */
  private boolean zzAtEOF;

  /** denotes if the user-EOF-code has already been executed */
  private boolean zzEOFDone;

  /* user code: */
  /** id of the token most recently produced */
  private int lastToken;
  /** state value handed to every token that is created */
  private int nextState=YYINITIAL;

  /**
   * next Token method that allows you to control if whitespace and comments are
   * returned as tokens.
   *
   * @param returnComments   if false, tokens for which isComment() is true are skipped
   * @param returnWhiteSpace if false, tokens for which isWhiteSpace() is true are skipped
   * @return the next token that passes the filters, or null at the end of input
   * @throws IOException if reading the input fails
   */
  public Token getNextToken(boolean returnComments, boolean returnWhiteSpace)throws IOException{
    Token t = getNextToken();
    while (t != null && ((!returnWhiteSpace && t.isWhiteSpace()) || (!returnComments && t.isComment()))){
      t = getNextToken();
    }
    return (t);
  }

  /**
   * Prints out tokens from a file or System.in.
   * If no arguments are given, System.in will be used for input.
   * If more arguments are given, the first argument will be used as
   * the name of the file to use as input
   *
   * @param args program arguments, of which the first is a filename
   */
  public static void main(String[] args) {
    InputStream in = null;
    // only a stream opened here is closed here; System.in is left alone
    boolean openedHere = false;
    try {
      if (args.length > 0){
        File f = new File(args[0]);
        if (!f.exists()){
          throw new IOException("Could not find " + args[0]);
        }
        if (!f.canRead()){
          throw new IOException("Could not open " + args[0]);
        }
        in = new FileInputStream(f);
        openedHere = true;
      } else {
        in = System.in;
      }
      LatexLexer shredder = new LatexLexer(in);
      Token t;
      while ((t = shredder.getNextToken()) != null) {
        // ask the token itself rather than comparing against a constant
        // that belongs to a different lexer's token class
        if (!t.isWhiteSpace()){
          System.out.println(t);
        }
      }
    } catch (IOException e){
      System.out.println(e.getMessage());
    } finally {
      if (openedHere) {
        try {
          in.close();
        } catch (IOException ignored) {
          // the input was only read; a failure to close it loses nothing
        }
      }
    }
  }

  /**
   * Closes the current input stream, and resets the scanner to read from a new input stream.
   * All internal variables are reset, the old input stream cannot be reused
   * (content of the internal buffer is discarded and lost).
   * The lexical state is set to the initial state.
   * Subsequent tokens read from the lexer will start with the line, char, and column
   * values given here.
   * <p>
   * If <code>reader</code> is the very reader the scanner is already using,
   * it is not closed, so that the same reader can be handed back in.
   *
   * @param reader The new input.
   * @param yyline The line number of the first token.
   * @param yychar The position (relative to the start of the stream) of the first token.
   * @param yycolumn The position (relative to the line) of the first token.
   * @throws IOException if an IOException occurs while closing the previous reader.
   */
  public void reset(java.io.Reader reader, int yyline, int yychar, int yycolumn) throws IOException{
    // yyreset() does not close the old reader, so do it here
    if (zzReader != reader) {
      yyclose();
    }
    yyreset(reader);
    this.yyline = yyline;
    this.yychar = yychar;
    this.yycolumn = yycolumn;
  }

  /**
   * Creates a new scanner
   * There is also a java.io.InputStream version of this constructor.
   *
   * @param   in  the java.io.Reader to read input from.
   */
  public LatexLexer(java.io.Reader in) {
    this.zzReader = in;
  }

  /**
   * Creates a new scanner.
   * There is also java.io.Reader version of this constructor.
   * The bytes are decoded by an InputStreamReader created without an
   * explicit charset.
   *
   * @param   in  the java.io.Inputstream to read input from.
   */
  public LatexLexer(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in));
  }

  /**
   * Refills the input buffer.
   *
   * @return      <code>false</code>, iff there was new input.
   *
   * @exception   java.io.IOException  if any I/O-Error occurs
   */
  private boolean zzRefill() throws java.io.IOException {

    /* first: make room (if you can) */
    if (zzStartRead > 0) {
      System.arraycopy(zzBuffer, zzStartRead,
                       zzBuffer, 0,
                       zzEndRead-zzStartRead);

      /* translate stored positions */
      zzEndRead-= zzStartRead;
      zzCurrentPos-= zzStartRead;
      zzMarkedPos-= zzStartRead;
      zzStartRead = 0;
    }

    /* is the buffer big enough? */
    if (zzCurrentPos >= zzBuffer.length) {
      /* if not: blow it up */
      char newBuffer[] = new char[zzCurrentPos*2];
      System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
      zzBuffer = newBuffer;
    }

    /* finally: fill the buffer with new input */
    int numRead = zzReader.read(zzBuffer, zzEndRead,
                                zzBuffer.length-zzEndRead);

    if (numRead > 0) {
      zzEndRead+= numRead;
      return false;
    }
    // unlikely but not impossible: read 0 characters, but not at end of stream
    if (numRead == 0) {
      int c = zzReader.read();
      if (c == -1) {
        return true;
      } else {
        zzBuffer[zzEndRead++] = (char) c;
        return false;
      }
    }

    // numRead < 0
    return true;
  }

  /**
   * Closes the input stream.
   */
  public final void yyclose() throws java.io.IOException {
    zzAtEOF = true;            /* indicate end of file */
    zzEndRead = zzStartRead;  /* invalidate buffer    */

    if (zzReader != null)
      zzReader.close();
  }

  /**
   * Resets the scanner to read from a new input stream.
   * Does not close the old reader.
   *
   * All internal variables are reset, the old input stream
   * <b>cannot</b> be reused (internal buffer is discarded and lost).
   * Lexical state is set to <tt>ZZ_INITIAL</tt>.
   *
   * @param reader   the new input stream
   */
  public final void yyreset(java.io.Reader reader) {
    zzReader = reader;
    zzAtBOL  = true;
    zzAtEOF  = false;
    zzEOFDone = false;
    zzEndRead = zzStartRead = 0;
    zzCurrentPos = zzMarkedPos = 0;
    yyline = yychar = yycolumn = 0;
    zzLexicalState = YYINITIAL;
  }

  /**
   * Returns the current lexical state.
   */
  public final int yystate() {
    return zzLexicalState;
  }

  /**
   * Enters a new lexical state
   *
   * @param newState the new lexical state
   */
  public final void yybegin(int newState) {
    zzLexicalState = newState;
  }

  /**
   * Returns the text matched by the current regular expression.
   */
  public final String yytext() {
    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
  }

  /**
   * Returns the character at position <tt>pos</tt> from the
   * matched text.
   *
   * It is equivalent to yytext().charAt(pos), but faster
   *
   * @param pos the position of the character to fetch.
   *            A value from 0 to yylength()-1.
   *
   * @return the character at position pos
   */
  public final char yycharat(int pos) {
    return zzBuffer[zzStartRead+pos];
  }

  /**
   * Returns the length of the matched text region.
   */
  public final int yylength() {
    return zzMarkedPos-zzStartRead;
  }

  /**
   * Reports an error that occured while scanning.
   *
   * In a wellformed scanner (no or only correct usage of
   * yypushback(int) and a match-all fallback rule) this method
   * will only be called with things that "Can't Possibly Happen".
   * If this method is called, something is seriously wrong
   * (e.g. a JFlex bug producing a faulty scanner etc.).
   *
   * Usual syntax/scanner level error handling should be done
   * in error fallback rules.
   *
   * @param   errorCode  the code of the errormessage to display
   */
  private void zzScanError(int errorCode) {
    String message;
    try {
      message = ZZ_ERROR_MSG[errorCode];
    }
    catch (ArrayIndexOutOfBoundsException e) {
      message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
    }

    throw new Error(message);
  }

  /**
   * Pushes the specified amount of characters back into the input stream.
   *
   * They will be read again by then next call of the scanning method
   *
   * @param number  the number of characters to be read again.
   *                This number must not be greater than yylength()!
   */
  public void yypushback(int number)  {
    if ( number > yylength() )
      zzScanError(ZZ_PUSHBACK_2BIG);

    zzMarkedPos -= number;
  }

  /**
   * Builds the token for the text that was just matched.
   * Records <code>tokenId</code> as the last token id and uses the current
   * line and character position for the token's location.
   *
   * @param tokenId one of the LatexToken id constants
   * @return the new token
   */
  private LatexToken createToken(int tokenId) {
    lastToken = tokenId;
    String text = yytext();
    return new LatexToken(lastToken, text, yyline, yychar, yychar + text.length(), nextState);
  }

  /**
   * Resumes scanning until the next regular expression is matched,
   * the end of input is encountered or an I/O-Error occurs.
   *
   * @return      the next token, or null when the end of input is reached
   * @exception   java.io.IOException  if any I/O-Error occurs
   */
  public Token getNextToken() throws java.io.IOException {
    int zzInput;
    int zzAction;

    // cached fields:
    int zzCurrentPosL;
    int zzMarkedPosL;
    int zzEndReadL = zzEndRead;
    char [] zzBufferL = zzBuffer;
    char [] zzCMapL = ZZ_CMAP;

    int [] zzTransL = ZZ_TRANS;
    int [] zzRowMapL = ZZ_ROWMAP;
    int [] zzAttrL = ZZ_ATTRIBUTE;

    while (true) {
      zzMarkedPosL = zzMarkedPos;

      // advance the character offset past the previously matched text
      yychar+= zzMarkedPosL-zzStartRead;

      // count the line terminators contained in the previously matched text
      boolean zzR = false;
      for (zzCurrentPosL = zzStartRead; zzCurrentPosL < zzMarkedPosL;
                                                             zzCurrentPosL++) {
        switch (zzBufferL[zzCurrentPosL]) {
        case '\u000B':
        case '\u000C':
        case '\u0085':
        case '\u2028':
        case '\u2029':
          yyline++;
          zzR = false;
          break;
        case '\r':
          yyline++;
          zzR = true;
          break;
        case '\n':
          if (zzR)
            zzR = false;
          else {
            yyline++;
          }
          break;
        default:
          zzR = false;
        }
      }

      if (zzR) {
        // peek one character ahead if it is \n (if we have counted one line too much)
        boolean zzPeek;
        if (zzMarkedPosL < zzEndReadL)
          zzPeek = zzBufferL[zzMarkedPosL] == '\n';
        else if (zzAtEOF)
          zzPeek = false;
        else {
          boolean eof = zzRefill();
          zzEndReadL = zzEndRead;
          zzMarkedPosL = zzMarkedPos;
          zzBufferL = zzBuffer;
          if (eof)
            zzPeek = false;
          else
            zzPeek = zzBufferL[zzMarkedPosL] == '\n';
        }
        if (zzPeek) yyline--;
      }
      zzAction = -1;

      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;

      zzState = ZZ_LEXSTATE[zzLexicalState];

      zzForAction: {
        while (true) {

          if (zzCurrentPosL < zzEndReadL)
            zzInput = zzBufferL[zzCurrentPosL++];
          else if (zzAtEOF) {
            zzInput = YYEOF;
            break zzForAction;
          }
          else {
            // store back cached positions
            zzCurrentPos  = zzCurrentPosL;
            zzMarkedPos   = zzMarkedPosL;
            boolean eof = zzRefill();
            // get translated positions and possibly new buffer
            zzCurrentPosL  = zzCurrentPos;
            zzMarkedPosL   = zzMarkedPos;
            zzBufferL      = zzBuffer;
            zzEndReadL     = zzEndRead;
            if (eof) {
              zzInput = YYEOF;
              break zzForAction;
            }
            else {
              zzInput = zzBufferL[zzCurrentPosL++];
            }
          }
          // The class map only covers U+0000..U+00FF. Characters beyond it
          // are treated as class 0 (the class letters and digits map to)
          // instead of indexing past the end of the table.
          int zzCharClass = zzInput < zzCMapL.length ? zzCMapL[zzInput] : 0;
          int zzNext = zzTransL[ zzRowMapL[zzState] + zzCharClass ];
          if (zzNext == -1) break zzForAction;
          zzState = zzNext;

          int zzAttributes = zzAttrL[zzState];
          if ( (zzAttributes & 1) == 1 ) {
            zzAction = zzState;
            zzMarkedPosL = zzCurrentPosL;
            if ( (zzAttributes & 8) == 8 ) break zzForAction;
          }

        }
      }

      // store back cached position
      zzMarkedPos = zzMarkedPosL;

      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
        case 4:
          return createToken(LatexToken.COMMAND);
        case 5: break;
        case 2:
          return createToken(LatexToken.COMMAND_CHAR);
        case 6: break;
        case 3:
          return createToken(LatexToken.COMMENT);
        case 7: break;
        case 1:
          return createToken(LatexToken.TEXT);
        case 8: break;
        default:
          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
            zzAtEOF = true;
            return null;
          }
          else {
            zzScanError(ZZ_NO_MATCH);
          }
      }
    }
  }

}
--- NEW FILE: HTMLToken1.java ---
package com.Ostermiller.Syntax.Lexer;
/**
* An HTMLToken1 is a token that is returned by a lexer that is lexing an HTML
* source file. It has several attributes describing the token:
* The type of token, the text of the token, the line number on which it
* occurred, the number of characters into the input at which it started, and
* similarly, the number of characters into the input at which it ended. <br>
*/
public class HTMLToken1 extends Token {
public final static int TAG_START = 0x100;
public final static int TAG_END = 0x101;
public final static int EQUAL = 0x102;
public final static int WORD = 0x200;
public final static int REFERENCE = 0x300;
public final static int TAG_NAME = 0x400;
public final static int END_TAG_NAME = 0x401;
public final static int NAME = 0x500;
public final static int VALUE = 0x600;
public final static int CHAR_REF = 0x700;
public final static int SCRIPT = 0x800;
public final static int COMMENT = 0xD00;
public final static int WHITE_SPACE = 0xE00;
public final static int ERROR_MALFORMED_TAG = 0xF00;
private int ID;
private String contents;
private int lineNumber;
private int charBegin;
private int charEnd;
private int state;
/**
* Create a new token.
* The constructor is typically called by the lexer
*
* @param ID the id number of the token
* @param contents A string representing the text of the token
* @param lineNumber the line number of the input on which this token started
* @param charBegin the offset into the input in characters at which this token started
* @param charEnd the offset into the input in characters at which this token ended
*/
public HTMLToken1(int ID, String contents, int lineNumber, int charBegin, int charEnd){
this (ID, contents, lineNumber, charBegin, charEnd, Token.UNDEFINED_STATE);
}
/**
* Create a new token.
* The constructor is typically called by the lexer
*
* @param ID the id number of the token
* @param contents A string representing the text of the token
* @param lineNumber the line number of the input on which this token started
* @param charBegin the offset into the input in characters at which this token started
* @param charEnd the offset into the input in characters at which this token ended
* @param state the state the tokenizer is in after returning this token.
*/
public HTMLToken1(int ID, String contents, int lineNumber, int charBegin, int charEnd, int state){
this.ID = ID;
this.contents = new String(contents);
this.lineNumber = lineNumber;
this.charBegin = charBegin;
this.charEnd = charEnd;
this.state = state;
}
/**
* Get an integer representing the state the tokenizer is in after
* returning this token.
* Those who are interested in incremental tokenizing for performance
* reasons will want to use this method to figure out where the tokenizer
* may be restarted. The tokenizer starts in Token.INITIAL_STATE, so
* any time that it reports that it has returned to this state, the
* tokenizer may be restarted from there.
*/
public int getState(){
return state;
}
/**
* get the ID number of this token
*
* @return the id number of the token
*/
public int getID(){
return ID;
}
/**
* get the contents of this token
*
* @return A string representing the text of the token
*/
public String getContents(){
return (new String(contents));
}
/**
* get the line number of the input on which this token started
...
[truncated message content] |