[javascriptlint-commit] SF.net SVN: javascriptlint:[340] trunk/jsengine/tokenizer
Status: Beta
Brought to you by:
matthiasmiller
From: <mat...@us...> - 2013-10-09 21:35:02
|
Revision: 340
          http://sourceforge.net/p/javascriptlint/code/340
Author: matthiasmiller
Date: 2013-10-09 21:34:59 +0000 (Wed, 09 Oct 2013)
Log Message:
-----------
Move tokens into separate module

Modified Paths:
--------------
    trunk/jsengine/tokenizer/__init__.py

Added Paths:
-----------
    trunk/jsengine/tokenizer/tok.py

Modified: trunk/jsengine/tokenizer/__init__.py
===================================================================
--- trunk/jsengine/tokenizer/__init__.py 2013-10-09 20:33:12 UTC (rev 339)
+++ trunk/jsengine/tokenizer/__init__.py 2013-10-09 21:34:59 UTC (rev 340)
@@ -1,5 +1,6 @@
 # vim: sw=4 ts=4 et
 from jsengine import JSSyntaxError
+import tok
 
 _WHITESPACE = u'\u0020\t\u000B\u000C\u00A0\uFFFF'
 _LINETERMINATOR = u'\u000A\u000D\u2028\u2029'
@@ -10,131 +11,9 @@
          u'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + \
          u'$_'
 
-_ALL_TOKENS = []
+_KEYWORDS = tok.getkeywords()
+_PUNCTUATOR_TREE = tok.get_punctuator_tree()
 
-class _Token(object):
-    def __init__(self, category, literal):
-        self._category = category
-        self._literal = literal
-        _ALL_TOKENS.append(self)
-
-    def __repr__(self):
-        return '_Token(%r, %r)' % (self._category, self._literal)
-
-    @property
-    def category(self):
-        return self._category
-
-    @property
-    def literal(self):
-        return self._literal
-
-class _Tokens(object):
-    # Symbols
-    ASSIGN_ULSHIFT = _Token('sym', '<<<=')
-    ASSIGN_URSHIFT = _Token('sym', '>>>=')
-    EQ_STRICT = _Token('sym', '===')
-    NE_STRICT = _Token('sym', '!==')
-    URSHIFT = _Token('sym', '>>>')
-    ASSIGN_LSHIFT = _Token('sym', '<<=')
-    ASSIGN_RSHIFT = _Token('sym', '>>=')
-    LE = _Token('sym', '<=')
-    GE = _Token('sym', '>=')
-    EQ = _Token('sym', '==')
-    NE = _Token('sym', '!=')
-    INC = _Token('sym', '++')
-    DEC = _Token('sym', '--')
-    LSHIFT = _Token('sym', '<<')
-    RSHIFT = _Token('sym', '>>')
-    LOGICAL_AND = _Token('sym', '&&')
-    LOGICAL_OR = _Token('sym', '||')
-    ASSIGN_ADD = _Token('sym', '+=')
-    ASSIGN_SUB = _Token('sym', '-=')
-    ASSIGN_MUL = _Token('sym', '*=')
-    ASSIGN_MOD = _Token('sym', '%=')
-    ASSIGN_BIT_AND = _Token('sym', '&=')
-    ASSIGN_BIT_OR = _Token('sym', '|=')
-    ASSIGN_BIT_XOR = _Token('sym', '^=')
-    ASSIGN_DIV = _Token('sym', '/=')
-    LBRACE = _Token('sym', '{')
-    RBRACE = _Token('sym', '}')
-    LPAREN = _Token('sym', '(')
-    RPAREN = _Token('sym', ')')
-    LBRACKET = _Token('sym', '[')
-    RBRACKET = _Token('sym', ']')
-    DOT = _Token('sym', '.')
-    SEMI = _Token('sym', ';')
-    COMMA = _Token('sym', ',')
-    LT = _Token('sym', '<')
-    GT = _Token('sym', '>')
-    ADD = _Token('sym', '+')
-    SUB = _Token('sym', '-')
-    MUL = _Token('sym', '*')
-    MOD = _Token('sym', '%')
-    BIT_OR = _Token('sym', '|')
-    BIT_AND = _Token('sym', '&')
-    BIT_XOR = _Token('sym', '^')
-    LOGICAL_NOT = _Token('sym', '!')
-    BIT_NOT = _Token('sym', '~')
-    QUESTION = _Token('sym', '?')
-    COLON = _Token('sym', ':')
-    ASSIGN = _Token('sym', '=')
-    DIV = _Token('sym', '/')
-
-    # Keywords
-    BREAK = _Token('kw', 'break')
-    CASE = _Token('kw', 'case')
-    CATCH = _Token('kw', 'catch')
-    CONTINUE = _Token('kw', 'continue')
-    DEFAULT = _Token('kw', 'default')
-    DELETE = _Token('kw', 'delete')
-    DO = _Token('kw', 'do')
-    ELSE = _Token('kw', 'else')
-    FALSE = _Token('kw', 'false')
-    FINALLY = _Token('kw', 'finally')
-    FOR = _Token('kw', 'for')
-    FUNCTION = _Token('kw', 'function')
-    IF = _Token('kw', 'if')
-    IN = _Token('kw', 'in')
-    INSTANCEOF = _Token('kw', 'instanceof')
-    NEW = _Token('kw', 'new')
-    NULL = _Token('kw', 'null')
-    RETURN = _Token('kw', 'return')
-    SWITCH = _Token('kw', 'switch')
-    THIS = _Token('kw', 'this')
-    THROW = _Token('kw', 'throw')
-    TRUE = _Token('kw', 'true')
-    TYPEOF = _Token('kw', 'typeof')
-    TRY = _Token('kw', 'try')
-    VAR = _Token('kw', 'var')
-    VOID = _Token('kw', 'void')
-    WHILE = _Token('kw', 'while')
-    WITH = _Token('kw', 'with')
-
-    # Other tokens
-    C_COMMENT = _Token('other', '/*')
-    CPP_COMMENT = _Token('other', '//')
-    HTML_COMMENT = _Token('other', '<!--')
-    ERROR = _Token('other', 'err')
-    EOF = _Token('other', 'eof')
-    EOL = _Token('other', 'eol')
-    NAME = _Token('other', '(name)')
-    NUMBER = _Token('other', '(num)')
-    OPERATOR = _Token('other', '(op)')
-    REGEXP = _Token('other', '(re)')
-    SPACE = _Token('other', '(sp)')
-    STRING = _Token('other', '(str)')
-
-tok = _Tokens()
-_KEYWORDS = dict((t.literal, t) for t in _ALL_TOKENS if t.category == 'kw')
-_PUNCTUATOR_TREE = {}
-for punctuator in (t for t in _ALL_TOKENS if t.category == 'sym'):
-    d = _PUNCTUATOR_TREE
-    for c in punctuator.literal:
-        d = d.setdefault(c, {})
-    assert not None in d, punctuator.literal
-    d[None] = punctuator
-
 class Token:
     def __init__(self, tok, atom=None):
         self.tok = tok
@@ -282,7 +161,7 @@
             peek.set_offset(start_offset, end_offset)
 
             self._peeked.append(peek)
-            assert isinstance(peek.tok, _Token), repr(peek.tok)
+            assert isinstance(peek.tok, tok.TokenType), repr(peek.tok)
 
             if peek.tok not in (tok.EOL, tok.SPACE, tok.C_COMMENT,
                                 tok.CPP_COMMENT, tok.HTML_COMMENT):

Copied: trunk/jsengine/tokenizer/tok.py (from rev 339, trunk/jsengine/tokenizer/__init__.py)
===================================================================
--- trunk/jsengine/tokenizer/tok.py (rev 0)
+++ trunk/jsengine/tokenizer/tok.py 2013-10-09 21:34:59 UTC (rev 340)
@@ -0,0 +1,128 @@
+# vim: sw=4 ts=4 et
+_ALL_TOKENS = []
+
+class TokenType(object):
+    def __init__(self, category, literal):
+        self._category = category
+        self._literal = literal
+        _ALL_TOKENS.append(self)
+
+    def __repr__(self):
+        return 'TokenType(%r, %r)' % (self._category, self._literal)
+
+    @property
+    def category(self):
+        return self._category
+
+    @property
+    def literal(self):
+        return self._literal
+
+# Symbols
+ASSIGN_ULSHIFT = TokenType('sym', '<<<=')
+ASSIGN_URSHIFT = TokenType('sym', '>>>=')
+EQ_STRICT = TokenType('sym', '===')
+NE_STRICT = TokenType('sym', '!==')
+URSHIFT = TokenType('sym', '>>>')
+ASSIGN_LSHIFT = TokenType('sym', '<<=')
+ASSIGN_RSHIFT = TokenType('sym', '>>=')
+LE = TokenType('sym', '<=')
+GE = TokenType('sym', '>=')
+EQ = TokenType('sym', '==')
+NE = TokenType('sym', '!=')
+INC = TokenType('sym', '++')
+DEC = TokenType('sym', '--')
+LSHIFT = TokenType('sym', '<<')
+RSHIFT = TokenType('sym', '>>')
+LOGICAL_AND = TokenType('sym', '&&')
+LOGICAL_OR = TokenType('sym', '||')
+ASSIGN_ADD = TokenType('sym', '+=')
+ASSIGN_SUB = TokenType('sym', '-=')
+ASSIGN_MUL = TokenType('sym', '*=')
+ASSIGN_MOD = TokenType('sym', '%=')
+ASSIGN_BIT_AND = TokenType('sym', '&=')
+ASSIGN_BIT_OR = TokenType('sym', '|=')
+ASSIGN_BIT_XOR = TokenType('sym', '^=')
+ASSIGN_DIV = TokenType('sym', '/=')
+LBRACE = TokenType('sym', '{')
+RBRACE = TokenType('sym', '}')
+LPAREN = TokenType('sym', '(')
+RPAREN = TokenType('sym', ')')
+LBRACKET = TokenType('sym', '[')
+RBRACKET = TokenType('sym', ']')
+DOT = TokenType('sym', '.')
+SEMI = TokenType('sym', ';')
+COMMA = TokenType('sym', ',')
+LT = TokenType('sym', '<')
+GT = TokenType('sym', '>')
+ADD = TokenType('sym', '+')
+SUB = TokenType('sym', '-')
+MUL = TokenType('sym', '*')
+MOD = TokenType('sym', '%')
+BIT_OR = TokenType('sym', '|')
+BIT_AND = TokenType('sym', '&')
+BIT_XOR = TokenType('sym', '^')
+LOGICAL_NOT = TokenType('sym', '!')
+BIT_NOT = TokenType('sym', '~')
+QUESTION = TokenType('sym', '?')
+COLON = TokenType('sym', ':')
+ASSIGN = TokenType('sym', '=')
+DIV = TokenType('sym', '/')
+
+# Keywords
+BREAK = TokenType('kw', 'break')
+CASE = TokenType('kw', 'case')
+CATCH = TokenType('kw', 'catch')
+CONTINUE = TokenType('kw', 'continue')
+DEFAULT = TokenType('kw', 'default')
+DELETE = TokenType('kw', 'delete')
+DO = TokenType('kw', 'do')
+ELSE = TokenType('kw', 'else')
+FALSE = TokenType('kw', 'false')
+FINALLY = TokenType('kw', 'finally')
+FOR = TokenType('kw', 'for')
+FUNCTION = TokenType('kw', 'function')
+IF = TokenType('kw', 'if')
+IN = TokenType('kw', 'in')
+INSTANCEOF = TokenType('kw', 'instanceof')
+NEW = TokenType('kw', 'new')
+NULL = TokenType('kw', 'null')
+RETURN = TokenType('kw', 'return')
+SWITCH = TokenType('kw', 'switch')
+THIS = TokenType('kw', 'this')
+THROW = TokenType('kw', 'throw')
+TRUE = TokenType('kw', 'true')
+TYPEOF = TokenType('kw', 'typeof')
+TRY = TokenType('kw', 'try')
+VAR = TokenType('kw', 'var')
+VOID = TokenType('kw', 'void')
+WHILE = TokenType('kw', 'while')
+WITH = TokenType('kw', 'with')
+
+# Other tokens
+C_COMMENT = TokenType('other', '/*')
+CPP_COMMENT = TokenType('other', '//')
+HTML_COMMENT = TokenType('other', '<!--')
+ERROR = TokenType('other', 'err')
+EOF = TokenType('other', 'eof')
+EOL = TokenType('other', 'eol')
+NAME = TokenType('other', '(name)')
+NUMBER = TokenType('other', '(num)')
+OPERATOR = TokenType('other', '(op)')
+REGEXP = TokenType('other', '(re)')
+SPACE = TokenType('other', '(sp)')
+STRING = TokenType('other', '(str)')
+
+def getkeywords():
+    return dict((t.literal, t) for t in _ALL_TOKENS if t.category == 'kw')
+
+def get_punctuator_tree():
+    tree = {}
+    for punctuator in (t for t in _ALL_TOKENS if t.category == 'sym'):
+        leaf = tree
+        for c in punctuator.literal:
+            leaf = leaf.setdefault(c, {})
+        assert not None in leaf, punctuator.literal
+        leaf[None] = punctuator
+    return tree
+

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |