[javascriptlint-commit] SF.net SVN: javascriptlint:[340] trunk/jsengine/tokenizer
Status: Beta
Brought to you by:
matthiasmiller
|
From: <mat...@us...> - 2013-10-09 21:35:02
|
Revision: 340
http://sourceforge.net/p/javascriptlint/code/340
Author: matthiasmiller
Date: 2013-10-09 21:34:59 +0000 (Wed, 09 Oct 2013)
Log Message:
-----------
Move tokens into separate module
Modified Paths:
--------------
trunk/jsengine/tokenizer/__init__.py
Added Paths:
-----------
trunk/jsengine/tokenizer/tok.py
Modified: trunk/jsengine/tokenizer/__init__.py
===================================================================
--- trunk/jsengine/tokenizer/__init__.py 2013-10-09 20:33:12 UTC (rev 339)
+++ trunk/jsengine/tokenizer/__init__.py 2013-10-09 21:34:59 UTC (rev 340)
@@ -1,5 +1,6 @@
# vim: sw=4 ts=4 et
from jsengine import JSSyntaxError
+import tok
_WHITESPACE = u'\u0020\t\u000B\u000C\u00A0\uFFFF'
_LINETERMINATOR = u'\u000A\u000D\u2028\u2029'
@@ -10,131 +11,9 @@
u'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + \
u'$_'
-_ALL_TOKENS = []
+_KEYWORDS = tok.getkeywords()
+_PUNCTUATOR_TREE = tok.get_punctuator_tree()
-class _Token(object):
- def __init__(self, category, literal):
- self._category = category
- self._literal = literal
- _ALL_TOKENS.append(self)
-
- def __repr__(self):
- return '_Token(%r, %r)' % (self._category, self._literal)
-
- @property
- def category(self):
- return self._category
-
- @property
- def literal(self):
- return self._literal
-
-class _Tokens(object):
- # Symbols
- ASSIGN_ULSHIFT = _Token('sym', '<<<=')
- ASSIGN_URSHIFT = _Token('sym', '>>>=')
- EQ_STRICT = _Token('sym', '===')
- NE_STRICT = _Token('sym', '!==')
- URSHIFT = _Token('sym', '>>>')
- ASSIGN_LSHIFT = _Token('sym', '<<=')
- ASSIGN_RSHIFT = _Token('sym', '>>=')
- LE = _Token('sym', '<=')
- GE = _Token('sym', '>=')
- EQ = _Token('sym', '==')
- NE = _Token('sym', '!=')
- INC = _Token('sym', '++')
- DEC = _Token('sym', '--')
- LSHIFT = _Token('sym', '<<')
- RSHIFT = _Token('sym', '>>')
- LOGICAL_AND = _Token('sym', '&&')
- LOGICAL_OR = _Token('sym', '||')
- ASSIGN_ADD = _Token('sym', '+=')
- ASSIGN_SUB = _Token('sym', '-=')
- ASSIGN_MUL = _Token('sym', '*=')
- ASSIGN_MOD = _Token('sym', '%=')
- ASSIGN_BIT_AND = _Token('sym', '&=')
- ASSIGN_BIT_OR = _Token('sym', '|=')
- ASSIGN_BIT_XOR = _Token('sym', '^=')
- ASSIGN_DIV = _Token('sym', '/=')
- LBRACE = _Token('sym', '{')
- RBRACE = _Token('sym', '}')
- LPAREN = _Token('sym', '(')
- RPAREN = _Token('sym', ')')
- LBRACKET = _Token('sym', '[')
- RBRACKET = _Token('sym', ']')
- DOT = _Token('sym', '.')
- SEMI = _Token('sym', ';')
- COMMA = _Token('sym', ',')
- LT = _Token('sym', '<')
- GT = _Token('sym', '>')
- ADD = _Token('sym', '+')
- SUB = _Token('sym', '-')
- MUL = _Token('sym', '*')
- MOD = _Token('sym', '%')
- BIT_OR = _Token('sym', '|')
- BIT_AND = _Token('sym', '&')
- BIT_XOR = _Token('sym', '^')
- LOGICAL_NOT = _Token('sym', '!')
- BIT_NOT = _Token('sym', '~')
- QUESTION = _Token('sym', '?')
- COLON = _Token('sym', ':')
- ASSIGN = _Token('sym', '=')
- DIV = _Token('sym', '/')
-
- # Keywords
- BREAK = _Token('kw', 'break')
- CASE = _Token('kw', 'case')
- CATCH = _Token('kw', 'catch')
- CONTINUE = _Token('kw', 'continue')
- DEFAULT = _Token('kw', 'default')
- DELETE = _Token('kw', 'delete')
- DO = _Token('kw', 'do')
- ELSE = _Token('kw', 'else')
- FALSE = _Token('kw', 'false')
- FINALLY = _Token('kw', 'finally')
- FOR = _Token('kw', 'for')
- FUNCTION = _Token('kw', 'function')
- IF = _Token('kw', 'if')
- IN = _Token('kw', 'in')
- INSTANCEOF = _Token('kw', 'instanceof')
- NEW = _Token('kw', 'new')
- NULL = _Token('kw', 'null')
- RETURN = _Token('kw', 'return')
- SWITCH = _Token('kw', 'switch')
- THIS = _Token('kw', 'this')
- THROW = _Token('kw', 'throw')
- TRUE = _Token('kw', 'true')
- TYPEOF = _Token('kw', 'typeof')
- TRY = _Token('kw', 'try')
- VAR = _Token('kw', 'var')
- VOID = _Token('kw', 'void')
- WHILE = _Token('kw', 'while')
- WITH = _Token('kw', 'with')
-
- # Other tokens
- C_COMMENT = _Token('other', '/*')
- CPP_COMMENT = _Token('other', '//')
- HTML_COMMENT = _Token('other', '<!--')
- ERROR = _Token('other', 'err')
- EOF = _Token('other', 'eof')
- EOL = _Token('other', 'eol')
- NAME = _Token('other', '(name)')
- NUMBER = _Token('other', '(num)')
- OPERATOR = _Token('other', '(op)')
- REGEXP = _Token('other', '(re)')
- SPACE = _Token('other', '(sp)')
- STRING = _Token('other', '(str)')
-
-tok = _Tokens()
-_KEYWORDS = dict((t.literal, t) for t in _ALL_TOKENS if t.category == 'kw')
-_PUNCTUATOR_TREE = {}
-for punctuator in (t for t in _ALL_TOKENS if t.category == 'sym'):
- d = _PUNCTUATOR_TREE
- for c in punctuator.literal:
- d = d.setdefault(c, {})
- assert not None in d, punctuator.literal
- d[None] = punctuator
-
class Token:
def __init__(self, tok, atom=None):
self.tok = tok
@@ -282,7 +161,7 @@
peek.set_offset(start_offset, end_offset)
self._peeked.append(peek)
- assert isinstance(peek.tok, _Token), repr(peek.tok)
+ assert isinstance(peek.tok, tok.TokenType), repr(peek.tok)
if peek.tok not in (tok.EOL, tok.SPACE,
tok.C_COMMENT, tok.CPP_COMMENT,
tok.HTML_COMMENT):
Copied: trunk/jsengine/tokenizer/tok.py (from rev 339, trunk/jsengine/tokenizer/__init__.py)
===================================================================
--- trunk/jsengine/tokenizer/tok.py (rev 0)
+++ trunk/jsengine/tokenizer/tok.py 2013-10-09 21:34:59 UTC (rev 340)
@@ -0,0 +1,128 @@
+# vim: sw=4 ts=4 et
+_ALL_TOKENS = []
+
+class TokenType(object):
+ def __init__(self, category, literal):
+ self._category = category
+ self._literal = literal
+ _ALL_TOKENS.append(self)
+
+ def __repr__(self):
+ return 'TokenType(%r, %r)' % (self._category, self._literal)
+
+ @property
+ def category(self):
+ return self._category
+
+ @property
+ def literal(self):
+ return self._literal
+
+# Symbols
+ASSIGN_ULSHIFT = TokenType('sym', '<<<=')
+ASSIGN_URSHIFT = TokenType('sym', '>>>=')
+EQ_STRICT = TokenType('sym', '===')
+NE_STRICT = TokenType('sym', '!==')
+URSHIFT = TokenType('sym', '>>>')
+ASSIGN_LSHIFT = TokenType('sym', '<<=')
+ASSIGN_RSHIFT = TokenType('sym', '>>=')
+LE = TokenType('sym', '<=')
+GE = TokenType('sym', '>=')
+EQ = TokenType('sym', '==')
+NE = TokenType('sym', '!=')
+INC = TokenType('sym', '++')
+DEC = TokenType('sym', '--')
+LSHIFT = TokenType('sym', '<<')
+RSHIFT = TokenType('sym', '>>')
+LOGICAL_AND = TokenType('sym', '&&')
+LOGICAL_OR = TokenType('sym', '||')
+ASSIGN_ADD = TokenType('sym', '+=')
+ASSIGN_SUB = TokenType('sym', '-=')
+ASSIGN_MUL = TokenType('sym', '*=')
+ASSIGN_MOD = TokenType('sym', '%=')
+ASSIGN_BIT_AND = TokenType('sym', '&=')
+ASSIGN_BIT_OR = TokenType('sym', '|=')
+ASSIGN_BIT_XOR = TokenType('sym', '^=')
+ASSIGN_DIV = TokenType('sym', '/=')
+LBRACE = TokenType('sym', '{')
+RBRACE = TokenType('sym', '}')
+LPAREN = TokenType('sym', '(')
+RPAREN = TokenType('sym', ')')
+LBRACKET = TokenType('sym', '[')
+RBRACKET = TokenType('sym', ']')
+DOT = TokenType('sym', '.')
+SEMI = TokenType('sym', ';')
+COMMA = TokenType('sym', ',')
+LT = TokenType('sym', '<')
+GT = TokenType('sym', '>')
+ADD = TokenType('sym', '+')
+SUB = TokenType('sym', '-')
+MUL = TokenType('sym', '*')
+MOD = TokenType('sym', '%')
+BIT_OR = TokenType('sym', '|')
+BIT_AND = TokenType('sym', '&')
+BIT_XOR = TokenType('sym', '^')
+LOGICAL_NOT = TokenType('sym', '!')
+BIT_NOT = TokenType('sym', '~')
+QUESTION = TokenType('sym', '?')
+COLON = TokenType('sym', ':')
+ASSIGN = TokenType('sym', '=')
+DIV = TokenType('sym', '/')
+
+# Keywords
+BREAK = TokenType('kw', 'break')
+CASE = TokenType('kw', 'case')
+CATCH = TokenType('kw', 'catch')
+CONTINUE = TokenType('kw', 'continue')
+DEFAULT = TokenType('kw', 'default')
+DELETE = TokenType('kw', 'delete')
+DO = TokenType('kw', 'do')
+ELSE = TokenType('kw', 'else')
+FALSE = TokenType('kw', 'false')
+FINALLY = TokenType('kw', 'finally')
+FOR = TokenType('kw', 'for')
+FUNCTION = TokenType('kw', 'function')
+IF = TokenType('kw', 'if')
+IN = TokenType('kw', 'in')
+INSTANCEOF = TokenType('kw', 'instanceof')
+NEW = TokenType('kw', 'new')
+NULL = TokenType('kw', 'null')
+RETURN = TokenType('kw', 'return')
+SWITCH = TokenType('kw', 'switch')
+THIS = TokenType('kw', 'this')
+THROW = TokenType('kw', 'throw')
+TRUE = TokenType('kw', 'true')
+TYPEOF = TokenType('kw', 'typeof')
+TRY = TokenType('kw', 'try')
+VAR = TokenType('kw', 'var')
+VOID = TokenType('kw', 'void')
+WHILE = TokenType('kw', 'while')
+WITH = TokenType('kw', 'with')
+
+# Other tokens
+C_COMMENT = TokenType('other', '/*')
+CPP_COMMENT = TokenType('other', '//')
+HTML_COMMENT = TokenType('other', '<!--')
+ERROR = TokenType('other', 'err')
+EOF = TokenType('other', 'eof')
+EOL = TokenType('other', 'eol')
+NAME = TokenType('other', '(name)')
+NUMBER = TokenType('other', '(num)')
+OPERATOR = TokenType('other', '(op)')
+REGEXP = TokenType('other', '(re)')
+SPACE = TokenType('other', '(sp)')
+STRING = TokenType('other', '(str)')
+
+def getkeywords():
+ return dict((t.literal, t) for t in _ALL_TOKENS if t.category == 'kw')
+
+def get_punctuator_tree():
+ tree = {}
+ for punctuator in (t for t in _ALL_TOKENS if t.category == 'sym'):
+ leaf = tree
+ for c in punctuator.literal:
+ leaf = leaf.setdefault(c, {})
+ assert not None in leaf, punctuator.literal
+ leaf[None] = punctuator
+ return tree
+
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|