[javascriptlint-commit] SF.net SVN: javascriptlint:[343] trunk/jsengine/tokenizer
Status: Beta
Brought to you by:
matthiasmiller
From: <mat...@us...> - 2013-10-10 14:07:44
|
Revision: 343 http://sourceforge.net/p/javascriptlint/code/343 Author: matthiasmiller Date: 2013-10-10 14:07:41 +0000 (Thu, 10 Oct 2013) Log Message: ----------- Fully distinguish between chars and strings. Modified Paths: -------------- trunk/jsengine/tokenizer/__init__.py trunk/jsengine/tokenizer/tok.py Modified: trunk/jsengine/tokenizer/__init__.py =================================================================== --- trunk/jsengine/tokenizer/__init__.py 2013-10-09 22:41:46 UTC (rev 342) +++ trunk/jsengine/tokenizer/__init__.py 2013-10-10 14:07:41 UTC (rev 343) @@ -14,14 +14,43 @@ _KEYWORDS = tok.getkeywords() _PUNCTUATOR_TREE = tok.get_punctuator_tree() -def _str_has_chr(s, c): - assert len(c) <= 1 - return c in s +class _Char(object): + def __init__(self, u): + assert isinstance(u, int) or u is None, u + self._u = u -def _chr_to_str(c): - assert len(c) <= 1 - return c + @classmethod + def fromstr(cls, s, i): + return _Char(ord(s[i])) + @classmethod + def ord(cls, s): + return _Char(ord(s)) + + @property + def uval(self): + return self._u + + def tostr(self): + if self._u is None: + return unicode() + return unichr(self._u) + + def instr(self, s): + if self._u is None: + return False + return s.find(unichr(self._u)) != -1 + + def __hash__(self): + return hash(self._u) + + def __eq__(self, other): + assert isinstance(other, _Char), other + return self._u == other._u + + def __nonzero__(self): + return not self._u is None + class Token: def __init__(self, tok, atom=None): self.tok = tok @@ -69,7 +98,7 @@ if self.peekchr() == expect: self._offset += 1 return expect - return '' + return _Char(None) def readchrin(self, seq): s = self.peekchrin(seq) @@ -79,14 +108,14 @@ def peekchr(self): if self._offset < len(self._content): - return self._content[self._offset] - return '' + return _Char.fromstr(self._content, self._offset) + return _Char(None) def peekchrin(self, seq): c = self.peekchr() - if c and _str_has_chr(seq, c): + if c and c.instr(seq): return c - return '' + return _Char(None) def readtextif(self, text): """ Returns the string if found. Otherwise returns None. @@ -184,20 +213,20 @@ stream = self._stream while True: c = stream.readchr() - if c == '\\': + if c == _Char.ord('\\'): c = stream.readchr() - if c == '\n': + if c == _Char.ord('\n'): return Token(tok.ERROR) - elif c == '[': + elif c == _Char.ord('['): while True: c = stream.readchr() - if c == '\n': + if c == _Char.ord('\n'): return Token(tok.ERROR) - elif c == ']': + elif c == _Char.ord(']'): break - elif c == '\n': + elif c == _Char.ord('\n'): return Token(tok.ERROR) - elif c == '/': + elif c == _Char.ord('/'): break # TODO: Validate and save @@ -219,8 +248,8 @@ c = stream.readchr() # WHITESPACE - if _str_has_chr(_WHITESPACE, c) or _str_has_chr(_LINETERMINATOR, c): - linebreak = _str_has_chr(_LINETERMINATOR, c) + if c.instr(_WHITESPACE) or c.instr(_LINETERMINATOR): + linebreak = c.instr(_LINETERMINATOR) while True: if stream.readchrin(_LINETERMINATOR): linebreak = True @@ -234,49 +263,49 @@ return Token(tok.SPACE) # COMMENTS - if c == '/': - if stream.peekchr() == '/': + if c == _Char.ord('/'): + if stream.peekchr() == _Char.ord('/'): while not stream.eof() and not stream.peekchrin(_LINETERMINATOR): stream.readchr() return Token(tok.CPP_COMMENT) - if stream.peekchr() == '*': + if stream.peekchr() == _Char.ord('*'): linebreak = False while True: if stream.eof(): return Token(tok.ERROR, atom='unterminated_comment') c = stream.readchr() - if _str_has_chr(_LINETERMINATOR, c): + if c.instr(_LINETERMINATOR): linebreak = True - elif c == '*' and stream.readchrif('/'): + elif c == _Char.ord('*') and stream.readchrif(_Char.ord('/')): return Token(tok.C_COMMENT) return Token(tok.EOF) - elif c == '<': + elif c == _Char.ord('<'): if stream.readtextif('!--'): while not stream.eof() and not stream.peekchrin(_LINETERMINATOR): stream.readchr() return Token(tok.HTML_COMMENT) # STRING LITERALS - if c == '"' or c == "'": + if c == _Char.ord('"') or c == _Char.ord("'"): # TODO: Decode s = '' quote = c while True: c = stream.readchr() - if c == '\\': + if c == _Char.ord('\\'): c = stream.readchr() elif c == quote: return Token(tok.STRING, atom=s) - s += _chr_to_str(c) + s += c.tostr() # NUMBERS - if _str_has_chr(_DIGITS, c) or (c == '.' and stream.peekchrin(_DIGITS)): + if c.instr(_DIGITS) or (c == _Char.ord('.') and stream.peekchrin(_DIGITS)): s = c # TODO - if c == '0' and stream.readchrin('xX'): + if c == _Char.ord('0') and stream.readchrin('xX'): # Hex while stream.readchrin(_HEX_DIGITS): pass - elif c == '0' and stream.readchrin(_DIGITS): + elif c == _Char.ord('0') and stream.readchrin(_DIGITS): # Octal while stream.readchrin(_DIGITS): pass @@ -285,7 +314,7 @@ if c != '.': while stream.readchrin(_DIGITS): pass - stream.readchrif('.') + stream.readchrif(_Char.ord('.')) while stream.readchrin(_DIGITS): pass @@ -303,21 +332,22 @@ atom = stream.get_watched_reads() return Token(tok.NUMBER, atom=atom) - if c in _PUNCTUATOR_TREE: - d = _PUNCTUATOR_TREE[c] + if c.uval in _PUNCTUATOR_TREE: + d = _PUNCTUATOR_TREE[c.uval] while True: - c = stream.readchrin(u''.join(d.keys())) - if c: - d = d[c] + c = stream.peekchr() + if c and c.uval in d: + stream.readchr() + d = d[c.uval] else: break try: - return Token(d['']) + return Token(d[-1]) except KeyError: print('oops') raise JSSyntaxError(stream.get_offset(), 'syntax_error') - if _str_has_chr(_IDENT, c): + if c.instr(_IDENT): while stream.readchrin(_IDENT + _DIGITS): pass @@ -327,4 +357,4 @@ return Token(tok.NAME, atom=atom) raise JSSyntaxError(stream.get_offset(), 'unexpected_char', - { 'char': _chr_to_str(c) }) + { 'char': c.tostr() }) Modified: trunk/jsengine/tokenizer/tok.py =================================================================== --- trunk/jsengine/tokenizer/tok.py 2013-10-09 22:41:46 UTC (rev 342) +++ trunk/jsengine/tokenizer/tok.py 2013-10-10 14:07:41 UTC (rev 343) @@ -121,8 +121,8 @@ for punctuator in (t for t in _ALL_TOKENS if t.category == 'sym'): leaf = tree for c in punctuator.literal: - leaf = leaf.setdefault(c, {}) + leaf = leaf.setdefault(ord(c), {}) assert not None in leaf, punctuator.literal - leaf[''] = punctuator + leaf[-1] = punctuator return tree This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |