[javascriptlint-commit] SF.net SVN: javascriptlint:[342] trunk/jsengine/tokenizer
Status: Beta
Brought to you by:
matthiasmiller
|
From: <mat...@us...> - 2013-10-09 22:41:50
|
Revision: 342
http://sourceforge.net/p/javascriptlint/code/342
Author: matthiasmiller
Date: 2013-10-09 22:41:46 +0000 (Wed, 09 Oct 2013)
Log Message:
-----------
Distinguish between characters and strings.
Modified Paths:
--------------
trunk/jsengine/tokenizer/__init__.py
trunk/jsengine/tokenizer/tok.py
Modified: trunk/jsengine/tokenizer/__init__.py
===================================================================
--- trunk/jsengine/tokenizer/__init__.py 2013-10-09 21:51:03 UTC (rev 341)
+++ trunk/jsengine/tokenizer/__init__.py 2013-10-09 22:41:46 UTC (rev 342)
@@ -14,6 +14,14 @@
_KEYWORDS = tok.getkeywords()
_PUNCTUATOR_TREE = tok.get_punctuator_tree()
+def _str_has_chr(s, c):
+ assert len(c) <= 1
+ return c in s
+
+def _chr_to_str(c):
+ assert len(c) <= 1
+ return c
+
class Token:
def __init__(self, tok, atom=None):
self.tok = tok
@@ -51,23 +59,35 @@
return self._offset >= len(self._content)
def readchr(self):
- if self._offset < len(self._content):
+ c = self.peekchr()
+ if not c:
+ raise JSSyntaxError(self.get_offset(), 'unexpected_eof')
+ self._offset += 1
+ return c
+
+ def readchrif(self, expect):
+ if self.peekchr() == expect:
self._offset += 1
- return self._content[self._offset - 1]
- raise JSSyntaxError(self.get_offset()-1, 'unexpected_eof')
+ return expect
+ return ''
- def readchrif(self, seq):
- s = self.peekchrif(seq)
+ def readchrin(self, seq):
+ s = self.peekchrin(seq)
if s:
- assert len(s) == 1
self._offset += 1
return s
- def peekchrif(self, seq):
- if self._offset < len(self._content) and \
- self._content[self._offset] in seq:
+ def peekchr(self):
+ if self._offset < len(self._content):
return self._content[self._offset]
+ return ''
+ def peekchrin(self, seq):
+ c = self.peekchr()
+ if c and _str_has_chr(seq, c):
+ return c
+ return ''
+
def readtextif(self, text):
""" Returns the string if found. Otherwise returns None.
"""
@@ -182,7 +202,7 @@
# TODO: Validate and save
while True:
- c = stream.readchrif(_IDENT)
+ c = stream.readchrin(_IDENT)
if not c:
break
@@ -194,15 +214,17 @@
if stream.eof():
return Token(tok.EOF)
+ stream.watch_reads()
+
c = stream.readchr()
# WHITESPACE
- if c in _WHITESPACE or c in _LINETERMINATOR:
- linebreak = c in _LINETERMINATOR
+ if _str_has_chr(_WHITESPACE, c) or _str_has_chr(_LINETERMINATOR, c):
+ linebreak = _str_has_chr(_LINETERMINATOR, c)
while True:
- if stream.readchrif(_LINETERMINATOR):
+ if stream.readchrin(_LINETERMINATOR):
linebreak = True
- elif stream.readchrif(_WHITESPACE):
+ elif stream.readchrin(_WHITESPACE):
pass
else:
break
@@ -213,24 +235,24 @@
# COMMENTS
if c == '/':
- if stream.peekchrif("/"):
- while not stream.eof() and not stream.peekchrif(_LINETERMINATOR):
+ if stream.peekchr() == '/':
+ while not stream.eof() and not stream.peekchrin(_LINETERMINATOR):
stream.readchr()
return Token(tok.CPP_COMMENT)
- if stream.peekchrif("*"):
+ if stream.peekchr() == '*':
linebreak = False
while True:
if stream.eof():
return Token(tok.ERROR, atom='unterminated_comment')
c = stream.readchr()
- if c in _LINETERMINATOR:
+ if _str_has_chr(_LINETERMINATOR, c):
linebreak = True
elif c == '*' and stream.readchrif('/'):
return Token(tok.C_COMMENT)
return Token(tok.EOF)
elif c == '<':
if stream.readtextif('!--'):
- while not stream.eof() and not stream.peekchrif(_LINETERMINATOR):
+ while not stream.eof() and not stream.peekchrin(_LINETERMINATOR):
stream.readchr()
return Token(tok.HTML_COMMENT)
@@ -245,66 +267,64 @@
c = stream.readchr()
elif c == quote:
return Token(tok.STRING, atom=s)
- s += c
+ s += _chr_to_str(c)
# NUMBERS
- if c in _DIGITS or (c == '.' and stream.peekchrif(_DIGITS)):
+ if _str_has_chr(_DIGITS, c) or (c == '.' and stream.peekchrin(_DIGITS)):
s = c # TODO
- stream.watch_reads()
- if c == '0' and stream.readchrif('xX'):
+ if c == '0' and stream.readchrin('xX'):
# Hex
- while stream.readchrif(_HEX_DIGITS):
+ while stream.readchrin(_HEX_DIGITS):
pass
- elif c == '0' and stream.readchrif(_DIGITS):
+ elif c == '0' and stream.readchrin(_DIGITS):
# Octal
- while stream.readchrif(_DIGITS):
+ while stream.readchrin(_DIGITS):
pass
else:
# Decimal
if c != '.':
- while stream.readchrif(_DIGITS):
+ while stream.readchrin(_DIGITS):
pass
stream.readchrif('.')
- while stream.readchrif(_DIGITS):
+ while stream.readchrin(_DIGITS):
pass
- if stream.readchrif('eE'):
- stream.readchrif('+-')
- if not stream.readchrif(_DIGITS):
+ if stream.readchrin('eE'):
+ stream.readchrin('+-')
+ if not stream.readchrin(_DIGITS):
raise JSSyntaxError(stream.get_offset(), 'syntax_error')
- while stream.readchrif(_DIGITS):
+ while stream.readchrin(_DIGITS):
pass
- if stream.peekchrif(_IDENT):
+ if stream.peekchrin(_IDENT):
return Token(tok.ERROR)
- atom = s + stream.get_watched_reads()
+ atom = stream.get_watched_reads()
return Token(tok.NUMBER, atom=atom)
if c in _PUNCTUATOR_TREE:
d = _PUNCTUATOR_TREE[c]
while True:
- c = stream.readchrif(list(d.keys()))
+ c = stream.readchrin(u''.join(d.keys()))
if c:
d = d[c]
else:
break
try:
- return Token(d[None])
+ return Token(d[''])
except KeyError:
print('oops')
raise JSSyntaxError(stream.get_offset(), 'syntax_error')
- if c in _IDENT:
- s = ''
- while c:
- s += c
- c = stream.readchrif(_IDENT + _DIGITS)
- if s in _KEYWORDS:
- return Token(_KEYWORDS[s], atom=s)
- elif s:
- return Token(tok.NAME, atom=s)
+ if _str_has_chr(_IDENT, c):
+ while stream.readchrin(_IDENT + _DIGITS):
+ pass
+ atom = stream.get_watched_reads()
+ if atom in _KEYWORDS:
+ return Token(_KEYWORDS[atom], atom=atom)
+ return Token(tok.NAME, atom=atom)
+
raise JSSyntaxError(stream.get_offset(), 'unexpected_char',
- { 'char': c })
+ { 'char': _chr_to_str(c) })
Modified: trunk/jsengine/tokenizer/tok.py
===================================================================
--- trunk/jsengine/tokenizer/tok.py 2013-10-09 21:51:03 UTC (rev 341)
+++ trunk/jsengine/tokenizer/tok.py 2013-10-09 22:41:46 UTC (rev 342)
@@ -123,6 +123,6 @@
for c in punctuator.literal:
leaf = leaf.setdefault(c, {})
assert not None in leaf, punctuator.literal
- leaf[None] = punctuator
+ leaf[''] = punctuator
return tree
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|