[javascriptlint-commit] SF.net SVN: javascriptlint:[304] trunk
Status: Beta
Brought to you by:
matthiasmiller
|
From: <mat...@us...> - 2013-09-28 03:06:23
|
Revision: 304
http://sourceforge.net/p/javascriptlint/code/304
Author: matthiasmiller
Date: 2013-09-28 03:06:19 +0000 (Sat, 28 Sep 2013)
Log Message:
-----------
Replace SpiderMonkey with a JavaScript parser written purely in JavaScript.
Modified Paths:
--------------
trunk/DEVELOPMENT
trunk/INSTALL
trunk/javascriptlint/jsparse.py
trunk/javascriptlint/lint.py
trunk/javascriptlint/warnings.py
trunk/setup.py
trunk/test.py
trunk/tests/control_comments/conf-version.js
trunk/tests/html/e4x.html
trunk/tests/html/script_tag_in_js_comment.html
trunk/tests/warnings/identifier_hides_another.js
trunk/tests/warnings/spidermonkey/bad_backref.js
trunk/tests/warnings/spidermonkey/invalid_backref.js
trunk/tests/warnings/want_assign_or_call.js
Added Paths:
-----------
trunk/jsengine/
trunk/jsengine/__init__.py
trunk/jsengine/parser/
trunk/jsengine/parser/__init__.py
trunk/jsengine/parser/_constants_kind.py
trunk/jsengine/parser/_constants_op.py
trunk/jsengine/structs.py
trunk/jsengine/tokenizer/
trunk/jsengine/tokenizer/__init__.py
Removed Paths:
-------------
trunk/Makefile.SpiderMonkey
trunk/javascriptlint/pyspidermonkey/
trunk/javascriptlint/pyspidermonkey_/
trunk/javascriptlint/spidermonkey.py
trunk/spidermonkey/
trunk/tests/warnings/spidermonkey/deprecated_usage.js
Modified: trunk/DEVELOPMENT
===================================================================
--- trunk/DEVELOPMENT 2011-12-02 18:49:10 UTC (rev 303)
+++ trunk/DEVELOPMENT 2013-09-28 03:06:19 UTC (rev 304)
@@ -15,17 +15,3 @@
> add test for syntax error
> consider reimplementing abiguous_newline
-
-** UPGRADING SPIDERMONKEY
-
-Use the following command to upgrade SpiderMonkey. Replace X.X.X with the
-version number. js-X.X.X is the directory containing the new version of
-SpiderMonkey. Use a relative path for pretty commit messages.
-
-svn_load_dirs.pl \
- -t X.X.X \
- -p svn_load_dirs.conf \
- https://javascriptlint.svn.sourceforge.net/svnroot/javascriptlint/vendorsrc/Mozilla.org/js \
- current \
- js-X.X.X
-
Modified: trunk/INSTALL
===================================================================
--- trunk/INSTALL 2011-12-02 18:49:10 UTC (rev 303)
+++ trunk/INSTALL 2013-09-28 03:06:19 UTC (rev 304)
@@ -1,14 +1,4 @@
-BUILDING FROM THE SUBVERSION TRUNK
-* Windows Prequisites:
- * Visual Studio 2008 Express
- * Python 2.6
- * py2exe
- * MozillaBuild (http://developer.mozilla.org/en/docs/Windows_Build_Prerequisites)
-
- Launch the MozillaBuild MSVC 9 batch file. (You may have to run this as an
- Administrator on Windows Vista.) Run the commands in that shell.
-
On all platforms:
$ python setup.py build
Deleted: trunk/Makefile.SpiderMonkey
===================================================================
--- trunk/Makefile.SpiderMonkey 2011-12-02 18:49:10 UTC (rev 303)
+++ trunk/Makefile.SpiderMonkey 2013-09-28 03:06:19 UTC (rev 304)
@@ -1,59 +0,0 @@
-## THIS IS AN INTERNAL MAKEFILE FOR setup.py
-## DO NOT RUN THIS MAKEFILE DIRECTLY.
-
-SPIDERMONKEY_SRC=spidermonkey/src
-
-# Load the SpiderMonkey config to find the OS define
-# Also use this for the SO_SUFFIX
-DEPTH=$(SPIDERMONKEY_SRC)
-include $(SPIDERMONKEY_SRC)/config.mk
-SPIDERMONKEY_OS=$(firstword $(patsubst -D%, %, $(filter -DXP_%, $(OS_CFLAGS))))
-
-ifdef USE_MSVC
-JS_LIB=js32.lib
-else
-JS_LIB=libjs.a
-endif
-
-BUILD_DIR=build/spidermonkey
-
-ORIG_LIB=$(SPIDERMONKEY_SRC)/$(OBJDIR)/$(JS_LIB)
-COPY_LIB=$(BUILD_DIR)/$(JS_LIB)
-ORIG_DLL=$(SPIDERMONKEY_SRC)/$(OBJDIR)/js32.dll
-COPY_DLL_DIR=$(DISTUTILS_DIR)/javascriptlint
-COPY_DLL_PATH=$(COPY_DLL_DIR)/js32.dll
-OS_HEADER=$(BUILD_DIR)/js_operating_system.h
-ORIG_JSAUTOCFG_H=$(SPIDERMONKEY_SRC)/$(OBJDIR)/jsautocfg.h
-COPY_JSAUTOCFG_H=$(BUILD_DIR)/jsautocfg.h
-
-ALL_TARGETS=$(COPY_LIB) $(OS_HEADER)
-ifndef PREBUILT_CPUCFG
-ALL_TARGETS+=$(COPY_JSAUTOCFG_H)
-endif
-
-ifeq ($(SPIDERMONKEY_OS),XP_WIN)
-ALL_TARGETS+=$(COPY_DLL_PATH)
-endif
-
-all: $(ALL_TARGETS)
-
-clean:
- rm -f $(ORIG_LIB)
- rm -Rf $(BUILD_DIR)
-
-$(BUILD_DIR):
- mkdir -p $(BUILD_DIR)
-
-$(COPY_LIB): $(BUILD_DIR) $(ORIG_LIB)
- cp $(ORIG_LIB) $(COPY_LIB)
-
-$(COPY_DLL_PATH): $(ORIG_DLL)
- mkdir -p $(COPY_DLL_DIR)
- cp $(ORIG_DLL) $(COPY_DLL_PATH)
-
-$(OS_HEADER): $(BUILD_DIR)
- echo "#define $(SPIDERMONKEY_OS)" > $(OS_HEADER)
-
-$(COPY_JSAUTOCFG_H): $(ORIG_JSAUTOCFG_H)
- cp $(ORIG_JSAUTOCFG_H) $(COPY_JSAUTOCFG_H)
-
Modified: trunk/javascriptlint/jsparse.py
===================================================================
--- trunk/javascriptlint/jsparse.py 2011-12-02 18:49:10 UTC (rev 303)
+++ trunk/javascriptlint/jsparse.py 2013-09-28 03:06:19 UTC (rev 304)
@@ -1,155 +1,20 @@
#!/usr/bin/python
# vim: ts=4 sw=4 expandtab
""" Parses a script into nodes. """
-import bisect
import re
import unittest
-import spidermonkey
-from spidermonkey import tok, op
-from util import JSVersion
+import jsengine.parser
+from jsengine.parser import kind as tok
+from jsengine.parser import op
+from jsengine.structs import *
-_tok_names = dict(zip(
- [getattr(tok, prop) for prop in dir(tok)],
- ['tok.%s' % prop for prop in dir(tok)]
-))
-_op_names = dict(zip(
- [getattr(op, prop) for prop in dir(op)],
- ['op.%s' % prop for prop in dir(op)]
-))
+from .util import JSVersion
-NodePos = spidermonkey.NodePos
-
-class NodePositions:
- " Given a string, allows [x] lookups for NodePos line and column numbers."
- def __init__(self, text, start_pos=None):
- # Find the length of each line and incrementally sum all of the lengths
- # to determine the ending position of each line.
- self._start_pos = start_pos
- self._lines = text.splitlines(True)
- lines = [0] + [len(x) for x in self._lines]
- for x in range(1, len(lines)):
- lines[x] += lines[x-1]
- self._line_offsets = lines
- def from_offset(self, offset):
- line = bisect.bisect(self._line_offsets, offset)-1
- col = offset - self._line_offsets[line]
- if self._start_pos:
- if line == 0:
- col += self._start_pos.col
- line += self._start_pos.line
- return NodePos(line, col)
- def to_offset(self, pos):
- pos = self._to_rel_pos(pos)
- offset = self._line_offsets[pos.line] + pos.col
- assert offset <= self._line_offsets[pos.line+1] # out-of-bounds col num
- return offset
- def text(self, start, end):
- assert start <= end
- start, end = self._to_rel_pos(start), self._to_rel_pos(end)
- # Trim the ending first in case it's a single line.
- lines = self._lines[start.line:end.line+1]
- lines[-1] = lines[-1][:end.col+1]
- lines[0] = lines[0][start.col:]
- return ''.join(lines)
- def _to_rel_pos(self, pos):
- " converts a position to a position relative to self._start_pos "
- if not self._start_pos:
- return pos
- line, col = pos.line, pos.col
- line -= self._start_pos.line
- if line == 0:
- col -= self._start_pos.col
- assert line >= 0 and col >= 0 # out-of-bounds node position
- return NodePos(line, col)
-
-class NodeRanges:
- def __init__(self):
- self._offsets = []
- def add(self, start, end):
- i = bisect.bisect_left(self._offsets, start)
- if i % 2 == 1:
- i -= 1
- start = self._offsets[i]
-
- end = end + 1
- j = bisect.bisect_left(self._offsets, end)
- if j % 2 == 1:
- end = self._offsets[j]
- j += 1
-
- self._offsets[i:j] = [start,end]
- def has(self, pos):
- return bisect.bisect_right(self._offsets, pos) % 2 == 1
-
-class _Node:
- def add_child(self, node):
- if node:
- node.node_index = len(self.kids)
- node.parent = self
- self.kids.append(node)
-
- def start_pos(self):
- try:
- return self._start_pos
- except AttributeError:
- self._start_pos = NodePos(self._start_line, self._start_col)
- return self._start_pos
-
- def end_pos(self):
- try:
- return self._end_pos
- except AttributeError:
- self._end_pos = NodePos(self._end_line, self._end_col)
- return self._end_pos
-
- def __str__(self):
- kind = self.kind
- if not kind:
- kind = '(none)'
- return '%s>%s' % (_tok_names[kind], str(self.kids))
-
- def is_equivalent(self, other, are_functions_equiv=False):
- if not other:
- return False
-
- # Bail out for functions
- if not are_functions_equiv:
- if self.kind == tok.FUNCTION:
- return False
- if self.kind == tok.LP and self.opcode == op.CALL:
- return False
-
- if self.kind != other.kind:
- return False
- if self.opcode != other.opcode:
- return False
-
- # Check atoms on names, properties, and string constants
- if self.kind in (tok.NAME, tok.DOT, tok.STRING) and self.atom != other.atom:
- return False
-
- # Check values on numbers
- if self.kind == tok.NUMBER and self.dval != other.dval:
- return False
-
- # Compare child nodes
- if len(self.kids) != len(other.kids):
- return False
- for i in range(0, len(self.kids)):
- # Watch for dead nodes
- if not self.kids[i]:
- if not other.kids[i]: return True
- else: return False
- if not self.kids[i].is_equivalent(other.kids[i]):
- return False
-
- return True
-
def isvalidversion(jsversion):
if jsversion is None:
return True
- return spidermonkey.is_valid_version(jsversion.version)
+ return jsengine.parser.is_valid_version(jsversion.version)
def findpossiblecomments(script, node_positions):
pos = 0
@@ -168,31 +33,18 @@
comment_text = script[match.start():match.end()]
if comment_text.startswith('/*'):
comment_text = comment_text[2:-2]
- opcode = 'JSOP_C_COMMENT'
+ opcode = op.C_COMMENT
else:
comment_text = comment_text[2:]
- opcode = 'JSOP_CPP_COMMENT'
- opcode = opcode[5:].lower()
+ opcode = op.CPP_COMMENT
start_offset = match.start()
end_offset = match.end()-1
start_pos = node_positions.from_offset(start_offset)
end_pos = node_positions.from_offset(end_offset)
- kwargs = {
- 'kind': 'COMMENT',
- 'atom': comment_text,
- 'opcode': opcode,
- '_start_line': start_pos.line,
- '_start_col': start_pos.col,
- '_end_line': end_pos.line,
- '_end_col': end_pos.col,
- 'parent': None,
- 'kids': [],
- 'node_index': None
- }
- comment_node = _Node()
- comment_node.__dict__.update(kwargs)
+ comment_node = ParseNode(kind.COMMENT, opcode, start_pos, end_pos,
+ comment_text, [])
comments.append(comment_node)
# Start searching immediately after the start of the comment in case
@@ -203,28 +55,23 @@
""" All node positions will be relative to startpos. This allows scripts
to be embedded in a file (for example, HTML).
"""
- def _wrapped_callback(line, col, msg):
- assert msg.startswith('JSMSG_')
- msg = msg[6:].lower()
- error_callback(line, col, msg)
-
startpos = startpos or NodePos(0,0)
jsversion = jsversion or JSVersion.default()
- assert isvalidversion(jsversion)
- return spidermonkey.parse(script, jsversion.version, jsversion.e4x,
- _Node, _wrapped_callback,
- startpos.line, startpos.col)
+ assert isvalidversion(jsversion), jsversion
+ if jsversion.e4x:
+ error_callback(startpos.line, startpos.col, 'e4x_deprecated', {})
+ return jsengine.parser.parse(script, jsversion.version,
+ error_callback,
+ startpos)
def filtercomments(possible_comments, node_positions, root_node):
comment_ignore_ranges = NodeRanges()
def process(node):
- if node.kind == tok.NUMBER:
- node.atom = node_positions.text(node.start_pos(), node.end_pos())
- elif node.kind == tok.STRING or \
+ if node.kind == tok.STRING or \
(node.kind == tok.OBJECT and node.opcode == op.REGEXP):
start_offset = node_positions.to_offset(node.start_pos())
- end_offset = node_positions.to_offset(node.end_pos()) - 1
+ end_offset = node_positions.to_offset(node.end_pos())
comment_ignore_ranges.add(start_offset, end_offset)
for kid in node.kids:
if kid:
@@ -249,7 +96,7 @@
def is_compilable_unit(script, jsversion):
jsversion = jsversion or JSVersion.default()
assert isvalidversion(jsversion)
- return spidermonkey.is_compilable_unit(script, jsversion.version, jsversion.e4x)
+ return jsengine.parser.is_compilable_unit(script, jsversion.version)
def _dump_node(node, depth=0):
if node is None:
@@ -258,7 +105,7 @@
print
else:
print ' '*depth,
- print '%s, %s' % (_tok_names[node.kind], _op_names[node.opcode])
+ print '%s, %s' % (repr(node.kind), repr(node.opcode))
print ' '*depth,
print '%s - %s' % (node.start_pos(), node.end_pos())
if hasattr(node, 'atom'):
@@ -379,22 +226,21 @@
for text, expected in tests:
encountered = is_compilable_unit(text, JSVersion.default())
self.assertEquals(encountered, expected)
- # NOTE: This seems like a bug.
- self.assert_(is_compilable_unit("/* test", JSVersion.default()))
+ self.assert_(not is_compilable_unit("/* test", JSVersion.default()))
class TestLineOffset(unittest.TestCase):
def testErrorPos(self):
def geterror(script, startpos):
errors = []
- def onerror(line, col, msg):
- errors.append((line, col, msg))
+ def onerror(line, col, msg, msg_args):
+ errors.append((line, col, msg, msg_args))
parse(script, None, onerror, startpos)
self.assertEquals(len(errors), 1)
return errors[0]
- self.assertEquals(geterror(' ?', None), (0, 1, 'syntax_error'))
- self.assertEquals(geterror('\n ?', None), (1, 1, 'syntax_error'))
- self.assertEquals(geterror(' ?', NodePos(1,1)), (1, 2, 'syntax_error'))
- self.assertEquals(geterror('\n ?', NodePos(1,1)), (2, 1, 'syntax_error'))
+ self.assertEquals(geterror(' ?', None), (0, 1, 'syntax_error', {}))
+ self.assertEquals(geterror('\n ?', None), (1, 1, 'syntax_error', {}))
+ self.assertEquals(geterror(' ?', NodePos(1,1)), (1, 2, 'syntax_error', {}))
+ self.assertEquals(geterror('\n ?', NodePos(1,1)), (2, 1, 'syntax_error', {}))
def testNodePos(self):
def getnodepos(script, startpos):
root = parse(script, None, None, startpos)
Modified: trunk/javascriptlint/lint.py
===================================================================
--- trunk/javascriptlint/lint.py 2011-12-02 18:49:10 UTC (rev 303)
+++ trunk/javascriptlint/lint.py 2013-09-28 03:06:19 UTC (rev 304)
@@ -12,7 +12,8 @@
import unittest
import util
-from spidermonkey import tok, op
+from jsengine.parser import kind as tok
+from jsengine.parser import op
_newline_kinds = (
'eof', 'comma', 'dot', 'semi', 'colon', 'lc', 'rc', 'lp', 'rb', 'assign',
@@ -96,6 +97,7 @@
def add_declaration(self, name, node, type_):
assert type_ in ('arg', 'function', 'var'), \
'Unrecognized identifier type: %s' % type_
+ assert isinstance(name, basestring)
self._identifiers[name] = {
'node': node,
'type': type_
@@ -339,10 +341,10 @@
def _lint_script_part(scriptpos, jsversion, script, script_cache, conf,
ignores, report_native, report_lint, import_callback):
- def parse_error(row, col, msg):
+ def parse_error(row, col, msg, msg_args):
if not msg in ('anon_no_return_value', 'no_return_value',
'redeclared_var', 'var_hides_arg'):
- parse_errors.append((jsparse.NodePos(row, col), msg))
+ parse_errors.append((jsparse.NodePos(row, col), msg, msg_args))
def report(node, errname, pos=None, **errargs):
if errname == 'empty_statement' and node.kind == tok.LC:
@@ -411,8 +413,8 @@
root = jsparse.parse(script, jsversion, parse_error, scriptpos)
if not root:
# Report errors and quit.
- for pos, msg in parse_errors:
- report_native(pos, msg)
+ for pos, msg, msg_args in parse_errors:
+ report_native(pos, msg, msg_args)
return
comments = jsparse.filtercomments(possible_comments, node_positions, root)
@@ -457,7 +459,7 @@
elif keyword == 'pass':
passes.append(node)
else:
- if comment.opcode == 'c_comment':
+ if comment.opcode == op.C_COMMENT:
# Look for nested C-style comments.
nested_comment = comment.atom.find('/*')
if nested_comment < 0 and comment.atom.endswith('/'):
@@ -514,9 +516,9 @@
errdesc = warnings.format_error(errname, **errargs)
_report(pos or node.start_pos(), errname, errdesc, True)
- def report_native(pos, errname):
- # TODO: Format the error.
- _report(pos, errname, errname, False)
+ def report_native(pos, errname, errargs):
+ errdesc = warnings.format_error(errname, **errargs)
+ _report(pos, errname, errdesc, False)
def _report(pos, errname, errdesc, require_key):
try:
@@ -581,7 +583,7 @@
if other and parent_scope == scope:
# Only warn about duplications in this scope.
# Other scopes will be checked later.
- if other.kind == tok.FUNCTION and name in other.fn_args:
+ if other.kind == tok.NAME and other.opcode == op.ARGNAME:
report(node, 'var_hides_arg', name=name)
else:
report(node, 'redeclared_var', name=name)
@@ -612,7 +614,9 @@
_warn_or_declare(scopes[-1], node.fn_name, 'function', node, report)
self._push_scope(node)
for var_name in node.fn_args:
- scopes[-1].add_declaration(var_name, node, 'arg')
+ if scopes[-1].get_identifier(var_name.atom):
+ report(var_name, 'duplicate_formal', name=var_name.atom)
+ scopes[-1].add_declaration(var_name.atom, var_name, 'arg')
@visitation.visit('push', tok.LEXICALSCOPE, tok.WITH)
def _push_scope(self, node):
Deleted: trunk/javascriptlint/spidermonkey.py
===================================================================
--- trunk/javascriptlint/spidermonkey.py 2011-12-02 18:49:10 UTC (rev 303)
+++ trunk/javascriptlint/spidermonkey.py 2013-09-28 03:06:19 UTC (rev 304)
@@ -1,10 +0,0 @@
-# vim: ts=4 sw=4 expandtab
-
-# This is a wrapper script to make it easier for development. It tries to
-# import the development version first, and if that fails, it goes after the
-# real version.
-try:
- from pyspidermonkey_ import *
-except ImportError:
- from pyspidermonkey import *
-
Modified: trunk/javascriptlint/warnings.py
===================================================================
--- trunk/javascriptlint/warnings.py 2011-12-02 18:49:10 UTC (rev 303)
+++ trunk/javascriptlint/warnings.py 2013-09-28 03:06:19 UTC (rev 304)
@@ -21,9 +21,10 @@
import util
import visitation
-from spidermonkey import tok, op
+from jsengine.parser import kind as tok
+from jsengine.parser import op
-_ALL_TOKENS = tuple(filter(lambda x: x != tok.EOF, tok.__dict__.values()))
+_ALL_TOKENS = tok.__dict__.values()
def _get_assigned_lambda(node):
""" Given a node "x = function() {}", returns "function() {}".
@@ -53,6 +54,7 @@
'use_of_label': 'use of label',
'misplaced_regex': 'regular expressions should be preceded by a left parenthesis, assignment, colon, or comma',
'assign_to_function_call': 'assignment to a function call',
+ 'equal_as_assign': 'test for equality (==) mistyped as assignment (=)?',
'ambiguous_else_stmt': 'the else statement could be matched with one of multiple if statements (use curly braces to indicate intent',
'block_without_braces': 'block statement without curly braces',
'ambiguous_nested_stmt': 'block statements containing block statements should use curly braces to resolve ambiguity',
@@ -70,6 +72,7 @@
'leading_decimal_point': 'leading decimal point may indicate a number or an object member',
'trailing_decimal_point': 'trailing decimal point may indicate a number or an object member',
'octal_number': 'leading zeros make an octal number',
+ 'trailing_comma': 'extra comma is not recommended in object initializers',
'trailing_comma_in_array': 'extra comma is not recommended in array initializers',
'useless_quotes': 'the quotation marks are unnecessary',
'mismatch_ctrl_comments': 'mismatched control comment; "ignore" and "end" control comments must have a one-to-one correspondence',
@@ -99,8 +102,20 @@
'incorrect_version': 'Expected /*jsl:content-type*/ control comment. The script was parsed with the wrong version.',
}
+errors = {
+ 'e4x_deprecated': 'e4x is deprecated',
+ 'semi_before_stmnt': 'missing semicolon before statement',
+ 'syntax_error': 'syntax error',
+ 'expected_tok': 'expected token: {token}',
+ 'unexpected_char': 'unexpected character: {char}',
+}
+
def format_error(errname, **errargs):
- errdesc = warnings[errname]
+ if errname in errors:
+ errdesc = errors[errname]
+ else:
+ errdesc = warnings[errname]
+
try:
errdesc = re.sub(r"{(\w+)}", lambda match: errargs[match.group(1)], errdesc)
except (TypeError, KeyError):
@@ -295,9 +310,18 @@
@lookfor(tok.ASSIGN)
def assign_to_function_call(node):
- if node.kids[0].kind == tok.LP:
+ kid = node.kids[0]
+ # Unpack parens.
+ while kid.kind == tok.RP:
+ kid, = kid.kids
+ if kid.kind == tok.LP:
raise LintWarning, node
+@lookfor(tok.ASSIGN)
+def equal_as_assign(node):
+ if not node.parent.kind in (tok.SEMI, tok.RESERVED, tok.RP, tok.COMMA):
+ raise LintWarning, node
+
@lookfor(tok.IF)
def ambiguous_else_stmt(node):
# Only examine this node if it has an else statement.
@@ -492,6 +516,11 @@
if _octal_regexp.match(node.atom):
raise LintWarning, node
+@lookfor(tok.RC)
+def trailing_comma(node):
+ if node.end_comma:
+ raise LintWarning, node
+
@lookfor(tok.RB)
def trailing_comma_in_array(node):
if node.end_comma:
Index: trunk/jsengine
===================================================================
--- trunk/jsengine 2011-12-02 18:49:10 UTC (rev 303)
+++ trunk/jsengine 2013-09-28 03:06:19 UTC (rev 304)
Property changes on: trunk/jsengine
___________________________________________________________________
Added: svn:ignore
## -0,0 +1 ##
+*.pyc
Added: trunk/jsengine/__init__.py
===================================================================
--- trunk/jsengine/__init__.py (rev 0)
+++ trunk/jsengine/__init__.py 2013-09-28 03:06:19 UTC (rev 304)
@@ -0,0 +1,21 @@
+# vim: sw=4 ts=4 et
+
+_MESSAGES = (
+ 'eof',
+ 'semi_before_stmnt',
+ 'syntax_error',
+ 'unterminated_comment',
+ 'expected_tok',
+ 'unexpected_char',
+)
+
+class JSSyntaxError(BaseException):  # syntax error carrying pos, message id and message args
+    def __init__(self, pos, msg, msg_args=None):
+        assert msg in _MESSAGES, msg  # msg must be one of the known message ids
+        self.pos = pos
+        self.msg = msg
+        self.msg_args = msg_args or {}  # always a dict, never None
+    def __unicode__(self):
+        return '%s: %s' % (self.pos, self.msg)
+    def __repr__(self):
+        return 'JSSyntaxError(%r, %r, %r)' % (self.pos, self.msg, self.msg_args)
Added: trunk/jsengine/parser/__init__.py
===================================================================
--- trunk/jsengine/parser/__init__.py (rev 0)
+++ trunk/jsengine/parser/__init__.py 2013-09-28 03:06:19 UTC (rev 304)
@@ -0,0 +1,924 @@
+# vim: sw=4 ts=4 et
+import unittest
+
+from jsengine.tokenizer import tok
+from jsengine import tokenizer
+
+from jsengine import JSSyntaxError
+from _constants_kind import kind
+from _constants_op import op
+
+from jsengine.structs import *
+
+_VERSIONS = [
+ "default",
+ "1.0",
+ "1.1",
+ "1.2",
+ "1.3",
+ "1.4",
+ "1.5",
+ "1.6",
+ "1.7",
+]
+
+def _auto_semicolon(t, kind_, op_, startpos, endpos, atom, kids):
+ nosemi = False
+ if t.peek_sameline().tok not in (tok.EOF, tok.EOL, tok.RBRACE):
+ x = t.advance()
+ if x.tok != tok.SEMI:
+ raise JSSyntaxError(x.startpos, 'semi_before_stmnt')
+ endpos = x.endpos
+ else:
+ nosemi = True
+ return ParseNode(kind_, op_, startpos, endpos, atom, kids, nosemi)
+
+def _function_arglist(t):
+ fn_args = []
+ if t.peek().tok != tok.RPAREN:
+ while True:
+ x = t.expect(tok.NAME)
+ fn_args.append(ParseNode(kind.NAME, op.ARGNAME,
+ x.startpos,
+ x.endpos, x.atom, []))
+ if t.peek().tok == tok.COMMA:
+ t.advance()
+ else:
+ break
+ return fn_args
+
+def _primary_expression(t):
+ x = t.next_withregexp()
+ if x.tok == tok.THIS:
+ return ParseNode(kind.PRIMARY, op.THIS, x.startpos, x.endpos, None, [])
+ elif x.tok == tok.NAME:
+ return ParseNode(kind.NAME, op.NAME, x.startpos, x.endpos, x.atom, [None])
+ elif x.tok == tok.NULL:
+ return ParseNode(kind.PRIMARY, op.NULL, x.startpos, x.endpos, None, [])
+ elif x.tok == tok.TRUE:
+ return ParseNode(kind.PRIMARY, op.TRUE, x.startpos, x.endpos, None, [])
+ elif x.tok == tok.FALSE:
+ return ParseNode(kind.PRIMARY, op.FALSE, x.startpos, x.endpos, None, [])
+ elif x.tok == tok.STRING:
+ return ParseNode(kind.STRING, op.STRING, x.startpos, x.endpos, x.atom, [])
+ elif x.tok == tok.REGEXP:
+ return ParseNode(kind.OBJECT, op.REGEXP, x.startpos, x.endpos, None, [])
+ elif x.tok == tok.NUMBER:
+ return ParseNode(kind.NUMBER, None, x.startpos, x.endpos, x.atom, [])
+ elif x.tok == tok.LBRACKET:
+ startpos = x.startpos
+ items = []
+ end_comma = None
+ if t.peek().tok != tok.RBRACKET:
+ while True:
+ # Conditionally add a value. If it isn't followed by a comma,
+ # quit in order to force an RBRACKET.
+ if t.peek().tok == tok.COMMA:
+ items.append(None)
+ else:
+ items.append(_assignment_expression(t, True))
+ if not t.peek().tok == tok.COMMA:
+ break
+
+ # Expect a comma and use it if the value was missing.
+ x = t.expect(tok.COMMA)
+ comma = ParseNode(kind.COMMA, None,
+ x.startpos, x.endpos, None, [])
+ items[-1] = items[-1] or comma
+
+ # Check for the end.
+ if t.peek().tok == tok.RBRACKET:
+ end_comma = comma
+ break
+ endpos = t.expect(tok.RBRACKET).endpos
+ return ParseNode(kind.RB, None, startpos, endpos, None, items,
+ end_comma=end_comma)
+ elif x.tok == tok.LBRACE:
+ startpos = x.startpos
+ kids = []
+ # TODO: get/set
+ end_comma = None
+ while True:
+ x = t.peek()
+ if x.tok == tok.RBRACE:
+ break
+ elif x.tok == tok.STRING:
+ t.expect(tok.STRING)
+ key = ParseNode(kind.STRING, None, x.startpos,
+ x.endpos, x.atom, [])
+ elif x.tok == tok.NUMBER:
+ t.expect(tok.NUMBER)
+ key = ParseNode(kind.NUMBER, None, x.startpos,
+ x.endpos, x.atom, [])
+ else:
+ x = t.expect_identifiername()
+ key = ParseNode(kind.NAME, None, x.startpos, x.endpos,
+ x.atom, [])
+ t.expect(tok.COLON)
+ value = _assignment_expression(t, True)
+ kids.append(ParseNode(kind.COLON, None, key.startpos,
+ value.endpos, None, [key, value]))
+ if t.peek().tok == tok.COMMA:
+ x = t.advance()
+ end_comma = ParseNode(kind.COMMA, None,
+ x.startpos, x.endpos, None, [])
+ else:
+ end_comma = None
+ break
+ endpos = t.expect(tok.RBRACE).endpos
+ return ParseNode(kind.RC, None, startpos, endpos, None, kids,
+ end_comma=end_comma)
+ elif x.tok == tok.LPAREN:
+ startpos = x.startpos
+ kid = _expression(t, True)
+ endpos = t.expect(tok.RPAREN).endpos
+ return ParseNode(kind.RP, None, startpos, endpos, None, [kid])
+ else:
+ raise JSSyntaxError(x.startpos, 'syntax_error')
+
+def _function_declaration(t, named_opcode):
+ node = _function_expression(t, named_opcode)
+
+ # Convert anonymous functions in expressions.
+ if node.opcode == op.ANONFUNOBJ:
+ node = _auto_semicolon(t, kind.SEMI, None, node.startpos, node.endpos,
+ None, [node])
+ return node
+
+
+def _function_expression(t, named_opcode):
+ startpos = t.expect(tok.FUNCTION).startpos
+ if t.peek().tok == tok.NAME:
+ fn_name = t.expect(tok.NAME).atom
+ opcode = named_opcode
+ else:
+ fn_name = None
+ opcode = op.ANONFUNOBJ
+ t.expect(tok.LPAREN)
+ fn_args = _function_arglist(t)
+ t.expect(tok.RPAREN)
+ fn_body_startpos = t.expect(tok.LBRACE).startpos
+ kids = _sourceelements(t, tok.RBRACE)
+ fn_body_endpos = t.expect(tok.RBRACE).endpos
+ fn_body = ParseNode(kind.LC, None, fn_body_startpos,
+ fn_body_endpos, None, kids)
+ return ParseNode(kind.FUNCTION,
+ op.ANONFUNOBJ if fn_name is None else op.NAMEDFUNOBJ,
+ startpos, fn_body.endpos,
+ fn_name, [fn_body], fn_args=fn_args)
+
+def _argument_list(t):
+ args = []
+ if t.peek().tok != tok.RPAREN:
+ while True:
+ args.append(_assignment_expression(t, True))
+ if t.peek().tok == tok.COMMA:
+ t.advance()
+ else:
+ break
+ return args
+
+def _new_expression(t):
+ startpos = t.expect(tok.NEW).startpos
+ expr = _member_expression(t)
+ # If no (), this is a variant of the NewExpression
+ if t.peek().tok == tok.LPAREN:
+ t.expect(tok.LPAREN)
+ args = _argument_list(t)
+ endpos = t.expect(tok.RPAREN).endpos
+ else:
+ args = []
+ endpos = expr.endpos
+ return ParseNode(kind.NEW, op.NEW, startpos, endpos,
+ None, [expr] + args)
+
+def _member_expression(t, _recurse=True):
+ x = t.peek()
+ if x.tok == tok.NEW:
+ kid = _new_expression(t)
+ elif x.tok == tok.FUNCTION:
+ kid = _function_expression(t, op.NAMEDFUNOBJ)
+ else:
+ kid = _primary_expression(t)
+
+ while True:
+ if t.peek().tok == tok.LBRACKET:
+ t.advance()
+ expr = _expression(t, True)
+ endpos = t.expect(tok.RBRACKET).endpos
+ kid = ParseNode(kind.LB, op.GETELEM, kid.startpos, endpos,
+ None, [kid, expr])
+ elif t.peek().tok == tok.DOT:
+ t.advance()
+ expr = t.expect_identifiername()
+ kid = ParseNode(kind.DOT, op.GETPROP, kid.startpos, expr.endpos,
+ expr.atom, [kid])
+ else:
+ return kid
+
+def _call_expression(t):
+ expr = _member_expression(t)
+ if t.peek().tok != tok.LPAREN:
+ return expr
+
+ while True:
+ x = t.peek()
+ if x.tok == tok.LPAREN:
+ t.expect(tok.LPAREN)
+ args = _argument_list(t)
+ endpos = t.expect(tok.RPAREN).endpos
+ expr = ParseNode(kind.LP, op.CALL, expr.startpos,
+ endpos, None, [expr] + args)
+ elif x.tok == tok.LBRACKET:
+ t.expect(tok.LBRACKET)
+ lookup = _expression(t, True)
+ endpos = t.expect(tok.RBRACKET).endpos
+ expr = ParseNode(kind.LB, op.GETELEM,
+ expr.startpos, endpos,
+ None, [expr, lookup])
+ elif x.tok == tok.DOT:
+ t.expect(tok.DOT)
+ lookup = t.expect_identifiername()
+ expr = ParseNode(kind.DOT, op.GETPROP,
+ expr.startpos, lookup.endpos,
+ lookup.atom, [expr])
+ else:
+ return expr
+
+def _lefthandside_expression(t):
+ kid = _call_expression(t)
+ kid._lefthandside = True
+ return kid
+
+def _postfix_expression(t):
+ kid = _lefthandside_expression(t)
+ if t.peek_sameline().tok == tok.INC:
+ endpos = t.expect(tok.INC).endpos
+ if kid.kind == kind.DOT and kid.opcode == op.GETPROP:
+ opcode = op.PROPINC
+ else:
+ opcode = op.NAMEINC
+ return ParseNode(kind.INC, opcode,
+ kid.startpos, endpos, None, [kid])
+ elif t.peek_sameline().tok == tok.DEC:
+ endpos = t.expect(tok.DEC).endpos
+ return ParseNode(kind.DEC, op.NAMEDEC,
+ kid.startpos, endpos, None, [kid])
+ else:
+ return kid
+
+_UNARY = {
+ tok.DELETE: (kind.DELETE, None),
+ tok.VOID: (kind.UNARYOP, op.VOID),
+ tok.TYPEOF: (kind.UNARYOP, op.TYPEOF),
+ tok.INC: (kind.INC, op.INCNAME),
+ tok.DEC: (kind.DEC, op.DECNAME),
+ tok.ADD: (kind.UNARYOP, op.POS),
+ tok.SUB: (kind.UNARYOP, op.NEG),
+ tok.BIT_NOT: (kind.UNARYOP, op.BITNOT),
+ tok.LOGICAL_NOT: (kind.UNARYOP, op.NOT),
+}
+def _unary_expression(t):
+ x = t.peek()
+ if x.tok in _UNARY:
+ kind_, op_ = _UNARY[x.tok]
+ startpos = t.advance().startpos
+ kid = _unary_expression(t)
+ return ParseNode(kind_, op_, startpos, kid.endpos, None, [kid])
+ else:
+ return _postfix_expression(t)
+
+def _binary_expression(t, dict_, child_expr_callback):
+    expr = child_expr_callback(t)
+    while True:
+        x = t.peek()
+        try:
+            kind_, op_ = dict_[x.tok]
+        except KeyError:
+            return expr
+        # Fold a run of the same operator (a + b + c) into one n-ary node.
+        kids = [expr]
+        while t.peek().tok == x.tok:
+            t.advance()
+            kids.append(child_expr_callback(t))
+        expr = ParseNode(kind_, op_,
+                         kids[0].startpos, kids[-1].endpos,  # end at the last operand
+                         None, kids)
+
+_MULTIPLICATIVE = {
+ tok.MUL: (kind.STAR, op.MUL),
+ tok.DIV: (kind.DIVOP, op.DIV),
+ tok.MOD: (kind.DIVOP, op.MOD),
+}
+def _multiplicative_expression(t):
+ return _binary_expression(t, _MULTIPLICATIVE, _unary_expression)
+
+_ADDITIVE = {
+ tok.ADD: (kind.PLUS, op.ADD),
+ tok.SUB: (kind.MINUS, op.SUB),
+}
+def _additive_expression(t):
+ return _binary_expression(t, _ADDITIVE,
+ _multiplicative_expression)
+
+_SHIFT = {
+ tok.LSHIFT: (kind.SHOP, op.LSH),
+ tok.RSHIFT: (kind.SHOP, op.RSH),
+ tok.URSHIFT: (kind.SHOP, op.URSH),
+}
+def _shift_expression(t):
+ return _binary_expression(t, _SHIFT,
+ _additive_expression)
+
+_RELATIONAL_NOIN = {
+ tok.LT: (kind.RELOP, op.LT),
+ tok.GT: (kind.RELOP, op.GT),
+ tok.LE: (kind.RELOP, op.LE),
+ tok.GE: (kind.RELOP, op.GE),
+ tok.INSTANCEOF: (kind.INSTANCEOF, op.INSTANCEOF),
+}
+_RELATIONAL_IN = dict(_RELATIONAL_NOIN)
+_RELATIONAL_IN.update({
+ tok.IN: (kind.IN, op.IN),
+})
+def _relational_expression(t, allowin):
+ return _binary_expression(t, _RELATIONAL_IN if allowin else _RELATIONAL_NOIN,
+ _shift_expression)
+
+_EQUALITY = {
+ tok.EQ: (kind.EQOP, op.EQ),
+ tok.NE: (kind.EQOP, op.NE),
+ tok.EQ_STRICT: (kind.EQOP, op.NEW_EQ),
+ tok.NE_STRICT: (kind.EQOP, op.NEW_NE),
+}
+def _equality_expression(t, allowin):
+ return _binary_expression(t, _EQUALITY,
+ lambda t: _relational_expression(t, allowin))
+
+def _bitwise_and_expression(t, allowin):
+ left = _equality_expression(t, allowin)
+ while t.peek().tok == tok.BIT_AND:
+ t.advance()
+ right = _equality_expression(t, allowin)
+ left = ParseNode(kind.BITAND, op.BITAND,
+ left.startpos, right.endpos,
+ None, [left, right])
+ return left
+
+def _bitwise_xor_expression(t, allowin):
+ left = _bitwise_and_expression(t, allowin)
+ while t.peek().tok == tok.BIT_XOR:
+ t.advance()
+ right = _bitwise_and_expression(t, allowin)
+ left = ParseNode(kind.BITXOR, op.BITXOR,
+ left.startpos, right.endpos,
+ None, [left, right])
+ return left
+
def _bitwise_or_expression(t, allowin):
    """Parse bitwise-xor expressions joined by ``tok.BIT_OR``.

    Each operator found wraps the result so far (left) and the next
    operand (right) in a new BITOR node, so the tree nests to the left.
    """
    result = _bitwise_xor_expression(t, allowin)
    while True:
        if t.peek().tok != tok.BIT_OR:
            return result
        t.advance()
        operand = _bitwise_xor_expression(t, allowin)
        result = ParseNode(kind.BITOR, op.BITOR,
                           result.startpos, operand.endpos,
                           None, [result, operand])
+
def _logical_and_expression(t, allowin):
    """Parse bitwise-or expressions joined by ``tok.LOGICAL_AND``.

    All operands are collected first and then folded starting from the
    last one, so ``a && b && c`` becomes ``AND(a, AND(b, c))``.
    """
    operands = [_bitwise_or_expression(t, allowin)]
    while t.peek().tok == tok.LOGICAL_AND:
        t.expect(tok.LOGICAL_AND)
        operands.append(_bitwise_or_expression(t, allowin))

    # Fold from the right: the accumulated node is always the right child.
    node = operands.pop()
    while operands:
        lhs = operands.pop()
        node = ParseNode(kind.AND, op.AND,
                         lhs.startpos, node.endpos,
                         None, [lhs, node])
    return node
+
def _logical_or_expression(t, allowin):
    """Parse logical-and expressions joined by ``tok.LOGICAL_OR``.

    All operands are collected first and then folded starting from the
    last one, so ``a || b || c`` becomes ``OR(a, OR(b, c))``.
    """
    operands = [_logical_and_expression(t, allowin)]
    while t.peek().tok == tok.LOGICAL_OR:
        t.expect(tok.LOGICAL_OR)
        operands.append(_logical_and_expression(t, allowin))

    # Fold from the right: the accumulated node is always the right child.
    node = operands.pop()
    while operands:
        lhs = operands.pop()
        node = ParseNode(kind.OR, op.OR,
                         lhs.startpos, node.endpos,
                         None, [lhs, node])
    return node
+
def _conditional_expression(t, allowin):
    """Parse a logical-or expression, optionally followed by ``? a : b``.

    Returns the plain logical-or node when no ``tok.QUESTION`` follows;
    otherwise a HOOK node whose kids are [condition, if-value, else-value].
    The if-value is always parsed with ``allowin=True``; the else-value
    inherits the caller's *allowin*.
    """
    condition = _logical_or_expression(t, allowin)
    if t.peek().tok != tok.QUESTION:
        return condition

    t.expect(tok.QUESTION)
    when_true = _assignment_expression(t, True)
    t.expect(tok.COLON)
    when_false = _assignment_expression(t, allowin)
    return ParseNode(kind.HOOK, None,
                     condition.startpos, when_false.endpos,
                     None, [condition, when_true, when_false])
+
# Assignment token -> (node kind, opcode).  Plain ``=`` carries no opcode;
# compound assignments carry the opcode of the operation they combine with.
_ASSIGNS = {
    tok.ASSIGN: (kind.ASSIGN, None),
    tok.ASSIGN_URSHIFT: (kind.ASSIGN, op.URSH),
    tok.ASSIGN_LSHIFT: (kind.ASSIGN, op.LSH),
    tok.ASSIGN_RSHIFT: (kind.ASSIGN, op.RSH),
    tok.ASSIGN_ADD: (kind.ASSIGN, op.ADD),
    tok.ASSIGN_SUB: (kind.ASSIGN, op.SUB),
    tok.ASSIGN_MUL: (kind.ASSIGN, op.MUL),
    tok.ASSIGN_MOD: (kind.ASSIGN, op.MOD),
    tok.ASSIGN_BIT_AND: (kind.ASSIGN, op.BITAND),
    tok.ASSIGN_BIT_OR: (kind.ASSIGN, op.BITOR),
    tok.ASSIGN_BIT_XOR: (kind.ASSIGN, op.BITXOR),
    tok.ASSIGN_DIV: (kind.ASSIGN, op.DIV),
}

# Kind of a valid assignment target -> (opcode it must currently carry,
# opcode it is rewritten to once it is known to be written to).
_ASSIGN_TARGETS = {
    kind.NAME: (op.NAME, op.SETNAME),
    kind.DOT: (op.GETPROP, op.SETPROP),
    kind.LB: (op.GETELEM, op.SETELEM),
    kind.LP: (op.CALL, op.SETCALL),
}


def _assignment_expression(t, allowin):
    """Parse a conditional expression, optionally followed by an assignment.

    When an assignment operator follows, the left-hand side (looking
    through any enclosing RP nodes) must be a NAME, DOT, LB or LP node;
    its opcode is switched in place from the "get" form to the "set" form.
    The right-hand side is parsed recursively, so chained assignments nest
    to the right.

    Raises JSSyntaxError('invalid_assign') at the start of the left-hand
    side when it is not a valid assignment target.
    """
    left = _conditional_expression(t, allowin)
    if t.peek().tok not in _ASSIGNS:
        return left

    # Unwrap RP nodes (each has exactly one kid) to find the real target.
    target = left
    while target.kind == kind.RP:
        target, = target.kids

    try:
        expected_op, store_op = _ASSIGN_TARGETS[target.kind]
    except KeyError:
        raise JSSyntaxError(left.startpos, 'invalid_assign')
    # Report the opcode actually found; the original message referenced a
    # `.op` attribute, whereas nodes are accessed via `.opcode` elsewhere.
    assert target.opcode == expected_op, target.opcode
    target.opcode = store_op

    kind_, op_ = _ASSIGNS[t.peek().tok]
    t.advance()
    right = _assignment_expression(t, allowin)
    return ParseNode(kind_, op_,
                     left.startpos, right.endpos, None, [left, right])
+
def _expression(t, allowin):
    """Parse one or more assignment expressions separated by ``tok.COMMA``.

    A single expression is returned as-is; two or more are wrapped in a
    COMMA node spanning from the first to the last.
    """
    parts = [_assignment_expression(t, allowin)]
    while t.peek().tok == tok.COMMA:
        t.advance()
        parts.append(_assignment_expression(t, allowin))

    if len(parts) == 1:
        return parts[0]
    return ParseNode(kind.COMMA, None, parts[0].startpos,
                     parts[-1].endpos, None, parts)
+
def _variable_declaration(t, allowin):
    """Parse a comma-separated list of ``name`` / ``name = value`` items.

    Returns a list of NAME nodes.  A node with an initializer has opcode
    SETNAME and ends where the initializer ends; one without has opcode
    NAME and ends with the name token.  The initializer (or None) is the
    node's only kid.
    """
    declared = []
    while True:
        name_tok = t.expect(tok.NAME)
        if t.peek().tok == tok.ASSIGN:
            t.advance()
            initializer = _assignment_expression(t, allowin)
        else:
            initializer = None

        if initializer:
            opcode = op.SETNAME
            endpos = initializer.endpos
        else:
            opcode = op.NAME
            endpos = name_tok.endpos
        declared.append(ParseNode(kind.NAME, opcode, name_tok.startpos,
                                  endpos, name_tok.atom, [initializer]))

        if t.peek().tok != tok.COMMA:
            return declared
        t.advance()
+
def _block_statement(t):
    """Parse ``{ statement* }`` and return an LC node holding the statements."""
    open_brace = t.expect(tok.LBRACE)
    body = []
    while t.peek().tok != tok.RBRACE:
        body.append(_statement(t))
    close_brace = t.expect(tok.RBRACE)
    return ParseNode(kind.LC, None, open_brace.startpos, close_brace.endpos,
                     None, body)
+
def _empty_statement(t):
    """Parse a lone ``;`` and return a SEMI node whose only kid is None."""
    semi = t.expect(tok.SEMI)
    no_expression = [None]
    return ParseNode(kind.SEMI, None, semi.startpos, semi.endpos, None,
                     no_expression)
+
def _var_statement(t):
    """Parse a ``var`` statement.

    The declarations are parsed with ``allowin=True`` and the VAR/DEFVAR
    node is built through ``_auto_semicolon``; it spans from the ``var``
    keyword to the end of the last declaration.
    """
    var_keyword = t.expect(tok.VAR)
    declarations = _variable_declaration(t, True)
    last_endpos = declarations[-1].endpos
    return _auto_semicolon(t, kind.VAR, op.DEFVAR,
                           var_keyword.startpos, last_endpos, None,
                           declarations)
+
def _if_statement(t):
    """Parse ``if (cond) stmt [else stmt]``.

    Returns an IF node whose kids are [condition, if-body, else-body];
    the else-body is None when there is no ``else`` clause.
    """
    startpos = t.expect(tok.IF).startpos
    t.expect(tok.LPAREN)
    condition = _expression(t, True)
    t.expect(tok.RPAREN)
    if_body = _statement(t)

    else_body = None
    if t.peek().tok == tok.ELSE:
        t.advance()
        else_body = _statement(t)

    # The node ends with the last body that is present.
    if else_body:
        endpos = else_body.endpos
    else:
        endpos = if_body.endpos
    return ParseNode(kind.IF, None, startpos,
                     endpos, None, [condition, if_body, else_body])
+
def _do_statement(t):
    """Parse ``do stmt while (cond)``.

    The DO node (kids [body, condition]) is built through
    ``_auto_semicolon`` and spans from ``do`` to the closing parenthesis.
    """
    do_keyword = t.expect(tok.DO)
    body = _statement(t)
    t.expect(tok.WHILE)
    t.expect(tok.LPAREN)
    condition = _expression(t, True)
    close_paren = t.expect(tok.RPAREN)
    return _auto_semicolon(t, kind.DO, None,
                           do_keyword.startpos, close_paren.endpos, None,
                           [body, condition])
+
def _while_statement(t):
    """Parse ``while (cond) stmt`` into a WHILE node with kids [cond, body]."""
    while_keyword = t.expect(tok.WHILE)
    t.expect(tok.LPAREN)
    condition = _expression(t, True)
    t.expect(tok.RPAREN)
    body = _statement(t)
    return ParseNode(kind.WHILE, None,
                     while_keyword.startpos, body.endpos, None,
                     [condition, body])
+
def _for_statement(t):
    """Parse both forms of the ``for`` statement.

    * ``for (init in object) body``: the head becomes an IN node with kids
      [init, object] and the FOR node carries opcode FORIN.
    * ``for (init; test; update) body``: the head becomes a RESERVED node
      (no positions) with three kids, any of which may be None, and the
      FOR node carries no opcode.

    The init part is either a ``var`` declaration list or an expression,
    both parsed with ``allowin=False`` so that a following ``in`` is left
    for this function to see.
    """
    for_startpos = t.expect(tok.FOR).startpos
    t.expect(tok.LPAREN)

    # First clause: var declarations, a plain expression, or nothing.
    if t.peek().tok == tok.VAR:
        var_startpos = t.advance().startpos
        declared = _variable_declaration(t, False)
        init = ParseNode(kind.VAR, op.DEFVAR, var_startpos,
                         declared[-1].endpos, None, declared)
    elif t.peek().tok != tok.SEMI:
        init = _expression(t, False)
    else:
        init = None

    if t.peek().tok == tok.IN:
        t.advance()
        iterated = _expression(t, True)
        condition = ParseNode(kind.IN, None, init.startpos,
                              iterated.endpos, None, [init, iterated])
    else:
        clauses = [init, None, None]
        t.expect(tok.SEMI)
        if t.peek().tok != tok.SEMI:
            clauses[1] = _expression(t, True)
        t.expect(tok.SEMI)
        if t.peek().tok != tok.RPAREN:
            clauses[2] = _expression(t, True)
        condition = ParseNode(kind.RESERVED, None, None, None,
                              None, clauses)

    t.expect(tok.RPAREN)
    body = _statement(t)
    if condition.kind == kind.IN:
        for_op = op.FORIN
    else:
        for_op = None
    return ParseNode(kind.FOR, for_op,
                     for_startpos, body.endpos,
                     None, [condition, body])
+
def _continue_statement(t):
    """Parse ``continue`` with an optional label on the same line.

    The CONTINUE node is built through ``_auto_semicolon``; its atom is
    the label name (or None) and it ends with the last token consumed.
    """
    keyword = t.expect(tok.CONTINUE)
    last_token = keyword
    label = None
    if t.peek_sameline().tok == tok.NAME:
        last_token = t.expect(tok.NAME)
        label = last_token.atom
    # TODO: Validate Scope Labels
    return _auto_semicolon(t, kind.CONTINUE, None, keyword.startpos,
                           last_token.endpos, label, [])
+
def _break_statement(t):
    """Parse ``break`` with an optional label on the same line.

    The BREAK node is built through ``_auto_semicolon``; its atom is the
    label name (or None) and it ends with the last token consumed.
    """
    keyword = t.expect(tok.BREAK)
    last_token = keyword
    label = None
    if t.peek_sameline().tok == tok.NAME:
        last_token = t.expect(tok.NAME)
        label = last_token.atom
    # TODO: Validate Scope Labels
    return _auto_semicolon(t, kind.BREAK, None, keyword.startpos,
                           last_token.endpos, label, [])
+
def _return_statement(t):
    """Parse ``return`` with an optional expression on the same line.

    No expression is parsed when the next same-line token is end of file,
    end of line, ``;`` or ``}``.  The closing brace is included so that a
    bare ``return`` directly before the end of a block (for example
    ``function f() { return }``) is not handed to the expression parser.

    The RETURN node is built through ``_auto_semicolon``; its only kid is
    the expression or None, and it ends with the expression when one is
    present, otherwise with the ``return`` keyword.
    """
    keyword = t.expect(tok.RETURN)
    terminators = (tok.EOF, tok.EOL, tok.SEMI, tok.RBRACE)

    if t.peek_sameline().tok in terminators:
        expr = None
        endpos = keyword.endpos
    else:
        expr = _expression(t, True)
        endpos = expr.endpos
    # TODO: Validate Scope Labels
    return _auto_semicolon(t, kind.RETURN, None, keyword.startpos, endpos,
                           None, [expr])
+
def _with_statement(t):
    """Parse ``with (expr) stmt`` into a WITH node with kids [expr, body]."""
    with_keyword = t.expect(tok.WITH)
    t.expect(tok.LPAREN)
    scope_expr = _expression(t, True)
    t.expect(tok.RPAREN)
    body = _statement(t)
    return ParseNode(kind.WITH, None, with_keyword.startpos, body.endpos,
                     None, [scope_expr, body])
+
def _switch_statement(t):
    """Parse ``switch (expr) { case ...: ... default: ... }``.

    Returns a SWITCH node with kids [expr, LC], where the LC node spans
    the braces and holds one CASE or DEFAULT node per clause.  Each clause
    node has kids [case-expression-or-None, LC-of-statements].  A clause
    without statements ends at its colon, and so does its empty LC.

    Raises JSSyntaxError('invalid_case') when something other than
    ``case`` or ``default`` starts a clause.
    """
    switch_startpos = t.expect(tok.SWITCH).startpos
    t.expect(tok.LPAREN)
    discriminant = _expression(t, True)
    t.expect(tok.RPAREN)
    lc_startpos = t.expect(tok.LBRACE).startpos

    clauses = []
    while t.peek().tok != tok.RBRACE:
        clause_expr = None
        if t.peek().tok == tok.CASE:
            clause_startpos = t.advance().startpos
            clause_kind = kind.CASE
            clause_expr = _expression(t, True)
        elif t.peek().tok == tok.DEFAULT:
            clause_startpos = t.advance().startpos
            clause_kind = kind.DEFAULT
        else:
            raise JSSyntaxError(t.peek().startpos, 'invalid_case')

        colon_endpos = t.expect(tok.COLON).endpos

        # Statements run until the next clause label or the closing brace.
        body = []
        while t.peek().tok not in (tok.DEFAULT, tok.CASE, tok.RBRACE):
            body.append(_statement(t))

        if body:
            body_startpos = body[0].startpos
            body_endpos = clause_endpos = body[-1].endpos
        else:
            body_startpos = body_endpos = clause_endpos = colon_endpos

        body_node = ParseNode(kind.LC, None, body_startpos,
                              body_endpos, None, body)
        clauses.append(ParseNode(clause_kind, None, clause_startpos,
                                 clause_endpos, None,
                                 [clause_expr, body_node]))

    rc_endpos = t.expect(tok.RBRACE).endpos
    clause_list = ParseNode(kind.LC, None, lc_startpos, rc_endpos, None,
                            clauses)
    return ParseNode(kind.SWITCH, None, switch_startpos, rc_endpos,
                     None, [discriminant, clause_list])
+
def _throw_statement(t):
    """Parse ``throw expr``; the expression must start on the same line.

    Raises JSSyntaxError('expected_statement') when an end-of-line token
    directly follows ``throw``.  The THROW node is built through
    ``_auto_semicolon`` with the expression as its only kid.
    """
    # TODO: Validate Scope
    throw_keyword = t.expect(tok.THROW)
    if t.peek_sameline().tok == tok.EOL:
        raise JSSyntaxError(t.peek_sameline().startpos, 'expected_statement')
    thrown = _expression(t, True)
    return _auto_semicolon(t, kind.THROW, op.THROW, throw_keyword.startpos,
                           thrown.endpos, None, [thrown])
+
def _try_statement(t):
    """Parse ``try {...}`` followed by ``catch (name) {...}`` and/or
    ``finally {...}``.

    Returns a TRY node with kids [try-block, catch-node, finally-block];
    an absent clause is None.  The catch clause is wrapped as
    RESERVED -> LEXICALSCOPE(LEAVEBLOCK) -> CATCH, and the CATCH node has
    kids [NAME node of the bound variable, None, catch block].  The TRY
    node ends with the last clause present.

    Raises JSSyntaxError('invalid_catch') when neither clause follows the
    try block.  The error is positioned at the token found where
    ``catch``/``finally`` was required; the original passed a position
    that was still None at that point, which callers reading
    ``error.pos.line`` cannot handle.
    """
    try_startpos = t.expect(tok.TRY).startpos

    try_node = _block_statement(t)
    catch_node = None
    finally_node = None
    try_endpos = None

    if t.peek().tok == tok.CATCH:
        catch_startpos = t.advance().startpos
        t.expect(tok.LPAREN)
        x = t.expect(tok.NAME)
        catch_expr = ParseNode(kind.NAME, None, x.startpos, x.endpos,
                               x.atom, [None])
        t.expect(tok.RPAREN)
        catch_block = _block_statement(t)
        catch_endpos = catch_block.endpos
        catch_clause = ParseNode(kind.CATCH, None, catch_startpos,
                                 catch_endpos, None,
                                 [catch_expr, None, catch_block])
        catch_scope = ParseNode(kind.LEXICALSCOPE, op.LEAVEBLOCK,
                                catch_startpos, catch_endpos, None,
                                [catch_clause])
        catch_node = ParseNode(kind.RESERVED, None, None, None, None,
                               [catch_scope])
        try_endpos = catch_endpos

    if t.peek().tok == tok.FINALLY:
        t.advance()
        finally_node = _block_statement(t)
        try_endpos = finally_node.endpos

    if catch_node is None and finally_node is None:
        raise JSSyntaxError(t.peek().startpos, 'invalid_catch')

    return ParseNode(kind.TRY, None, try_startpos, try_endpos,
                     None,
                     [try_node, catch_node, finally_node])
+
# Leading token -> parser for every statement form that is recognised by
# its first token alone and needs no extra arguments.
_STATEMENT_PARSERS = {
    tok.LBRACE: _block_statement,
    tok.SEMI: _empty_statement,
    tok.VAR: _var_statement,
    tok.IF: _if_statement,
    tok.DO: _do_statement,
    tok.WHILE: _while_statement,
    tok.FOR: _for_statement,
    tok.CONTINUE: _continue_statement,
    tok.BREAK: _break_statement,
    tok.RETURN: _return_statement,
    tok.WITH: _with_statement,
    tok.SWITCH: _switch_statement,
    tok.THROW: _throw_statement,
    tok.TRY: _try_statement,
}


def _statement(t):
    """Parse one statement, dispatching on the next token.

    Keyword- and punctuation-led statements go to their dedicated parser.
    A ``function`` in statement position is parsed as a declaration with
    opcode CLOSURE.  Anything else is parsed as an expression: a bare
    name followed by ``:`` becomes a labelled statement (a COLON node
    whose atom is the label and whose only kid is the labelled
    statement); otherwise the expression is wrapped in a SEMI node
    through ``_auto_semicolon``.

    Raises JSSyntaxError('eof') when the input ends where a statement is
    required.
    """
    x = t.peek()

    handler = _STATEMENT_PARSERS.get(x.tok)
    if handler is not None:
        return handler(t)
    if x.tok == tok.EOF:
        raise JSSyntaxError(x.startpos, 'eof')
    if x.tok == tok.FUNCTION:
        # TODO: warn, since this is not reliable
        return _function_declaration(t, op.CLOSURE)

    expr = _expression(t, True)
    # Compare against the node-kind constant: `expr` is a parse node, not
    # a token, so the previous comparison with tok.NAME mixed namespaces.
    if expr.kind == kind.NAME and t.peek().tok == tok.COLON:
        t.expect(tok.COLON)
        stmt = _statement(t)
        return ParseNode(kind.COLON, op.NAME, expr.startpos,
                         stmt.endpos, expr.atom, [stmt])

    return _auto_semicolon(t, kind.SEMI, None, expr.startpos, expr.endpos,
                           None, [expr])
+
def _sourceelements(t, end_tok):
    """Parse function declarations and statements until *end_tok* is next.

    *end_tok* itself is not consumed.  ``function`` at this level is
    parsed as a declaration with no opcode.  Returns the list of nodes.
    """
    elements = []
    while True:
        upcoming = t.peek().tok
        if upcoming == tok.FUNCTION:
            parsed = _function_declaration(t, None)
        elif upcoming == end_tok:
            return elements
        else:
            parsed = _statement(t)
        elements.append(parsed)
+
def parsestring(s, startpos=None):
    """Parse the script text *s* and return the root LC node.

    *startpos* is passed straight to ``tokenizer.TokenStream``.  The root
    node's kids are the top-level statements and declarations; it ends at
    the EOF token.  Syntax errors surface as JSSyntaxError from the
    helpers called here.
    """
    tokens = tokenizer.Tokenizer(tokenizer.TokenStream(s, startpos))
    elements = _sourceelements(tokens, tok.EOF)
    lc_endpos = tokens.expect(tok.EOF).endpos
    if elements:
        # NOTE(review): the root starts at the *last* element, not the
        # first -- kept as in the original; confirm whether intended.
        lc_startpos = elements[-1].startpos
    else:
        lc_startpos = lc_endpos
    return ParseNode(kind.LC, None, lc_startpos, lc_endpos, None, elements)
+
def is_valid_version(version):
    """Return True when *version* is one of the entries of ``_VERSIONS``."""
    known = version in _VERSIONS
    return known
+
def _validate(node, depth=0):
    """Assert recursively that every kid's ``parent`` is its container.

    Falsy kids (such as None placeholders) are skipped.  *depth* is only
    threaded through the recursion.
    """
    next_depth = depth + 1
    for child in node.kids:
        if not child:
            continue
        assert child.parent is node
        _validate(child, next_depth)
+
def parse(script, jsversion,
          error_callback, startpos):
    """Parse *script* and return the root node, or None on a syntax error.

    On JSSyntaxError, *error_callback* is called with
    (line, col, msg, msg_args) taken from the error and None is returned.
    A successfully parsed tree is checked with ``_validate`` first.
    *jsversion* is only asserted to be valid; it does not yet affect
    parsing.
    """
    # TODO: respect version
    assert is_valid_version(jsversion)
    try:
        root = parsestring(script, startpos)
    except JSSyntaxError as error:
        position = error.pos
        error_callback(position.line, position.col,
                       error.msg, error.msg_args)
        return None
    else:
        _validate(root)
        return root
+
def is_compilable_unit(script, jsversion):
    """Tell whether *script* is complete enough to be parsed on its own.

    Returns False only when parsing fails with an 'eof' or
    'unterminated_comment' error, i.e. the text stops too early.  Any
    other syntax error, or a clean parse, yields True.  *jsversion* is
    only asserted to be valid.
    """
    # TODO: respect version
    assert is_valid_version(jsversion)
    try:
        parsestring(script)
    except JSSyntaxError as error:
        truncated = error.msg in ('eof', 'unterminated_comment')
        return not truncated
    return True
+
class TestParser(unittest.TestCase):
    """Tests for parsestring() / is_compilable_unit().

    Uses assertTrue/assertEqual instead of the deprecated assert_ and
    assertEquals aliases.
    """

    def _parseAssignmentToA(self, script, rhs_kind):
        """Parse ``a=<something>`` and return the right-hand-side node.

        Checks on the way down that the script is a single SEMI statement
        holding an ASSIGN whose target is the name ``a`` and whose value
        has kind *rhs_kind*.
        """
        root = parsestring(script)
        node, = root.kids
        self.assertEqual(node.kind, kind.SEMI)
        node, = node.kids
        self.assertEqual(node.kind, kind.ASSIGN)
        left, right = node.kids
        self.assertEqual(left.atom, 'a')
        self.assertEqual(right.kind, rhs_kind)
        return right

    def testCompilableUnit(self):
        self.assertTrue(is_compilable_unit('', 'default'))
        self.assertTrue(is_compilable_unit('/**/', 'default'))
        self.assertTrue(not is_compilable_unit('/*', 'default'))

    def testUnterminatedComment(self):
        try:
            parsestring('/*')
        except JSSyntaxError as error:
            self.assertEqual(error.pos, NodePos(0, 1))
        else:
            self.fail('JSSyntaxError not raised for an unterminated comment')

    def testObjectEndComma(self):
        right = self._parseAssignmentToA('a={a:1,}', kind.RC)
        node = right.end_comma
        self.assertEqual(node.kind, tok.COMMA)
        self.assertEqual(node.startpos, NodePos(0, 6))
        self.assertEqual(node.endpos, NodePos(0, 6))

    def _testArrayEndComma(self, script, col):
        """Check the trailing comma of an array literal.

        *col* is the expected column of the comma, or None when the
        literal must not have one.
        """
        right = self._parseAssignmentToA(script, kind.RB)
        node = right.end_comma
        if col is None:
            self.assertTrue(node is None)
        else:
            self.assertEqual(node.kind, tok.COMMA)
            self.assertEqual(node.startpos, NodePos(0, col))
            self.assertEqual(node.endpos, NodePos(0, col))

    def testArrayEndComma(self):
        self._testArrayEndComma('a=[,]', 3)
        self._testArrayEndComma('a=[a,]', 4)
        self._testArrayEndComma('a=[a,b,c]', None)

    def _testArrayCommas(self, script, items, end_comma):
        """Check the atoms of an array literal's kids and its end comma.

        *items* lists the expected ``atom`` of each kid in order;
        *end_comma* is whether a trailing comma must be recorded.
        """
        node = self._parseAssignmentToA(script, kind.RB)
        self.assertEqual(len(node.kids), len(items))
        for kid, item in zip(node.kids, items):
            self.assertEqual(kid.atom, item)
        self.assertEqual(bool(node.end_comma), end_comma)

    def testArrayCommas(self):
        self._testArrayCommas('a=[]', [], False)
        self._testArrayCommas('a=[,]', [None], True)
        self._testArrayCommas('a=[,,]', [None, None], True)
        self._testArrayCommas('a=[,1]', [None, '1'], False)
        self._testArrayCommas('a=[,,1]', [None, None, '1'], False)
        self._testArrayCommas('a=[1,,1]', ['1', None, '1'], False)
        self._testArrayCommas('a=[,1,]', [None, '1'], True)

    def testParseArray(self):
        # Two elements without a separating comma must be rejected.
        self.assertRaises(JSSyntaxError, parsestring, 'a=[1 1]')
Added: trunk/jsengine/parser/_constants_kind.py
===================================================================
--- trunk/jsengine/parser/_constants_kind.py (rev 0)
+++ trunk/jsengine/parser/_constants_kind.py 2013-09-28 03:06:19 UTC (rev 304)
@@ -0,0 +1,79 @@
+# vim: sw=4 ts=4 et
+
# Names of all parse-node kinds.  Each name appears exactly once; the
# original list repeated several entries, which only made the constant
# objects get created and overwritten more than once.
_KINDS = [
    'AND',
    'BITAND',
    'BITOR',
    'BITXOR',
    'CATCH',
    'COMMENT',
    'DELETE',
    'DIVOP',
    'DOT',
    'EQ',
    'FINALLY',
    'FUNCTION',
    'HOOK',
    'IF',
    'IN',
    'INC',
    'INSTANCEOF',
    'LB',
    'LC',
    'LEXICALSCOPE',
    'LP',
    'MINUS',
    'NAME',
    'NEW',
    'OBJECT',
    'OR',
    'PLUS',
    'PRIMARY',
    'RB',
    'RC',
    'RELOP',
    'RESERVED',
    'RP',
    'SEMI',
    'SHOP',
    'STAR',
    'TRY',
    'UNARYOP',
    'VAR',
    'ASSIGN',
    'CASE',
    'COLON',
    'DEFAULT',
    'EQOP',
    'SWITCH',
    'WITH',
    'WHILE',
    'DO',
    'FOR',
    'COMMA',
    'DEC',
    'BREAK',
    'CONTINUE',
    'THROW',
    'RETURN',
    'NUMBER',
    'STRING',
    'YIELD',  # TODO
]


class _Kind(str):
    """A node-kind constant: a ``str`` whose repr reads ``kind.<NAME>``."""

    def __repr__(self):
        return 'kind.%s' % self


class _Kinds:
    """Namespace with one ``_Kind`` attribute per entry of ``_KINDS``."""

    def __init__(self):
        for name in _KINDS:
            setattr(self, name, _Kind(name))

    def contains(self, item):
        """Return True only when *item* is the constant object stored here.

        Plain strings, equal-but-distinct ``_Kind`` instances and ``_Kind``
        instances with an unregistered name all yield False (the last case
        used to raise AttributeError).
        """
        return isinstance(item, _Kind) and \
            getattr(self, item, None) is item


kind = _Kinds()
Added: trunk/jsengine/parser/_constants_op.py
===================================================================
--- trunk/jsengine/parser/_constants_op.py (rev 0)
+++ trunk/jsengine/parser/_constants_op.py 2013-09-28 03:06:19 UTC (rev 304)
@@ -0,0 +1,85 @@
+# vim: sw=4 ts=4 et
+
# Names of all opcodes.  Each name appears exactly once; the original list
# repeated several entries, which only made the constant objects get
# created and overwritten more than once.
_OPS = [
    'ADD',
    'AND',
    'ANONFUNOBJ',
    'ARGNAME',
    'BITAND',
    'BITNOT',
    'BITOR',
    'BITXOR',
    'CALL',
    'C_COMMENT',
    'CLOSURE',
    'CPP_COMMENT',
    'DECNAME',
    'DEFVAR',
    'DIV',
    'EQOP',
    'FALSE',
    'FORIN',
    'GETELEM',
    'GETPROP',
    'GT',
    'GE',
    'HOOK',
    'HTMLCOMMENT',
    'IN',
    'INCNAME',
    'INSTANCEOF',
    'LEAVEBLOCK',
    'LSH',
    'LT',
    'LE',
    'MOD',
    'MUL',
    'NAME',
    'NAMEDEC',
    'NAMEINC',
    'NAMEDFUNOBJ',
    'NEG',
    'NE',
    'NEW',
    'NEW_EQ',
    'NEW_NE',
    'NOT',
    'NULL',
    'NUMBER',
    'OR',
    'POS',
    'PROPINC',
    'REGEXP',
    'RSH',
    'SETCALL',
    'SETELEM',
    'SETNAME',
    'SETPROP',
    'STRING',
    'SUB',
    'THIS',
    'TRUE',
    'THROW',
    'TYPEOF',
    'URSH',
    'VOID',
    'EQ',
]


class _Op(str):
    """An opcode constant: a ``str`` whose repr reads ``op.<NAME>``."""

    def __repr__(self):
        return 'op.%s' % self


class _Ops:
    """Namespace with one ``_Op`` attribute per entry of ``_OPS``."""

    NOP = None  # TODO!

    def __init__(self):
        for name in _OPS:
            setattr(self, name, _Op(name))

    def contains(self, item):
        """Return True only when *item* is the constant object stored here.

        Plain strings, equal-but-distinct ``_Op`` instances and ``_Op``
        instances with an unregistered name all yield False (the last case
        used to raise AttributeError).
        """
        return isinstance(item, _Op) and \
            getattr(self, item, None) is item


op = _Ops()
Added: trunk/jsengine/structs.py
===================================================================
--- trunk/jsengine/structs.py (rev 0)
+++ trunk/jsengine/structs.py 2013-09-28 03:06:19 UTC (rev 304)
@@ -0,0 +1,196 @@
+# vim: ts=4 sw=4 expandtab
+import bisect
+import functools
+
+from parser._constants_kind import kind
+from parser._constants_op import op
+
+class NodePositions:
+ " Given a string, allows [x] lookups for NodePos line and col...
[truncated message content] |