[javascriptlint-commit] SF.net SVN: javascriptlint:[304] trunk
Status: Beta
Brought to you by:
matthiasmiller
From: <mat...@us...> - 2013-09-28 03:06:23
|
Revision: 304 http://sourceforge.net/p/javascriptlint/code/304 Author: matthiasmiller Date: 2013-09-28 03:06:19 +0000 (Sat, 28 Sep 2013) Log Message: ----------- Replace SpiderMonkey with a JavaScript parser written purely in JavaScript. Modified Paths: -------------- trunk/DEVELOPMENT trunk/INSTALL trunk/javascriptlint/jsparse.py trunk/javascriptlint/lint.py trunk/javascriptlint/warnings.py trunk/setup.py trunk/test.py trunk/tests/control_comments/conf-version.js trunk/tests/html/e4x.html trunk/tests/html/script_tag_in_js_comment.html trunk/tests/warnings/identifier_hides_another.js trunk/tests/warnings/spidermonkey/bad_backref.js trunk/tests/warnings/spidermonkey/invalid_backref.js trunk/tests/warnings/want_assign_or_call.js Added Paths: ----------- trunk/jsengine/ trunk/jsengine/__init__.py trunk/jsengine/parser/ trunk/jsengine/parser/__init__.py trunk/jsengine/parser/_constants_kind.py trunk/jsengine/parser/_constants_op.py trunk/jsengine/structs.py trunk/jsengine/tokenizer/ trunk/jsengine/tokenizer/__init__.py Removed Paths: ------------- trunk/Makefile.SpiderMonkey trunk/javascriptlint/pyspidermonkey/ trunk/javascriptlint/pyspidermonkey_/ trunk/javascriptlint/spidermonkey.py trunk/spidermonkey/ trunk/tests/warnings/spidermonkey/deprecated_usage.js Modified: trunk/DEVELOPMENT =================================================================== --- trunk/DEVELOPMENT 2011-12-02 18:49:10 UTC (rev 303) +++ trunk/DEVELOPMENT 2013-09-28 03:06:19 UTC (rev 304) @@ -15,17 +15,3 @@ > add test for syntax error > consider reimplementing abiguous_newline - -** UPGRADING SPIDERMONKEY - -Use the following command to upgrade SpiderMonkey. Replace X.X.X with the -version number. js-X.X.X is the directory containing the new version of -SpiderMonkey. Use a relative path for pretty commit messages. 
- -svn_load_dirs.pl \ - -t X.X.X \ - -p svn_load_dirs.conf \ - https://javascriptlint.svn.sourceforge.net/svnroot/javascriptlint/vendorsrc/Mozilla.org/js \ - current \ - js-X.X.X - Modified: trunk/INSTALL =================================================================== --- trunk/INSTALL 2011-12-02 18:49:10 UTC (rev 303) +++ trunk/INSTALL 2013-09-28 03:06:19 UTC (rev 304) @@ -1,14 +1,4 @@ -BUILDING FROM THE SUBVERSION TRUNK -* Windows Prequisites: - * Visual Studio 2008 Express - * Python 2.6 - * py2exe - * MozillaBuild (http://developer.mozilla.org/en/docs/Windows_Build_Prerequisites) - - Launch the MozillaBuild MSVC 9 batch file. (You may have to run this as an - Administrator on Windows Vista.) Run the commands in that shell. - On all platforms: $ python setup.py build Deleted: trunk/Makefile.SpiderMonkey =================================================================== --- trunk/Makefile.SpiderMonkey 2011-12-02 18:49:10 UTC (rev 303) +++ trunk/Makefile.SpiderMonkey 2013-09-28 03:06:19 UTC (rev 304) @@ -1,59 +0,0 @@ -## THIS IS AN INTERNAL MAKEFILE FOR setup.py -## DO NOT RUN THIS MAKEFILE DIRECTLY. 
- -SPIDERMONKEY_SRC=spidermonkey/src - -# Load the SpiderMonkey config to find the OS define -# Also use this for the SO_SUFFIX -DEPTH=$(SPIDERMONKEY_SRC) -include $(SPIDERMONKEY_SRC)/config.mk -SPIDERMONKEY_OS=$(firstword $(patsubst -D%, %, $(filter -DXP_%, $(OS_CFLAGS)))) - -ifdef USE_MSVC -JS_LIB=js32.lib -else -JS_LIB=libjs.a -endif - -BUILD_DIR=build/spidermonkey - -ORIG_LIB=$(SPIDERMONKEY_SRC)/$(OBJDIR)/$(JS_LIB) -COPY_LIB=$(BUILD_DIR)/$(JS_LIB) -ORIG_DLL=$(SPIDERMONKEY_SRC)/$(OBJDIR)/js32.dll -COPY_DLL_DIR=$(DISTUTILS_DIR)/javascriptlint -COPY_DLL_PATH=$(COPY_DLL_DIR)/js32.dll -OS_HEADER=$(BUILD_DIR)/js_operating_system.h -ORIG_JSAUTOCFG_H=$(SPIDERMONKEY_SRC)/$(OBJDIR)/jsautocfg.h -COPY_JSAUTOCFG_H=$(BUILD_DIR)/jsautocfg.h - -ALL_TARGETS=$(COPY_LIB) $(OS_HEADER) -ifndef PREBUILT_CPUCFG -ALL_TARGETS+=$(COPY_JSAUTOCFG_H) -endif - -ifeq ($(SPIDERMONKEY_OS),XP_WIN) -ALL_TARGETS+=$(COPY_DLL_PATH) -endif - -all: $(ALL_TARGETS) - -clean: - rm -f $(ORIG_LIB) - rm -Rf $(BUILD_DIR) - -$(BUILD_DIR): - mkdir -p $(BUILD_DIR) - -$(COPY_LIB): $(BUILD_DIR) $(ORIG_LIB) - cp $(ORIG_LIB) $(COPY_LIB) - -$(COPY_DLL_PATH): $(ORIG_DLL) - mkdir -p $(COPY_DLL_DIR) - cp $(ORIG_DLL) $(COPY_DLL_PATH) - -$(OS_HEADER): $(BUILD_DIR) - echo "#define $(SPIDERMONKEY_OS)" > $(OS_HEADER) - -$(COPY_JSAUTOCFG_H): $(ORIG_JSAUTOCFG_H) - cp $(ORIG_JSAUTOCFG_H) $(COPY_JSAUTOCFG_H) - Modified: trunk/javascriptlint/jsparse.py =================================================================== --- trunk/javascriptlint/jsparse.py 2011-12-02 18:49:10 UTC (rev 303) +++ trunk/javascriptlint/jsparse.py 2013-09-28 03:06:19 UTC (rev 304) @@ -1,155 +1,20 @@ #!/usr/bin/python # vim: ts=4 sw=4 expandtab """ Parses a script into nodes. 
""" -import bisect import re import unittest -import spidermonkey -from spidermonkey import tok, op -from util import JSVersion +import jsengine.parser +from jsengine.parser import kind as tok +from jsengine.parser import op +from jsengine.structs import * -_tok_names = dict(zip( - [getattr(tok, prop) for prop in dir(tok)], - ['tok.%s' % prop for prop in dir(tok)] -)) -_op_names = dict(zip( - [getattr(op, prop) for prop in dir(op)], - ['op.%s' % prop for prop in dir(op)] -)) +from .util import JSVersion -NodePos = spidermonkey.NodePos - -class NodePositions: - " Given a string, allows [x] lookups for NodePos line and column numbers." - def __init__(self, text, start_pos=None): - # Find the length of each line and incrementally sum all of the lengths - # to determine the ending position of each line. - self._start_pos = start_pos - self._lines = text.splitlines(True) - lines = [0] + [len(x) for x in self._lines] - for x in range(1, len(lines)): - lines[x] += lines[x-1] - self._line_offsets = lines - def from_offset(self, offset): - line = bisect.bisect(self._line_offsets, offset)-1 - col = offset - self._line_offsets[line] - if self._start_pos: - if line == 0: - col += self._start_pos.col - line += self._start_pos.line - return NodePos(line, col) - def to_offset(self, pos): - pos = self._to_rel_pos(pos) - offset = self._line_offsets[pos.line] + pos.col - assert offset <= self._line_offsets[pos.line+1] # out-of-bounds col num - return offset - def text(self, start, end): - assert start <= end - start, end = self._to_rel_pos(start), self._to_rel_pos(end) - # Trim the ending first in case it's a single line. 
- lines = self._lines[start.line:end.line+1] - lines[-1] = lines[-1][:end.col+1] - lines[0] = lines[0][start.col:] - return ''.join(lines) - def _to_rel_pos(self, pos): - " converts a position to a position relative to self._start_pos " - if not self._start_pos: - return pos - line, col = pos.line, pos.col - line -= self._start_pos.line - if line == 0: - col -= self._start_pos.col - assert line >= 0 and col >= 0 # out-of-bounds node position - return NodePos(line, col) - -class NodeRanges: - def __init__(self): - self._offsets = [] - def add(self, start, end): - i = bisect.bisect_left(self._offsets, start) - if i % 2 == 1: - i -= 1 - start = self._offsets[i] - - end = end + 1 - j = bisect.bisect_left(self._offsets, end) - if j % 2 == 1: - end = self._offsets[j] - j += 1 - - self._offsets[i:j] = [start,end] - def has(self, pos): - return bisect.bisect_right(self._offsets, pos) % 2 == 1 - -class _Node: - def add_child(self, node): - if node: - node.node_index = len(self.kids) - node.parent = self - self.kids.append(node) - - def start_pos(self): - try: - return self._start_pos - except AttributeError: - self._start_pos = NodePos(self._start_line, self._start_col) - return self._start_pos - - def end_pos(self): - try: - return self._end_pos - except AttributeError: - self._end_pos = NodePos(self._end_line, self._end_col) - return self._end_pos - - def __str__(self): - kind = self.kind - if not kind: - kind = '(none)' - return '%s>%s' % (_tok_names[kind], str(self.kids)) - - def is_equivalent(self, other, are_functions_equiv=False): - if not other: - return False - - # Bail out for functions - if not are_functions_equiv: - if self.kind == tok.FUNCTION: - return False - if self.kind == tok.LP and self.opcode == op.CALL: - return False - - if self.kind != other.kind: - return False - if self.opcode != other.opcode: - return False - - # Check atoms on names, properties, and string constants - if self.kind in (tok.NAME, tok.DOT, tok.STRING) and self.atom != other.atom: - 
return False - - # Check values on numbers - if self.kind == tok.NUMBER and self.dval != other.dval: - return False - - # Compare child nodes - if len(self.kids) != len(other.kids): - return False - for i in range(0, len(self.kids)): - # Watch for dead nodes - if not self.kids[i]: - if not other.kids[i]: return True - else: return False - if not self.kids[i].is_equivalent(other.kids[i]): - return False - - return True - def isvalidversion(jsversion): if jsversion is None: return True - return spidermonkey.is_valid_version(jsversion.version) + return jsengine.parser.is_valid_version(jsversion.version) def findpossiblecomments(script, node_positions): pos = 0 @@ -168,31 +33,18 @@ comment_text = script[match.start():match.end()] if comment_text.startswith('/*'): comment_text = comment_text[2:-2] - opcode = 'JSOP_C_COMMENT' + opcode = op.C_COMMENT else: comment_text = comment_text[2:] - opcode = 'JSOP_CPP_COMMENT' - opcode = opcode[5:].lower() + opcode = op.CPP_COMMENT start_offset = match.start() end_offset = match.end()-1 start_pos = node_positions.from_offset(start_offset) end_pos = node_positions.from_offset(end_offset) - kwargs = { - 'kind': 'COMMENT', - 'atom': comment_text, - 'opcode': opcode, - '_start_line': start_pos.line, - '_start_col': start_pos.col, - '_end_line': end_pos.line, - '_end_col': end_pos.col, - 'parent': None, - 'kids': [], - 'node_index': None - } - comment_node = _Node() - comment_node.__dict__.update(kwargs) + comment_node = ParseNode(kind.COMMENT, opcode, start_pos, end_pos, + comment_text, []) comments.append(comment_node) # Start searching immediately after the start of the comment in case @@ -203,28 +55,23 @@ """ All node positions will be relative to startpos. This allows scripts to be embedded in a file (for example, HTML). 
""" - def _wrapped_callback(line, col, msg): - assert msg.startswith('JSMSG_') - msg = msg[6:].lower() - error_callback(line, col, msg) - startpos = startpos or NodePos(0,0) jsversion = jsversion or JSVersion.default() - assert isvalidversion(jsversion) - return spidermonkey.parse(script, jsversion.version, jsversion.e4x, - _Node, _wrapped_callback, - startpos.line, startpos.col) + assert isvalidversion(jsversion), jsversion + if jsversion.e4x: + error_callback(startpos.line, startpos.col, 'e4x_deprecated', {}) + return jsengine.parser.parse(script, jsversion.version, + error_callback, + startpos) def filtercomments(possible_comments, node_positions, root_node): comment_ignore_ranges = NodeRanges() def process(node): - if node.kind == tok.NUMBER: - node.atom = node_positions.text(node.start_pos(), node.end_pos()) - elif node.kind == tok.STRING or \ + if node.kind == tok.STRING or \ (node.kind == tok.OBJECT and node.opcode == op.REGEXP): start_offset = node_positions.to_offset(node.start_pos()) - end_offset = node_positions.to_offset(node.end_pos()) - 1 + end_offset = node_positions.to_offset(node.end_pos()) comment_ignore_ranges.add(start_offset, end_offset) for kid in node.kids: if kid: @@ -249,7 +96,7 @@ def is_compilable_unit(script, jsversion): jsversion = jsversion or JSVersion.default() assert isvalidversion(jsversion) - return spidermonkey.is_compilable_unit(script, jsversion.version, jsversion.e4x) + return jsengine.parser.is_compilable_unit(script, jsversion.version) def _dump_node(node, depth=0): if node is None: @@ -258,7 +105,7 @@ print else: print ' '*depth, - print '%s, %s' % (_tok_names[node.kind], _op_names[node.opcode]) + print '%s, %s' % (repr(node.kind), repr(node.opcode)) print ' '*depth, print '%s - %s' % (node.start_pos(), node.end_pos()) if hasattr(node, 'atom'): @@ -379,22 +226,21 @@ for text, expected in tests: encountered = is_compilable_unit(text, JSVersion.default()) self.assertEquals(encountered, expected) - # NOTE: This seems like a 
bug. - self.assert_(is_compilable_unit("/* test", JSVersion.default())) + self.assert_(not is_compilable_unit("/* test", JSVersion.default())) class TestLineOffset(unittest.TestCase): def testErrorPos(self): def geterror(script, startpos): errors = [] - def onerror(line, col, msg): - errors.append((line, col, msg)) + def onerror(line, col, msg, msg_args): + errors.append((line, col, msg, msg_args)) parse(script, None, onerror, startpos) self.assertEquals(len(errors), 1) return errors[0] - self.assertEquals(geterror(' ?', None), (0, 1, 'syntax_error')) - self.assertEquals(geterror('\n ?', None), (1, 1, 'syntax_error')) - self.assertEquals(geterror(' ?', NodePos(1,1)), (1, 2, 'syntax_error')) - self.assertEquals(geterror('\n ?', NodePos(1,1)), (2, 1, 'syntax_error')) + self.assertEquals(geterror(' ?', None), (0, 1, 'syntax_error', {})) + self.assertEquals(geterror('\n ?', None), (1, 1, 'syntax_error', {})) + self.assertEquals(geterror(' ?', NodePos(1,1)), (1, 2, 'syntax_error', {})) + self.assertEquals(geterror('\n ?', NodePos(1,1)), (2, 1, 'syntax_error', {})) def testNodePos(self): def getnodepos(script, startpos): root = parse(script, None, None, startpos) Modified: trunk/javascriptlint/lint.py =================================================================== --- trunk/javascriptlint/lint.py 2011-12-02 18:49:10 UTC (rev 303) +++ trunk/javascriptlint/lint.py 2013-09-28 03:06:19 UTC (rev 304) @@ -12,7 +12,8 @@ import unittest import util -from spidermonkey import tok, op +from jsengine.parser import kind as tok +from jsengine.parser import op _newline_kinds = ( 'eof', 'comma', 'dot', 'semi', 'colon', 'lc', 'rc', 'lp', 'rb', 'assign', @@ -96,6 +97,7 @@ def add_declaration(self, name, node, type_): assert type_ in ('arg', 'function', 'var'), \ 'Unrecognized identifier type: %s' % type_ + assert isinstance(name, basestring) self._identifiers[name] = { 'node': node, 'type': type_ @@ -339,10 +341,10 @@ def _lint_script_part(scriptpos, jsversion, script, script_cache, 
conf, ignores, report_native, report_lint, import_callback): - def parse_error(row, col, msg): + def parse_error(row, col, msg, msg_args): if not msg in ('anon_no_return_value', 'no_return_value', 'redeclared_var', 'var_hides_arg'): - parse_errors.append((jsparse.NodePos(row, col), msg)) + parse_errors.append((jsparse.NodePos(row, col), msg, msg_args)) def report(node, errname, pos=None, **errargs): if errname == 'empty_statement' and node.kind == tok.LC: @@ -411,8 +413,8 @@ root = jsparse.parse(script, jsversion, parse_error, scriptpos) if not root: # Report errors and quit. - for pos, msg in parse_errors: - report_native(pos, msg) + for pos, msg, msg_args in parse_errors: + report_native(pos, msg, msg_args) return comments = jsparse.filtercomments(possible_comments, node_positions, root) @@ -457,7 +459,7 @@ elif keyword == 'pass': passes.append(node) else: - if comment.opcode == 'c_comment': + if comment.opcode == op.C_COMMENT: # Look for nested C-style comments. nested_comment = comment.atom.find('/*') if nested_comment < 0 and comment.atom.endswith('/'): @@ -514,9 +516,9 @@ errdesc = warnings.format_error(errname, **errargs) _report(pos or node.start_pos(), errname, errdesc, True) - def report_native(pos, errname): - # TODO: Format the error. - _report(pos, errname, errname, False) + def report_native(pos, errname, errargs): + errdesc = warnings.format_error(errname, **errargs) + _report(pos, errname, errdesc, False) def _report(pos, errname, errdesc, require_key): try: @@ -581,7 +583,7 @@ if other and parent_scope == scope: # Only warn about duplications in this scope. # Other scopes will be checked later. 
- if other.kind == tok.FUNCTION and name in other.fn_args: + if other.kind == tok.NAME and other.opcode == op.ARGNAME: report(node, 'var_hides_arg', name=name) else: report(node, 'redeclared_var', name=name) @@ -612,7 +614,9 @@ _warn_or_declare(scopes[-1], node.fn_name, 'function', node, report) self._push_scope(node) for var_name in node.fn_args: - scopes[-1].add_declaration(var_name, node, 'arg') + if scopes[-1].get_identifier(var_name.atom): + report(var_name, 'duplicate_formal', name=var_name.atom) + scopes[-1].add_declaration(var_name.atom, var_name, 'arg') @visitation.visit('push', tok.LEXICALSCOPE, tok.WITH) def _push_scope(self, node): Deleted: trunk/javascriptlint/spidermonkey.py =================================================================== --- trunk/javascriptlint/spidermonkey.py 2011-12-02 18:49:10 UTC (rev 303) +++ trunk/javascriptlint/spidermonkey.py 2013-09-28 03:06:19 UTC (rev 304) @@ -1,10 +0,0 @@ -# vim: ts=4 sw=4 expandtab - -# This is a wrapper script to make it easier for development. It tries to -# import the development version first, and if that fails, it goes after the -# real version. -try: - from pyspidermonkey_ import * -except ImportError: - from pyspidermonkey import * - Modified: trunk/javascriptlint/warnings.py =================================================================== --- trunk/javascriptlint/warnings.py 2011-12-02 18:49:10 UTC (rev 303) +++ trunk/javascriptlint/warnings.py 2013-09-28 03:06:19 UTC (rev 304) @@ -21,9 +21,10 @@ import util import visitation -from spidermonkey import tok, op +from jsengine.parser import kind as tok +from jsengine.parser import op -_ALL_TOKENS = tuple(filter(lambda x: x != tok.EOF, tok.__dict__.values())) +_ALL_TOKENS = tok.__dict__.values() def _get_assigned_lambda(node): """ Given a node "x = function() {}", returns "function() {}". 
@@ -53,6 +54,7 @@ 'use_of_label': 'use of label', 'misplaced_regex': 'regular expressions should be preceded by a left parenthesis, assignment, colon, or comma', 'assign_to_function_call': 'assignment to a function call', + 'equal_as_assign': 'test for equality (==) mistyped as assignment (=)?', 'ambiguous_else_stmt': 'the else statement could be matched with one of multiple if statements (use curly braces to indicate intent', 'block_without_braces': 'block statement without curly braces', 'ambiguous_nested_stmt': 'block statements containing block statements should use curly braces to resolve ambiguity', @@ -70,6 +72,7 @@ 'leading_decimal_point': 'leading decimal point may indicate a number or an object member', 'trailing_decimal_point': 'trailing decimal point may indicate a number or an object member', 'octal_number': 'leading zeros make an octal number', + 'trailing_comma': 'extra comma is not recommended in object initializers', 'trailing_comma_in_array': 'extra comma is not recommended in array initializers', 'useless_quotes': 'the quotation marks are unnecessary', 'mismatch_ctrl_comments': 'mismatched control comment; "ignore" and "end" control comments must have a one-to-one correspondence', @@ -99,8 +102,20 @@ 'incorrect_version': 'Expected /*jsl:content-type*/ control comment. 
The script was parsed with the wrong version.', } +errors = { + 'e4x_deprecated': 'e4x is deprecated', + 'semi_before_stmnt': 'missing semicolon before statement', + 'syntax_error': 'syntax error', + 'expected_tok': 'expected token: {token}', + 'unexpected_char': 'unexpected character: {char}', +} + def format_error(errname, **errargs): - errdesc = warnings[errname] + if errname in errors: + errdesc = errors[errname] + else: + errdesc = warnings[errname] + try: errdesc = re.sub(r"{(\w+)}", lambda match: errargs[match.group(1)], errdesc) except (TypeError, KeyError): @@ -295,9 +310,18 @@ @lookfor(tok.ASSIGN) def assign_to_function_call(node): - if node.kids[0].kind == tok.LP: + kid = node.kids[0] + # Unpack parens. + while kid.kind == tok.RP: + kid, = kid.kids + if kid.kind == tok.LP: raise LintWarning, node +@lookfor(tok.ASSIGN) +def equal_as_assign(node): + if not node.parent.kind in (tok.SEMI, tok.RESERVED, tok.RP, tok.COMMA): + raise LintWarning, node + @lookfor(tok.IF) def ambiguous_else_stmt(node): # Only examine this node if it has an else statement. 
@@ -492,6 +516,11 @@ if _octal_regexp.match(node.atom): raise LintWarning, node +@lookfor(tok.RC) +def trailing_comma(node): + if node.end_comma: + raise LintWarning, node + @lookfor(tok.RB) def trailing_comma_in_array(node): if node.end_comma: Index: trunk/jsengine =================================================================== --- trunk/jsengine 2011-12-02 18:49:10 UTC (rev 303) +++ trunk/jsengine 2013-09-28 03:06:19 UTC (rev 304) Property changes on: trunk/jsengine ___________________________________________________________________ Added: svn:ignore ## -0,0 +1 ## +*.pyc Added: trunk/jsengine/__init__.py =================================================================== --- trunk/jsengine/__init__.py (rev 0) +++ trunk/jsengine/__init__.py 2013-09-28 03:06:19 UTC (rev 304) @@ -0,0 +1,21 @@ +# vim: sw=4 ts=4 et + +_MESSAGES = ( + 'eof', + 'semi_before_stmnt', + 'syntax_error', + 'unterminated_comment', + 'expected_tok', + 'unexpected_char', +) + +class JSSyntaxError(BaseException): + def __init__(self, pos, msg, msg_args=None): + assert msg in _MESSAGES, msg + self.pos = pos + self.msg = msg + self.msg_args = msg_args or {} + def __unicode__(self): + return '%s: %s' % (self.pos, self.msg) + def __repr__(self): + return 'JSSyntaxError(%r, %r, %r)' % (self.pos, self.msg. 
self.msg_args) Added: trunk/jsengine/parser/__init__.py =================================================================== --- trunk/jsengine/parser/__init__.py (rev 0) +++ trunk/jsengine/parser/__init__.py 2013-09-28 03:06:19 UTC (rev 304) @@ -0,0 +1,924 @@ +# vim: sw=4 ts=4 et +import unittest + +from jsengine.tokenizer import tok +from jsengine import tokenizer + +from jsengine import JSSyntaxError +from _constants_kind import kind +from _constants_op import op + +from jsengine.structs import * + +_VERSIONS = [ + "default", + "1.0", + "1.1", + "1.2", + "1.3", + "1.4", + "1.5", + "1.6", + "1.7", +] + +def _auto_semicolon(t, kind_, op_, startpos, endpos, atom, kids): + nosemi = False + if t.peek_sameline().tok not in (tok.EOF, tok.EOL, tok.RBRACE): + x = t.advance() + if x.tok != tok.SEMI: + raise JSSyntaxError(x.startpos, 'semi_before_stmnt') + endpos = x.endpos + else: + nosemi = True + return ParseNode(kind_, op_, startpos, endpos, atom, kids, nosemi) + +def _function_arglist(t): + fn_args = [] + if t.peek().tok != tok.RPAREN: + while True: + x = t.expect(tok.NAME) + fn_args.append(ParseNode(kind.NAME, op.ARGNAME, + x.startpos, + x.endpos, x.atom, [])) + if t.peek().tok == tok.COMMA: + t.advance() + else: + break + return fn_args + +def _primary_expression(t): + x = t.next_withregexp() + if x.tok == tok.THIS: + return ParseNode(kind.PRIMARY, op.THIS, x.startpos, x.endpos, None, []) + elif x.tok == tok.NAME: + return ParseNode(kind.NAME, op.NAME, x.startpos, x.endpos, x.atom, [None]) + elif x.tok == tok.NULL: + return ParseNode(kind.PRIMARY, op.NULL, x.startpos, x.endpos, None, []) + elif x.tok == tok.TRUE: + return ParseNode(kind.PRIMARY, op.TRUE, x.startpos, x.endpos, None, []) + elif x.tok == tok.FALSE: + return ParseNode(kind.PRIMARY, op.FALSE, x.startpos, x.endpos, None, []) + elif x.tok == tok.STRING: + return ParseNode(kind.STRING, op.STRING, x.startpos, x.endpos, x.atom, []) + elif x.tok == tok.REGEXP: + return ParseNode(kind.OBJECT, op.REGEXP, 
x.startpos, x.endpos, None, []) + elif x.tok == tok.NUMBER: + return ParseNode(kind.NUMBER, None, x.startpos, x.endpos, x.atom, []) + elif x.tok == tok.LBRACKET: + startpos = x.startpos + items = [] + end_comma = None + if t.peek().tok != tok.RBRACKET: + while True: + # Conditionally add a value. If it isn't followed by a comma, + # quit in order to force an RBRACKET. + if t.peek().tok == tok.COMMA: + items.append(None) + else: + items.append(_assignment_expression(t, True)) + if not t.peek().tok == tok.COMMA: + break + + # Expect a comma and use it if the value was missing. + x = t.expect(tok.COMMA) + comma = ParseNode(kind.COMMA, None, + x.startpos, x.endpos, None, []) + items[-1] = items[-1] or comma + + # Check for the end. + if t.peek().tok == tok.RBRACKET: + end_comma = comma + break + endpos = t.expect(tok.RBRACKET).endpos + return ParseNode(kind.RB, None, startpos, endpos, None, items, + end_comma=end_comma) + elif x.tok == tok.LBRACE: + startpos = x.startpos + kids = [] + # TODO: get/set + end_comma = None + while True: + x = t.peek() + if x.tok == tok.RBRACE: + break + elif x.tok == tok.STRING: + t.expect(tok.STRING) + key = ParseNode(kind.STRING, None, x.startpos, + x.endpos, x.atom, []) + elif x.tok == tok.NUMBER: + t.expect(tok.NUMBER) + key = ParseNode(kind.NUMBER, None, x.startpos, + x.endpos, x.atom, []) + else: + x = t.expect_identifiername() + key = ParseNode(kind.NAME, None, x.startpos, x.endpos, + x.atom, []) + t.expect(tok.COLON) + value = _assignment_expression(t, True) + kids.append(ParseNode(kind.COLON, None, key.startpos, + value.endpos, None, [key, value])) + if t.peek().tok == tok.COMMA: + x = t.advance() + end_comma = ParseNode(kind.COMMA, None, + x.startpos, x.endpos, None, []) + else: + end_comma = None + break + endpos = t.expect(tok.RBRACE).endpos + return ParseNode(kind.RC, None, startpos, endpos, None, kids, + end_comma=end_comma) + elif x.tok == tok.LPAREN: + startpos = x.startpos + kid = _expression(t, True) + endpos = 
t.expect(tok.RPAREN).endpos + return ParseNode(kind.RP, None, startpos, endpos, None, [kid]) + else: + raise JSSyntaxError(x.startpos, 'syntax_error') + +def _function_declaration(t, named_opcode): + node = _function_expression(t, named_opcode) + + # Convert anonymous functions in expressions. + if node.opcode == op.ANONFUNOBJ: + node = _auto_semicolon(t, kind.SEMI, None, node.startpos, node.endpos, + None, [node]) + return node + + +def _function_expression(t, named_opcode): + startpos = t.expect(tok.FUNCTION).startpos + if t.peek().tok == tok.NAME: + fn_name = t.expect(tok.NAME).atom + opcode = named_opcode + else: + fn_name = None + opcode = op.ANONFUNOBJ + t.expect(tok.LPAREN) + fn_args = _function_arglist(t) + t.expect(tok.RPAREN) + fn_body_startpos = t.expect(tok.LBRACE).startpos + kids = _sourceelements(t, tok.RBRACE) + fn_body_endpos = t.expect(tok.RBRACE).endpos + fn_body = ParseNode(kind.LC, None, fn_body_startpos, + fn_body_endpos, None, kids) + return ParseNode(kind.FUNCTION, + op.ANONFUNOBJ if fn_name is None else op.NAMEDFUNOBJ, + startpos, fn_body.endpos, + fn_name, [fn_body], fn_args=fn_args) + +def _argument_list(t): + args = [] + if t.peek().tok != tok.RPAREN: + while True: + args.append(_assignment_expression(t, True)) + if t.peek().tok == tok.COMMA: + t.advance() + else: + break + return args + +def _new_expression(t): + startpos = t.expect(tok.NEW).startpos + expr = _member_expression(t) + # If no (), this is a variant of the NewExpression + if t.peek().tok == tok.LPAREN: + t.expect(tok.LPAREN) + args = _argument_list(t) + endpos = t.expect(tok.RPAREN).endpos + else: + args = [] + endpos = expr.endpos + return ParseNode(kind.NEW, op.NEW, startpos, endpos, + None, [expr] + args) + +def _member_expression(t, _recurse=True): + x = t.peek() + if x.tok == tok.NEW: + kid = _new_expression(t) + elif x.tok == tok.FUNCTION: + kid = _function_expression(t, op.NAMEDFUNOBJ) + else: + kid = _primary_expression(t) + + while True: + if t.peek().tok == 
tok.LBRACKET: + t.advance() + expr = _expression(t, True) + endpos = t.expect(tok.RBRACKET).endpos + kid = ParseNode(kind.LB, op.GETELEM, kid.startpos, endpos, + None, [kid, expr]) + elif t.peek().tok == tok.DOT: + t.advance() + expr = t.expect_identifiername() + kid = ParseNode(kind.DOT, op.GETPROP, kid.startpos, expr.endpos, + expr.atom, [kid]) + else: + return kid + +def _call_expression(t): + expr = _member_expression(t) + if t.peek().tok != tok.LPAREN: + return expr + + while True: + x = t.peek() + if x.tok == tok.LPAREN: + t.expect(tok.LPAREN) + args = _argument_list(t) + endpos = t.expect(tok.RPAREN).endpos + expr = ParseNode(kind.LP, op.CALL, expr.startpos, + endpos, None, [expr] + args) + elif x.tok == tok.LBRACKET: + t.expect(tok.LBRACKET) + lookup = _expression(t, True) + endpos = t.expect(tok.RBRACKET).endpos + expr = ParseNode(kind.LB, op.GETELEM, + expr.startpos, endpos, + None, [expr, lookup]) + elif x.tok == tok.DOT: + t.expect(tok.DOT) + lookup = t.expect_identifiername() + expr = ParseNode(kind.DOT, op.GETPROP, + expr.startpos, lookup.endpos, + lookup.atom, [expr]) + else: + return expr + +def _lefthandside_expression(t): + kid = _call_expression(t) + kid._lefthandside = True + return kid + +def _postfix_expression(t): + kid = _lefthandside_expression(t) + if t.peek_sameline().tok == tok.INC: + endpos = t.expect(tok.INC).endpos + if kid.kind == kind.DOT and kid.opcode == op.GETPROP: + opcode = op.PROPINC + else: + opcode = op.NAMEINC + return ParseNode(kind.INC, opcode, + kid.startpos, endpos, None, [kid]) + elif t.peek_sameline().tok == tok.DEC: + endpos = t.expect(tok.DEC).endpos + return ParseNode(kind.DEC, op.NAMEDEC, + kid.startpos, endpos, None, [kid]) + else: + return kid + +_UNARY = { + tok.DELETE: (kind.DELETE, None), + tok.VOID: (kind.UNARYOP, op.VOID), + tok.TYPEOF: (kind.UNARYOP, op.TYPEOF), + tok.INC: (kind.INC, op.INCNAME), + tok.DEC: (kind.DEC, op.DECNAME), + tok.ADD: (kind.UNARYOP, op.POS), + tok.SUB: (kind.UNARYOP, op.NEG), + 
tok.BIT_NOT: (kind.UNARYOP, op.BITNOT), + tok.LOGICAL_NOT: (kind.UNARYOP, op.NOT), +} +def _unary_expression(t): + x = t.peek() + if x.tok in _UNARY: + kind_, op_ = _UNARY[x.tok] + startpos = t.advance().startpos + kid = _unary_expression(t) + return ParseNode(kind_, op_, startpos, kid.endpos, None, [kid]) + else: + return _postfix_expression(t) + +def _binary_expression(t, dict_, child_expr_callback): + expr = child_expr_callback(t) + while True: + x = t.peek() + try: + kind_, op_ = dict_[x.tok] + except KeyError: + return expr + + kids = [expr] + while t.peek().tok == x.tok: + t.advance() + kids.append(child_expr_callback(t)) + expr = ParseNode(kind_, op_, + kids[0].startpos, kids[1].endpos, + None, kids) + +_MULTIPLICATIVE = { + tok.MUL: (kind.STAR, op.MUL), + tok.DIV: (kind.DIVOP, op.DIV), + tok.MOD: (kind.DIVOP, op.MOD), +} +def _multiplicative_expression(t): + return _binary_expression(t, _MULTIPLICATIVE, _unary_expression) + +_ADDITIVE = { + tok.ADD: (kind.PLUS, op.ADD), + tok.SUB: (kind.MINUS, op.SUB), +} +def _additive_expression(t): + return _binary_expression(t, _ADDITIVE, + _multiplicative_expression) + +_SHIFT = { + tok.LSHIFT: (kind.SHOP, op.LSH), + tok.RSHIFT: (kind.SHOP, op.RSH), + tok.URSHIFT: (kind.SHOP, op.URSH), +} +def _shift_expression(t): + return _binary_expression(t, _SHIFT, + _additive_expression) + +_RELATIONAL_NOIN = { + tok.LT: (kind.RELOP, op.LT), + tok.GT: (kind.RELOP, op.GT), + tok.LE: (kind.RELOP, op.LE), + tok.GE: (kind.RELOP, op.GE), + tok.INSTANCEOF: (kind.INSTANCEOF, op.INSTANCEOF), +} +_RELATIONAL_IN = dict(_RELATIONAL_NOIN) +_RELATIONAL_IN.update({ + tok.IN: (kind.IN, op.IN), +}) +def _relational_expression(t, allowin): + return _binary_expression(t, _RELATIONAL_IN if allowin else _RELATIONAL_NOIN, + _shift_expression) + +_EQUALITY = { + tok.EQ: (kind.EQOP, op.EQ), + tok.NE: (kind.EQOP, op.NE), + tok.EQ_STRICT: (kind.EQOP, op.NEW_EQ), + tok.NE_STRICT: (kind.EQOP, op.NEW_NE), +} +def _equality_expression(t, allowin): + return 
_binary_expression(t, _EQUALITY, + lambda t: _relational_expression(t, allowin)) + +def _bitwise_and_expression(t, allowin): + left = _equality_expression(t, allowin) + while t.peek().tok == tok.BIT_AND: + t.advance() + right = _equality_expression(t, allowin) + left = ParseNode(kind.BITAND, op.BITAND, + left.startpos, right.endpos, + None, [left, right]) + return left + +def _bitwise_xor_expression(t, allowin): + left = _bitwise_and_expression(t, allowin) + while t.peek().tok == tok.BIT_XOR: + t.advance() + right = _bitwise_and_expression(t, allowin) + left = ParseNode(kind.BITXOR, op.BITXOR, + left.startpos, right.endpos, + None, [left, right]) + return left + +def _bitwise_or_expression(t, allowin): + left = _bitwise_xor_expression(t, allowin) + while t.peek().tok == tok.BIT_OR: + t.advance() + right = _bitwise_xor_expression(t, allowin) + left = ParseNode(kind.BITOR, op.BITOR, + left.startpos, right.endpos, + None, [left, right]) + return left + +def _logical_and_expression(t, allowin): + exprs = [] + while True: + exprs.append(_bitwise_or_expression(t, allowin)) + if t.peek().tok == tok.LOGICAL_AND: + t.expect(tok.LOGICAL_AND) + else: + break + + while len(exprs) > 1: + right = exprs.pop() + left = exprs[-1] + exprs[-1] = ParseNode(kind.AND, op.AND, + left.startpos, right.endpos, + None, [left, right]) + return exprs[0] + +def _logical_or_expression(t, allowin): + exprs = [] + while True: + exprs.append(_logical_and_expression(t, allowin)) + if t.peek().tok == tok.LOGICAL_OR: + t.expect(tok.LOGICAL_OR) + else: + break + + while len(exprs) > 1: + right = exprs.pop() + left = exprs[-1] + exprs[-1] = ParseNode(kind.OR, op.OR, + left.startpos, right.endpos, + None, [left, right]) + return exprs[0] + +def _conditional_expression(t, allowin): + kid = _logical_or_expression(t, allowin) + if t.peek().tok == tok.QUESTION: + t.expect(tok.QUESTION) + if_ = _assignment_expression(t, True) + t.expect(tok.COLON) + else_ = _assignment_expression(t, allowin) + return 
def _assignment_expression(t, allowin):
    """Parse an assignment expression (right-associative).

    t -- the tokenizer.
    allowin -- passed through to the sub-expression parsers.

    If the conditional expression is followed by one of the _ASSIGNS
    operators, the assignment target's opcode is rewritten from its "get"
    form to the matching "set" form (NAME->SETNAME, GETPROP->SETPROP,
    GETELEM->SETELEM, CALL->SETCALL) and an assignment node with kids
    [left, right] is returned.  Otherwise the conditional expression is
    returned unchanged.

    Raises JSSyntaxError('invalid_assign') when the target is not a name,
    property access, element access or call.
    """
    left = _conditional_expression(t, allowin)
    if t.peek().tok not in _ASSIGNS:
        return left

    # Look through parenthesized wrappers (kind.RP) to the real target.
    target = left
    while target.kind == kind.RP:
        target, = target.kids

    # The asserts are internal invariants; their messages report the opcode
    # that was actually found on the unwrapped target.
    if target.kind == kind.NAME:
        assert target.opcode == op.NAME, target.opcode
        target.opcode = op.SETNAME
    elif target.kind == kind.DOT:
        assert target.opcode == op.GETPROP, target.opcode
        target.opcode = op.SETPROP
    elif target.kind == kind.LB:
        assert target.opcode == op.GETELEM, target.opcode
        target.opcode = op.SETELEM
    elif target.kind == kind.LP:
        assert target.opcode == op.CALL, target.opcode
        target.opcode = op.SETCALL
    else:
        raise JSSyntaxError(left.startpos, 'invalid_assign')

    kind_, op_ = _ASSIGNS[t.peek().tok]
    t.advance()
    right = _assignment_expression(t, allowin)
    return ParseNode(kind_, op_,
                     left.startpos, right.endpos, None, [left, right])
def _if_statement(t):
    """Parse ``if (condition) statement [else statement]``.

    Returns a kind.IF node whose kids are [condition, if_body, else_body];
    else_body is None when there is no ``else`` clause.  The node ends
    where its last body ends.
    """
    if_token = t.expect(tok.IF)
    t.expect(tok.LPAREN)
    test = _expression(t, True)
    t.expect(tok.RPAREN)
    then_branch = _statement(t)

    else_branch = None
    if t.peek().tok == tok.ELSE:
        t.advance()
        else_branch = _statement(t)

    last_body = else_branch if else_branch else then_branch
    return ParseNode(kind.IF, None, if_token.startpos,
                     last_body.endpos, None,
                     [test, then_branch, else_branch])
def _jump_statement(t, keyword_tok, node_kind):
    """Parse ``<keyword> [label]`` for the continue/break statements.

    keyword_tok -- the keyword token to expect (tok.CONTINUE or tok.BREAK).
    node_kind -- the kind of the node to build.

    The optional label must be on the same line as the keyword.  The label
    text (or None) is stored as the node's atom and the node has no kids.
    """
    endtoken = t.expect(keyword_tok)
    startpos = endtoken.startpos

    if t.peek_sameline().tok == tok.NAME:
        endtoken = t.expect(tok.NAME)
        name = endtoken.atom
    else:
        name = None
    # TODO: Validate Scope Labels
    return _auto_semicolon(t, node_kind, None, startpos, endtoken.endpos,
                           name, [])

def _continue_statement(t):
    """Parse ``continue [label]`` into a kind.CONTINUE node."""
    return _jump_statement(t, tok.CONTINUE, kind.CONTINUE)

def _break_statement(t):
    """Parse ``break [label]`` into a kind.BREAK node."""
    return _jump_statement(t, tok.BREAK, kind.BREAK)
def _switch_statement(t):
    """Parse ``switch (expr) { case ...: ... default: ... }``.

    Returns a kind.SWITCH node with kids [expr, body], where body is a
    kind.LC node spanning the braces whose kids are the case nodes.  Each
    case node (kind.CASE or kind.DEFAULT) has kids [case_expr, statements],
    with case_expr None for ``default`` and statements a kind.LC node.

    Raises JSSyntaxError('invalid_case') when something other than ``case``
    or ``default`` appears where a clause must start.
    """
    switch_startpos = t.expect(tok.SWITCH).startpos
    t.expect(tok.LPAREN)
    expr = _expression(t, True)
    t.expect(tok.RPAREN)
    lc_startpos = t.expect(tok.LBRACE).startpos
    cases = []
    while t.peek().tok != tok.RBRACE:
        case_kind = None
        case_expr = None
        if t.peek().tok == tok.CASE:
            case_startpos = t.advance().startpos
            case_kind = kind.CASE
            case_expr = _expression(t, True)
        elif t.peek().tok == tok.DEFAULT:
            case_startpos = t.advance().startpos
            case_kind = kind.DEFAULT
        else:
            raise JSSyntaxError(t.peek().startpos, 'invalid_case')

        # Provisional end of the clause: the colon.  Extended below if the
        # clause has statements.
        case_endpos = t.expect(tok.COLON).endpos

        # Collect statements until the next clause or the closing brace.
        statements = []
        while t.peek().tok not in (tok.DEFAULT, tok.CASE, tok.RBRACE):
            statements.append(_statement(t))
        if statements:
            statements_startpos = statements[0].startpos
            statements_endpos = statements[-1].endpos
            case_endpos = statements[-1].endpos
        else:
            # Empty clause: the statement list is a zero-length span at the
            # end of the colon.
            statements_startpos = case_endpos
            statements_endpos = case_endpos

        cases.append(ParseNode(case_kind, None, case_startpos, case_endpos,
                               None, [
            case_expr,
            ParseNode(kind.LC, None, statements_startpos,
                      statements_endpos, None, statements)
        ]))

    rc_endpos = t.expect(tok.RBRACE).endpos
    return ParseNode(kind.SWITCH, None, switch_startpos, rc_endpos,
                     None, [expr,
        ParseNode(kind.LC, None, lc_startpos, rc_endpos, None, cases)])
def _try_statement(t):
    """Parse ``try {...} [catch (name) {...}] [finally {...}]``.

    Returns a kind.TRY node whose kids are [try_block, catch, finally]; a
    missing clause is None.  The catch clause is wrapped as
    RESERVED > LEXICALSCOPE > CATCH, and the CATCH node's kids are
    [name_node, None, catch_block].  The TRY node ends at the end of its
    last clause.

    Raises JSSyntaxError('invalid_catch') when the try block is followed by
    neither ``catch`` nor ``finally``.
    """
    try_startpos = t.expect(tok.TRY).startpos

    try_node = _block_statement(t)
    catch_node = None
    finally_node = None
    try_endpos = None

    if t.peek().tok == tok.CATCH:
        catch_startpos = t.advance().startpos
        t.expect(tok.LPAREN)
        x = t.expect(tok.NAME)
        catch_expr = ParseNode(kind.NAME, None, x.startpos, x.endpos,
                               x.atom, [None])
        t.expect(tok.RPAREN)
        catch_block = _block_statement(t)
        catch_endpos = catch_block.endpos
        catch_node = \
            ParseNode(kind.RESERVED, None, None, None, None, [
                ParseNode(kind.LEXICALSCOPE, op.LEAVEBLOCK,
                          catch_startpos, catch_endpos, None, [
                    ParseNode(kind.CATCH, None, catch_startpos,
                              catch_endpos, None,
                              [catch_expr, None, catch_block])
                ])
            ])
        try_endpos = catch_endpos

    if t.peek().tok == tok.FINALLY:
        t.advance()
        finally_node = _block_statement(t)
        try_endpos = finally_node.endpos

    if not catch_node and not finally_node:
        # try_endpos is still None here, and error reporting needs a real
        # position (line/col), so point at the token found where
        # catch/finally was required.
        raise JSSyntaxError(t.peek().startpos, 'invalid_catch')

    return ParseNode(kind.TRY, None, try_startpos, try_endpos,
                     None,
                     [try_node, catch_node, finally_node])
def parsestring(s, startpos=None):
    """Parse the script text *s* and return its root kind.LC node.

    s -- the JavaScript source text.
    startpos -- passed through to tokenizer.TokenStream.

    The root node's kids are the top-level source elements.  It spans from
    the start of the first element to the end of the EOF token; for an
    empty script both positions are the EOF token's end.

    JSSyntaxError raised by the tokenizer or the statement parsers
    propagates to the caller.
    """
    stream = tokenizer.TokenStream(s, startpos)
    t = tokenizer.Tokenizer(stream)
    nodes = _sourceelements(t, tok.EOF)
    lc_endpos = t.expect(tok.EOF).endpos
    # Start at the FIRST element so the root span encloses every kid.
    lc_startpos = nodes[0].startpos if nodes else lc_endpos
    return ParseNode(kind.LC, None, lc_startpos, lc_endpos, None, nodes)
+ self.assert_(is_compilable_unit('/**/', 'default')) + self.assert_(not is_compilable_unit('/*', 'default')) + def testUnterminatedComment(self): + try: + parsestring('/*') + except JSSyntaxError as error: + self.assertEqual(error.pos, NodePos(0,1)) + else: + self.assert_(False) + def testObjectEndComma(self): + root = parsestring('a={a:1,}') + node, = root.kids + self.assertEquals(node.kind, kind.SEMI) + node, = node.kids + self.assertEquals(node.kind, kind.ASSIGN) + left, right = node.kids + self.assertEquals(left.atom, 'a') + self.assertEquals(right.kind, kind.RC) + node = right.end_comma + self.assertEquals(node.kind, tok.COMMA) + self.assertEquals(node.startpos, NodePos(0, 6)) + self.assertEquals(node.endpos, NodePos(0, 6)) + def _testArrayEndComma(self, script, col): + root = parsestring(script) + node, = root.kids + self.assertEquals(node.kind, kind.SEMI) + node, = node.kids + self.assertEquals(node.kind, kind.ASSIGN) + left, right = node.kids + self.assertEquals(left.atom, 'a') + self.assertEquals(right.kind, kind.RB) + node = right.end_comma + self.assertEquals(node is None, col is None) + if col is None: + self.assert_(node is None) + else: + self.assertEquals(node.kind, tok.COMMA) + self.assertEquals(node.startpos, NodePos(0, col)) + self.assertEquals(node.endpos, NodePos(0, col)) + def testArrayEndComma(self): + self._testArrayEndComma('a=[,]', 3) + self._testArrayEndComma('a=[a,]', 4) + self._testArrayEndComma('a=[a,b,c]', None) + def _testArrayCommas(self, script, items, end_comma): + root = parsestring(script) + node, = root.kids + self.assertEquals(node.kind, kind.SEMI) + node, = node.kids + self.assertEquals(node.kind, kind.ASSIGN) + left, right = node.kids + self.assertEquals(left.atom, 'a') + self.assertEquals(right.kind, kind.RB) + node = right + self.assertEquals(len(node.kids), len(items)) + for kid, item in zip(node.kids, items): + self.assertEquals(kid.atom, item) + self.assertEquals(bool(node.end_comma), end_comma) + def 
testArrayCommas(self): + self._testArrayCommas('a=[]', [], False) + self._testArrayCommas('a=[,]', [None], True) + self._testArrayCommas('a=[,,]', [None, None], True) + self._testArrayCommas('a=[,1]', [None, '1'], False) + self._testArrayCommas('a=[,,1]', [None, None, '1'], False) + self._testArrayCommas('a=[1,,1]', ['1', None, '1'], False) + self._testArrayCommas('a=[,1,]', [None, '1'], True) + def testParseArray(self): + try: + parsestring('a=[1 1]') + except JSSyntaxError as error: + pass + else: + self.assert_(False) Added: trunk/jsengine/parser/_constants_kind.py =================================================================== --- trunk/jsengine/parser/_constants_kind.py (rev 0) +++ trunk/jsengine/parser/_constants_kind.py 2013-09-28 03:06:19 UTC (rev 304) @@ -0,0 +1,79 @@ +# vim: sw=4 ts=4 et + +_KINDS = [ + 'AND', + 'BITAND', + 'BITOR', + 'BITXOR', + 'CATCH', + 'COMMENT', + 'DELETE', + 'DIVOP', + 'DOT', + 'EQ', + 'FINALLY', + 'FUNCTION', + 'HOOK', + 'IF', + 'IN', + 'INC', + 'INSTANCEOF', + 'LB', + 'LC', + 'LEXICALSCOPE', + 'LP', + 'MINUS', + 'NAME', + 'NEW', + 'OBJECT', + 'OR', + 'PLUS', + 'PRIMARY', + 'RB', + 'RC', + 'RELOP', + 'RESERVED', + 'RP', + 'SEMI', + 'SHOP', + 'STAR', + 'TRY', + 'UNARYOP', + 'VAR', + 'ASSIGN', + 'CASE', + 'COLON', + 'DEFAULT', + 'EQOP', + 'OBJECT', + 'RELOP', + 'SWITCH', + 'WITH', + 'WHILE', + 'DO', + 'FOR', + 'COMMA', + 'DEC', + 'BREAK', + 'CONTINUE', + 'THROW', + 'RETURN', + 'UNARYOP', + 'LP', + 'NUMBER', + 'RB', + 'STRING', + 'YIELD', # TODO +] +class _Kind(str): + def __repr__(self): + return 'kind.%s' % self + +class _Kinds: + def __init__(self): + for kind in _KINDS: + setattr(self, kind, _Kind(kind)) + def contains(self, item): + return isinstance(item, _Kind) and \ + getattr(self, item) is item +kind = _Kinds() Added: trunk/jsengine/parser/_constants_op.py =================================================================== --- trunk/jsengine/parser/_constants_op.py (rev 0) +++ trunk/jsengine/parser/_constants_op.py 
2013-09-28 03:06:19 UTC (rev 304) @@ -0,0 +1,85 @@ +# vim: sw=4 ts=4 et + +_OPS = [ + 'ADD', + 'AND', + 'ANONFUNOBJ', + 'ARGNAME', + 'BITAND', + 'BITNOT', + 'BITOR', + 'BITXOR', + 'CALL', + 'C_COMMENT', + 'CLOSURE', + 'CPP_COMMENT', + 'DECNAME', + 'DEFVAR', + 'DIV', + 'EQOP', + 'FALSE', + 'FORIN', + 'GETELEM', + 'GETPROP', + 'GT', + 'GE', + 'HOOK', + 'HTMLCOMMENT', + 'IN', + 'INCNAME', + 'INSTANCEOF', + 'LEAVEBLOCK', + 'LSH', + 'LT', + 'LE', + 'MOD', + 'MUL', + 'NAME', + 'NAMEDEC', + 'NAMEINC', + 'NAMEDFUNOBJ', + 'NEG', + 'NE', + 'NEW', + 'NEW_EQ', + 'NEW_NE', + 'NOT', + 'NULL', + 'NUMBER', + 'OR', + 'POS', + 'PROPINC', + 'REGEXP', + 'RSH', + 'SETCALL', + 'SETELEM', + 'SETNAME', + 'SETPROP', + 'STRING', + 'SUB', + 'THIS', + 'TRUE', + 'THROW', + 'TYPEOF', + 'URSH', + 'VOID', + 'EQ', + 'NAME', + 'REGEXP', + 'SETNAME', + 'VOID', + 'CALL', +] +class _Op(str): + def __repr__(self): + return 'op.%s' % self + +class _Ops: + NOP = None # TODO! + def __init__(self): + for op in _OPS: + setattr(self, op, _Op(op)) + def contains(self, item): + return isinstance(item, _Op) and \ + getattr(self, item) is item +op = _Ops() Added: trunk/jsengine/structs.py =================================================================== --- trunk/jsengine/structs.py (rev 0) +++ trunk/jsengine/structs.py 2013-09-28 03:06:19 UTC (rev 304) @@ -0,0 +1,196 @@ +# vim: ts=4 sw=4 expandtab +import bisect +import functools + +from parser._constants_kind import kind +from parser._constants_op import op + +class NodePositions: + " Given a string, allows [x] lookups for NodePos line and col... [truncated message content] |