From: <fwi...@us...> - 2008-09-01 04:03:33
Revision: 5277 http://jython.svn.sourceforge.net/jython/?rev=5277&view=rev Author: fwierzbicki Date: 2008-09-01 04:03:21 +0000 (Mon, 01 Sep 2008) Log Message: ----------- update "bare" grammar. Modified Paths: -------------- trunk/sandbox/wierzbicki/antlr/grammar/Python.g trunk/sandbox/wierzbicki/antlr/regr trunk/sandbox/wierzbicki/antlr/run trunk/sandbox/wierzbicki/antlr/src/Main.java trunk/sandbox/wierzbicki/antlr/src/PythonTokenSource.java Added Paths: ----------- trunk/sandbox/wierzbicki/antlr/lib/antlr-3.1.jar trunk/sandbox/wierzbicki/antlr/lib/antlr-runtime-3.1.jar trunk/sandbox/wierzbicki/antlr/lib/stringtemplate-3.2.jar Removed Paths: ------------- trunk/sandbox/wierzbicki/antlr/lib/antlr-3.0.1.jar trunk/sandbox/wierzbicki/antlr/lib/stringtemplate-3.1b1.jar Modified: trunk/sandbox/wierzbicki/antlr/grammar/Python.g =================================================================== --- trunk/sandbox/wierzbicki/antlr/grammar/Python.g 2008-08-31 01:00:57 UTC (rev 5276) +++ trunk/sandbox/wierzbicki/antlr/grammar/Python.g 2008-09-01 04:03:21 UTC (rev 5277) @@ -52,7 +52,16 @@ * * I (Terence) tested this by running it on the jython-2.1/Lib * directory of 40k lines of Python. - * + * + * REQUIRES ANTLR v3 + * + * + * Updated the original parser for Python 2.5 features. The parser has been + * altered to produce an AST - the AST work started from tne newcompiler + * grammar from Jim Baker. The current parsing and compiling strategy looks + * like this: + * + * Python source->Python.g->AST (org/python/parser/ast/*)->CodeCompiler(ASM)->.class */ grammar Python; @@ -72,60 +81,127 @@ int startPos=-1; } -single_input : NEWLINE - | simple_stmt - | compound_stmt NEWLINE - ; +single_input + : NEWLINE* EOF + | simple_stmt NEWLINE* EOF + | compound_stmt NEWLINE+ EOF + ; -file_input : (NEWLINE | stmt)* - ; +file_input + : (NEWLINE + | stmt + )* + ; -eval_input : (NEWLINE)* testlist (NEWLINE)* - ; +eval_input + : LEADING_WS? (NEWLINE)* testlist (NEWLINE)* EOF + ; -decorators: decorator+ - ; +dotted_attr + : NAME + ( (DOT NAME)+ + | + ) + ; -decorator: AT dotted_attr (LPAREN arglist? RPAREN)? NEWLINE - ; +//attr is here for Java compatibility. A Java foo.getIf() can be called from Jython as foo.if +// so we need to support any keyword as an attribute. +attr + : NAME + | AND + | AS + | ASSERT + | BREAK + | CLASS + | CONTINUE + | DEF + | DELETE + | ELIF + | EXCEPT + | EXEC + | FINALLY + | FROM + | FOR + | GLOBAL + | IF + | IMPORT + | IN + | IS + | LAMBDA + | NOT + | OR + | ORELSE + | PASS + | PRINT + | RAISE + | RETURN + | TRY + | WHILE + | WITH + | YIELD + ; -dotted_attr - : NAME (DOT NAME)* +decorator + : AT dotted_attr + ( LPAREN + ( arglist + | + ) + RPAREN + | + ) NEWLINE ; -funcdef : decorators? 'def' NAME parameters COLON suite - ; +decorators + : decorator+ + ; -parameters : LPAREN (varargslist)? RPAREN - ; +funcdef + : decorators? DEF NAME parameters COLON suite + ; -varargslist : defparameter (options {greedy=true;}:COMMA defparameter)* - (COMMA - ( STAR NAME (COMMA DOUBLESTAR NAME)? - | DOUBLESTAR NAME - )? - )? - | STAR NAME (COMMA DOUBLESTAR NAME)? - | DOUBLESTAR NAME - ; +parameters + : LPAREN + (varargslist + | + ) + RPAREN + ; -defparameter : fpdef (ASSIGN test)? - ; +defparameter + : fpdef (ASSIGN test)? + ; -fpdef : NAME - | LPAREN fplist RPAREN - ; +varargslist + : defparameter (options {greedy=true;}:COMMA defparameter)* + (COMMA + (STAR NAME (COMMA DOUBLESTAR NAME)? + | DOUBLESTAR NAME + )? + )? + | STAR NAME (COMMA DOUBLESTAR NAME)? 
+ | DOUBLESTAR NAME + ; -fplist : fpdef (options {greedy=true;}:COMMA fpdef)* (COMMA)? - ; +fpdef + : NAME + | LPAREN fplist RPAREN + ; -stmt : simple_stmt - | compound_stmt - ; +fplist + : fpdef + (options {greedy=true;}:COMMA fpdef)* (COMMA)? + ; -simple_stmt : small_stmt (options {greedy=true;}:SEMI small_stmt)* (SEMI)? NEWLINE - ; +stmt + : simple_stmt + | compound_stmt + ; +simple_stmt + : small_stmt (options {greedy=true;}:SEMI small_stmt)* (SEMI)? NEWLINE + ; + small_stmt : expr_stmt | print_stmt | del_stmt @@ -137,308 +213,515 @@ | assert_stmt ; -expr_stmt : testlist - ( augassign yield_expr - | augassign testlist - | assigns - )? - ; +expr_stmt + : ((testlist augassign) => lhs=testlist + ( (augassign yield_expr + ) + | (augassign testlist + ) + ) + | (testlist ASSIGN) => lhs=testlist + ( + | ((ASSIGN testlist)+ + ) + | ((ASSIGN yield_expr)+ + ) + ) + | lhs=testlist + ) + ; -assigns - : assign_testlist+ - | assign_yield+ +augassign + : PLUSEQUAL + | MINUSEQUAL + | STAREQUAL + | SLASHEQUAL + | PERCENTEQUAL + | AMPEREQUAL + | VBAREQUAL + | CIRCUMFLEXEQUAL + | LEFTSHIFTEQUAL + | RIGHTSHIFTEQUAL + | DOUBLESTAREQUAL + | DOUBLESLASHEQUAL ; -assign_testlist - : ASSIGN testlist - ; +print_stmt + : PRINT + (printlist + | RIGHTSHIFT printlist2 + | + ) + ; -assign_yield - : ASSIGN yield_expr +//not in CPython's Grammar file +printlist + : (test COMMA) => + test (options {k=2;}: COMMA test)* + (trailcomma=COMMA)? + | test ; -augassign : PLUSEQUAL - | MINUSEQUAL - | STAREQUAL - | SLASHEQUAL - | PERCENTEQUAL - | AMPEREQUAL - | VBAREQUAL - | CIRCUMFLEXEQUAL - | LEFTSHIFTEQUAL - | RIGHTSHIFTEQUAL - | DOUBLESTAREQUAL - | DOUBLESLASHEQUAL - ; +//XXX: would be nice if printlist and printlist2 could be merged. +//not in CPython's Grammar file +printlist2 + : (test COMMA test) => + test (options {k=2;}: COMMA test)* + (trailcomma=COMMA)? + | test + ; -print_stmt : 'print' (printlist | RIGHTSHIFT printlist)? - ; +del_stmt + : DELETE del_list + ; -printlist returns [boolean newline] - : test (options {k=2;}: COMMA test)* (COMMA)? +pass_stmt + : PASS ; +flow_stmt + : break_stmt + | continue_stmt + | return_stmt + | raise_stmt + | yield_stmt + ; -del_stmt : 'del' exprlist - ; +break_stmt + : BREAK + ; -pass_stmt : 'pass' - ; +continue_stmt + : CONTINUE + ; -flow_stmt : break_stmt - | continue_stmt - | return_stmt - | raise_stmt - | yield_stmt - ; +return_stmt + : RETURN + (testlist + | + ) + ; -break_stmt : 'break' - ; +yield_stmt + : yield_expr + ; -continue_stmt : 'continue' - ; +raise_stmt + : RAISE (test (COMMA test + (COMMA test)?)?)? + ; -return_stmt : 'return' (testlist)? - ; +import_stmt + : import_name + | import_from + ; -yield_stmt : yield_expr - ; +import_name + : IMPORT dotted_as_names + ; -raise_stmt: 'raise' (test (COMMA test (COMMA test)?)?)? - ; +import_from + : FROM (DOT* dotted_name | DOT+) IMPORT + (STAR + | import_as_names + | LPAREN import_as_names COMMA? RPAREN + ) + ; -import_stmt : import_name - | import_from - ; +import_as_names + : import_as_name (COMMA import_as_name)* + ; -import_name : 'import' dotted_as_names - ; +import_as_name + : name=NAME (AS asname=NAME)? + ; -import_from: 'from' (DOT* dotted_name | DOT+) 'import' - (STAR - | import_as_names - | LPAREN import_as_names RPAREN - ) - ; +dotted_as_name + : dotted_name (AS NAME)? + ; -import_as_names : import_as_name (COMMA import_as_name)* (COMMA)? - ; +dotted_as_names + : dotted_as_name (COMMA dotted_as_name)* + ; -import_as_name : NAME ('as' NAME)? 
- ; +dotted_name + : NAME (DOT attr)* + ; -dotted_as_name : dotted_name ('as' NAME)? - ; +global_stmt + : GLOBAL NAME (COMMA NAME)* + ; -dotted_as_names : dotted_as_name (COMMA dotted_as_name)* - ; -dotted_name : NAME (DOT NAME)* - ; +exec_stmt + : EXEC expr (IN test + (COMMA test)?)? + ; -global_stmt : 'global' NAME (COMMA NAME)* - ; +assert_stmt + : ASSERT test (COMMA test)? + ; -exec_stmt : 'exec' expr ('in' test (COMMA test)?)? - ; +compound_stmt + : if_stmt + | while_stmt + | for_stmt + | try_stmt + | with_stmt + | funcdef + | classdef + ; -assert_stmt : 'assert' test (COMMA test)? - ; +if_stmt + : IF test COLON suite elif_clause* + (ORELSE COLON suite)? + ; -compound_stmt : if_stmt - | while_stmt - | for_stmt - | try_stmt - | with_stmt - | funcdef - | classdef - ; +//not in CPython's Grammar file +elif_clause + : ELIF test COLON suite + ; -if_stmt: 'if' test COLON suite elif_clause* ('else' COLON suite)? - ; +while_stmt + : WHILE test COLON suite (ORELSE COLON suite)? + ; -elif_clause : 'elif' test COLON suite - ; +for_stmt + : FOR exprlist IN testlist COLON suite + (ORELSE COLON suite)? + ; -while_stmt : 'while' test COLON suite ('else' COLON suite)? - ; +try_stmt + : TRY COLON suite + ( except_clause+ (ORELSE COLON suite)? (FINALLY COLON suite)? + | FINALLY COLON suite + ) + ; -for_stmt : 'for' exprlist 'in' testlist COLON suite ('else' COLON suite)? - ; +with_stmt + : WITH test (with_var)? COLON suite + ; -try_stmt : 'try' COLON suite - ( except_clause+ ('else' COLON suite)? ('finally' COLON suite)? - | 'finally' COLON suite - ) - ; +with_var + : (AS | NAME) expr + ; -with_stmt: 'with' test (with_var)? COLON suite - ; +except_clause + : EXCEPT (test (COMMA test)?)? COLON suite + ; -with_var: ('as' | NAME) expr - ; +suite + : simple_stmt + | NEWLINE INDENT + (stmt + )+ DEDENT + ; -except_clause : 'except' (test (COMMA test)?)? COLON suite - ; +test + :or_test + ( (IF or_test ORELSE) => IF o2=or_test ORELSE e=test + | + ) + | lambdef + ; -suite : simple_stmt - | NEWLINE INDENT (stmt)+ DEDENT - ; +or_test + : left=and_test + ( (OR and_test + )+ + | + ) + ; -test: or_test - ( ('if' or_test 'else') => 'if' or_test 'else' test)? 
- | lambdef +and_test + : not_test + ( (AND not_test + )+ + | + ) ; -or_test : and_test (OR and_test)* - ; +not_test + : NOT nt=not_test + | comparison + ; -and_test : not_test (AND not_test)* - ; +comparison + : left=expr + ( ( comp_op expr + )+ + | + ) + ; -not_test : NOT not_test - | comparison - ; +comp_op + : LESS + | GREATER + | EQUAL + | GREATEREQUAL + | LESSEQUAL + | ALT_NOTEQUAL + | NOTEQUAL + | IN + | NOT IN + | IS + | IS NOT + ; -comparison: expr (comp_op expr)* - ; +expr + : left=xor_expr + ( (VBAR xor_expr + )+ + | + ) + ; -comp_op : LESS - | GREATER - | EQUAL - | GREATEREQUAL - | LESSEQUAL - | ALT_NOTEQUAL - | NOTEQUAL - | 'in' - | NOT 'in' - | 'is' - | 'is' NOT - ; +xor_expr + : left=and_expr + ( (CIRCUMFLEX and_expr + )+ + | + ) + ; -expr : xor_expr (VBAR xor_expr)* - ; +and_expr + : shift_expr + ( (AMPER shift_expr + )+ + | + ) + ; -xor_expr : and_expr (CIRCUMFLEX and_expr)* - ; +shift_expr + : left=arith_expr + ( ( shift_op arith_expr + )+ + | + ) + ; -and_expr : shift_expr (AMPER shift_expr)* - ; +shift_op + : LEFTSHIFT + | RIGHTSHIFT + ; -shift_expr : arith_expr ((LEFTSHIFT|RIGHTSHIFT) arith_expr)* - ; +arith_expr + : left=term + ( (arith_op term + )+ + | + ) + ; -arith_expr: term ((PLUS|MINUS) term)* - ; +arith_op + : PLUS + | MINUS + ; -term : factor ((STAR | SLASH | PERCENT | DOUBLESLASH ) factor)* - ; +term + : factor + ( (term_op factor + )+ + | + ) + ; -factor : PLUS factor - | MINUS factor - | TILDE factor - | power - ; +term_op + :STAR + |SLASH + |PERCENT + |DOUBLESLASH + ; -power : atom (trailer)* (options {greedy=true;}:DOUBLESTAR factor)? - ; +factor + : PLUS factor + | MINUS factor + | TILDE factor + | power + ; -atom : LPAREN - ( yield_expr - | testlist_gexp - )? - RPAREN - | LBRACK (listmaker)? RBRACK - | LCURLY (dictmaker)? RCURLY +power + : atom (trailer)* (options {greedy=true;}:DOUBLESTAR factor)? + ; + +atom + : LPAREN + ( yield_expr + | testlist_gexp + | + ) + RPAREN + | LBRACK + (listmaker + | + ) + RBRACK + | LCURLY + (dictmaker + | + ) + RCURLY | BACKQUOTE testlist BACKQUOTE | NAME | INT | LONGINT | FLOAT | COMPLEX - | (STRING)+ + | (STRING)+ ; -listmaker : test - ( list_for - | (options {greedy=true;}:COMMA test)* - ) (COMMA)? +listmaker + : test + (list_for + | (options {greedy=true;}:COMMA test)* + ) (COMMA)? ; testlist_gexp - : test ( (options {k=2;}: COMMA test)* (COMMA)? - | gen_for - ) - + : test + ( ((options {k=2;}: COMMA test)* (COMMA)? + ) + | (gen_for + ) + ) ; -lambdef: 'lambda' (varargslist)? COLON test - ; +lambdef + : LAMBDA (varargslist)? COLON test + ; -trailer : LPAREN (arglist)? RPAREN - | LBRACK subscriptlist RBRACK - | DOT NAME - ; +trailer + : LPAREN + (arglist + | + ) + RPAREN + | LBRACK subscriptlist RBRACK + | DOT attr + ; -subscriptlist : subscript (options {greedy=true;}:COMMA subscript)* (COMMA)? - ; +subscriptlist + : subscript (options {greedy=true;}:COMMA subscript)* (COMMA)? + ; -subscript : DOT DOT DOT - | test (COLON (test)? (sliceop)?)? - | COLON (test)? (sliceop)? - ; +subscript + : DOT DOT DOT + | (test COLON) + => test (COLON (test)? (sliceop)?)? + | (COLON) + => COLON (test)? (sliceop)? + | test + ; -sliceop : COLON (test)? - ; +sliceop + : COLON + (test + )? + ; -exprlist : expr (options {k=2;}: COMMA expr)* (COMMA)? - ; +exprlist + : (expr COMMA) => expr (options {k=2;}: COMMA expr)* (COMMA)? + | expr + ; +//not in CPython's Grammar file +del_list + : expr (options {k=2;}: COMMA expr)* (COMMA)? + ; + testlist - : test (options {k=2;}: COMMA test)* (COMMA)? 
+ : (test COMMA) + => test (options {k=2;}: COMMA test)* (COMMA)? + | test ; -dictmaker : test COLON test (options {k=2;}:COMMA test COLON test)* (COMMA)? - ; +dictmaker + : test COLON test + (options {k=2;}:COMMA test COLON test)* + (COMMA)? + ; -classdef: 'class' NAME (LPAREN testlist? RPAREN)? COLON suite - ; +classdef + : CLASS NAME (LPAREN testlist? RPAREN)? COLON suite + ; -arglist : argument (COMMA argument)* - ( COMMA - ( STAR test (COMMA DOUBLESTAR test)? - | DOUBLESTAR test - )? +arglist + : argument (COMMA argument)* + (COMMA + ( STAR test (COMMA DOUBLESTAR test)? + | DOUBLESTAR test + )? )? - | STAR test (COMMA DOUBLESTAR test)? - | DOUBLESTAR test - ; + | STAR test (COMMA DOUBLESTAR test)? + | DOUBLESTAR test + ; -argument : test ( (ASSIGN test) | gen_for)? - ; +argument + : t1=test + ((ASSIGN t2=test) + | gen_for + | + ) + ; -list_iter : list_for - | list_if - ; +list_iter + : list_for + | list_if + ; -list_for : 'for' exprlist 'in' testlist (list_iter)? - ; +list_for + : FOR exprlist IN testlist (list_iter)? + ; -list_if : 'if' test (list_iter)? - ; +list_if + : IF test (list_iter)? + ; -gen_iter: gen_for - | gen_if - ; +gen_iter + : gen_for + | gen_if + ; -gen_for: 'for' exprlist 'in' or_test gen_iter? - ; +gen_for + : FOR exprlist IN or_test gen_iter? + ; -gen_if: 'if' test gen_iter? - ; +gen_if + : IF test gen_iter? + ; -yield_expr : 'yield' testlist? - ; +yield_expr + : YIELD testlist? + ; +AS : 'as' ; +ASSERT : 'assert' ; +BREAK : 'break' ; +CLASS : 'class' ; +CONTINUE : 'continue' ; +DEF : 'def' ; +DELETE : 'del' ; +ELIF : 'elif' ; +EXCEPT : 'except' ; +EXEC : 'exec' ; +FINALLY : 'finally' ; +FROM : 'from' ; +FOR : 'for' ; +GLOBAL : 'global' ; +IF : 'if' ; +IMPORT : 'import' ; +IN : 'in' ; +IS : 'is' ; +LAMBDA : 'lambda' ; +ORELSE : 'else' ; +PASS : 'pass' ; +PRINT : 'print' ; +RAISE : 'raise' ; +RETURN : 'return' ; +TRY : 'try' ; +WHILE : 'while' ; +WITH : 'with' ; +YIELD : 'yield' ; + LPAREN : '(' {implicitLineJoiningLevel++;} ; RPAREN : ')' {implicitLineJoiningLevel--;} ; @@ -553,12 +836,12 @@ INT : // Hex '0' ('x' | 'X') ( '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' )+ | // Octal - '0' DIGITS* + '0' ( '0' .. '7' )* | '1'..'9' DIGITS* ; COMPLEX - : INT ('j'|'J') + : DIGITS+ ('j'|'J') | FLOAT ('j'|'J') ; @@ -573,12 +856,17 @@ * should make us exit loop not continue. */ STRING - : ('r'|'u'|'ur')? + : ('r'|'u'|'ur'|'R'|'U'|'UR'|'uR'|'Ur')? ( '\'\'\'' (options {greedy=false;}:TRIAPOS)* '\'\'\'' | '"""' (options {greedy=false;}:TRIQUOTE)* '"""' | '"' (ESC|~('\\'|'\n'|'"'))* '"' | '\'' (ESC|~('\\'|'\n'|'\''))* '\'' - ) + ) { + if (state.tokenStartLine != input.getLine()) { + state.tokenStartLine = input.getLine(); + state.tokenStartCharPositionInLine = -2; + } + } ; /** the two '"'? cause a warning -- is there a way to avoid that? */ @@ -604,7 +892,7 @@ */ CONTINUED_LINE : '\\' ('\r')? '\n' (' '|'\t')* { $channel=HIDDEN; } - ( nl=NEWLINE {emit(new ClassicToken(NEWLINE,nl.getText()));} + ( nl=NEWLINE {emit(new CommonToken(NEWLINE,nl.getText()));} | ) ; @@ -616,8 +904,11 @@ * Frank Wierzbicki added: Also ignore FORMFEEDS (\u000C). */ NEWLINE - : (('\u000C')?('\r')? '\n' )+ - {if ( startPos==0 || implicitLineJoiningLevel>0 ) +@init { + int newlines = 0; +} + : (('\u000C')?('\r')? '\n' {newlines++; } )+ { + if ( startPos==0 || implicitLineJoiningLevel>0 ) $channel=HIDDEN; } ; @@ -634,24 +925,42 @@ LEADING_WS @init { int spaces = 0; + int newlines = 0; } : {startPos==0}?=> ( {implicitLineJoiningLevel>0}? 
( ' ' | '\t' )+ {$channel=HIDDEN;} - | ( ' ' { spaces++; } - | '\t' { spaces += 8; spaces -= (spaces \% 8); } - )+ - { - // make a string of n spaces where n is column number - 1 - char[] indentation = new char[spaces]; - for (int i=0; i<spaces; i++) { - indentation[i] = ' '; - } - String s = new String(indentation); - emit(new ClassicToken(LEADING_WS,new String(indentation))); - } - // kill trailing newline if present and then ignore - ( ('\r')? '\n' {if (token!=null) token.setChannel(HIDDEN); else $channel=HIDDEN;})* - // {token.setChannel(99); } + | ( ' ' { spaces++; } + | '\t' { spaces += 8; spaces -= (spaces \% 8); } + )+ + ( ('\r')? '\n' {newlines++; } + )* { + if (input.LA(1) != -1) { + // make a string of n spaces where n is column number - 1 + char[] indentation = new char[spaces]; + for (int i=0; i<spaces; i++) { + indentation[i] = ' '; + } + CommonToken c = new CommonToken(LEADING_WS,new String(indentation)); + c.setLine(input.getLine()); + c.setCharPositionInLine(input.getCharPositionInLine()); + emit(c); + // kill trailing newline if present and then ignore + if (newlines != 0) { + if (state.token!=null) { + state.token.setChannel(HIDDEN); + } else { + $channel=HIDDEN; + } + } + } else { + // make a string of n newlines + char[] nls = new char[newlines]; + for (int i=0; i<newlines; i++) { + nls[i] = '\n'; + } + emit(new CommonToken(NEWLINE,new String(nls))); + } + } ) ; @@ -677,6 +986,6 @@ $channel=HIDDEN; } : {startPos==0}?=> (' '|'\t')* '#' (~'\n')* '\n'+ - | {startPos>0}?=> '#' (~'\n')* // let NEWLINE handle \n unless char pos==0 for '#' + | '#' (~'\n')* // let NEWLINE handle \n unless char pos==0 for '#' ; Property changes on: trunk/sandbox/wierzbicki/antlr/lib/antlr-3.1.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Property changes on: trunk/sandbox/wierzbicki/antlr/lib/antlr-runtime-3.1.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Property changes on: trunk/sandbox/wierzbicki/antlr/lib/stringtemplate-3.2.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: trunk/sandbox/wierzbicki/antlr/regr =================================================================== --- trunk/sandbox/wierzbicki/antlr/regr 2008-08-31 01:00:57 UTC (rev 5276) +++ trunk/sandbox/wierzbicki/antlr/regr 2008-09-01 04:03:21 UTC (rev 5277) @@ -7,18 +7,21 @@ ./run $RELEASE25/Lib/_MozillaCookieJar.py ./run $RELEASE25/Lib/_strptime.py ./run $RELEASE25/Lib/_threading_local.py +echo Lib/a ./run $RELEASE25/Lib/aifc.py ./run $RELEASE25/Lib/anydbm.py ./run $RELEASE25/Lib/asynchat.py ./run $RELEASE25/Lib/asyncore.py ./run $RELEASE25/Lib/atexit.py ./run $RELEASE25/Lib/audiodev.py +echo Lib/b ./run $RELEASE25/Lib/base64.py ./run $RELEASE25/Lib/BaseHTTPServer.py ./run $RELEASE25/Lib/Bastion.py ./run $RELEASE25/Lib/bdb.py ./run $RELEASE25/Lib/binhex.py ./run $RELEASE25/Lib/bisect.py +echo Lib/bsddb ./run $RELEASE25/Lib/bsddb/__init__.py ./run $RELEASE25/Lib/bsddb/db.py ./run $RELEASE25/Lib/bsddb/dbobj.py @@ -26,6 +29,7 @@ ./run $RELEASE25/Lib/bsddb/dbshelve.py ./run $RELEASE25/Lib/bsddb/dbtables.py ./run $RELEASE25/Lib/bsddb/dbutils.py +echo Lib/bsddb/test ./run $RELEASE25/Lib/bsddb/test/__init__.py ./run $RELEASE25/Lib/bsddb/test/test_1413192.py ./run $RELEASE25/Lib/bsddb/test/test_all.py @@ -562,6 +566,7 @@ ./run $RELEASE25/Lib/plat-linux2/TYPES.py echo Lib/plat-mac ./run 
$RELEASE25/Lib/plat-mac/aepack.py +#aetools uses 'as' not as a keyword #./run $RELEASE25/Lib/plat-mac/aetools.py ./run $RELEASE25/Lib/plat-mac/aetypes.py ./run $RELEASE25/Lib/plat-mac/applesingle.py @@ -677,6 +682,7 @@ ./run $RELEASE25/Lib/plat-mac/lib-scriptpackages/Netscape/Text.py ./run $RELEASE25/Lib/plat-mac/lib-scriptpackages/Netscape/WorldWideWeb_suite.py ./run $RELEASE25/Lib/plat-mac/lib-scriptpackages/StdSuites/__init__.py +#AppleScript_Suite.py uses 'as' not as a keyword #./run $RELEASE25/Lib/plat-mac/lib-scriptpackages/StdSuites/AppleScript_Suite.py ./run $RELEASE25/Lib/plat-mac/lib-scriptpackages/StdSuites/Macintosh_Connectivity_Clas.py ./run $RELEASE25/Lib/plat-mac/lib-scriptpackages/StdSuites/QuickDraw_Graphics_Suite.py @@ -722,6 +728,7 @@ ./run $RELEASE25/Lib/plat-sunos5/CDIO.py ./run $RELEASE25/Lib/plat-sunos5/DLFCN.py ./run $RELEASE25/Lib/plat-sunos5/IN.py +#STROPTS.py uses 'as' not as a keyword #./run $RELEASE25/Lib/plat-sunos5/STROPTS.py ./run $RELEASE25/Lib/plat-sunos5/SUNAUDIODEV.py ./run $RELEASE25/Lib/plat-sunos5/TYPES.py @@ -739,6 +746,7 @@ ./run $RELEASE25/Lib/py_compile.py ./run $RELEASE25/Lib/pyclbr.py ./run $RELEASE25/Lib/pydoc.py +echo Lib/q ./run $RELEASE25/Lib/Queue.py ./run $RELEASE25/Lib/quopri.py ./run $RELEASE25/Lib/random.py @@ -811,18 +819,15 @@ ./run $RELEASE25/Lib/test/crashers/bogus_code_obj.py ./run $RELEASE25/Lib/test/crashers/borrowed_ref_1.py ./run $RELEASE25/Lib/test/crashers/borrowed_ref_2.py -./run $RELEASE25/Lib/test/crashers/dangerous_subclassing.py ./run $RELEASE25/Lib/test/crashers/gc_inspection.py ./run $RELEASE25/Lib/test/crashers/infinite_rec_1.py ./run $RELEASE25/Lib/test/crashers/infinite_rec_2.py ./run $RELEASE25/Lib/test/crashers/infinite_rec_4.py ./run $RELEASE25/Lib/test/crashers/infinite_rec_5.py ./run $RELEASE25/Lib/test/crashers/loosing_dict_ref.py -./run $RELEASE25/Lib/test/crashers/modify_dict_attr.py ./run $RELEASE25/Lib/test/crashers/nasty_eq_vs_dict.py ./run $RELEASE25/Lib/test/crashers/recursion_limit_too_high.py ./run $RELEASE25/Lib/test/crashers/recursive_call.py -./run $RELEASE25/Lib/test/crashers/weakref_in_del.py ./run $RELEASE25/Lib/test/doctest_aliases.py ./run $RELEASE25/Lib/test/double_const.py echo Lib/test/f @@ -979,13 +984,13 @@ ./run $RELEASE25/Lib/test/test_gl.py ./run $RELEASE25/Lib/test/test_glob.py ./run $RELEASE25/Lib/test/test_global.py -#./run $RELEASE25/Lib/test/test_grammar.py +./run $RELEASE25/Lib/test/test_grammar.py ./run $RELEASE25/Lib/test/test_grp.py ./run $RELEASE25/Lib/test/test_gzip.py ./run $RELEASE25/Lib/test/test_hash.py ./run $RELEASE25/Lib/test/test_hashlib.py ./run $RELEASE25/Lib/test/test_heapq.py -#./run $RELEASE25/Lib/test/test_hexoct.py +./run $RELEASE25/Lib/test/test_hexoct.py ./run $RELEASE25/Lib/test/test_hmac.py ./run $RELEASE25/Lib/test/test_hotshot.py ./run $RELEASE25/Lib/test/test_htmllib.py @@ -1027,7 +1032,7 @@ ./run $RELEASE25/Lib/test/test_minidom.py ./run $RELEASE25/Lib/test/test_mmap.py ./run $RELEASE25/Lib/test/test_module.py -#./run $RELEASE25/Lib/test/test_multibytecodec.py +./run $RELEASE25/Lib/test/test_multibytecodec.py ./run $RELEASE25/Lib/test/test_multibytecodec_support.py ./run $RELEASE25/Lib/test/test_multifile.py ./run $RELEASE25/Lib/test/test_mutants.py @@ -1109,7 +1114,7 @@ ./run $RELEASE25/Lib/test/test_stringprep.py ./run $RELEASE25/Lib/test/test_strop.py ./run $RELEASE25/Lib/test/test_strptime.py -#./run $RELEASE25/Lib/test/test_struct.py +./run $RELEASE25/Lib/test/test_struct.py ./run $RELEASE25/Lib/test/test_structmembers.py ./run 
$RELEASE25/Lib/test/test_structseq.py ./run $RELEASE25/Lib/test/test_subprocess.py @@ -1196,8 +1201,8 @@ ./run $RELEASE25/Lib/types.py echo Lib/u ./run $RELEASE25/Lib/unittest.py -#./run $RELEASE25/Lib/urllib.py -#./run $RELEASE25/Lib/urllib2.py +./run $RELEASE25/Lib/urllib.py +./run $RELEASE25/Lib/urllib2.py ./run $RELEASE25/Lib/urlparse.py ./run $RELEASE25/Lib/user.py ./run $RELEASE25/Lib/UserDict.py Modified: trunk/sandbox/wierzbicki/antlr/run =================================================================== --- trunk/sandbox/wierzbicki/antlr/run 2008-08-31 01:00:57 UTC (rev 5276) +++ trunk/sandbox/wierzbicki/antlr/run 2008-09-01 04:03:21 UTC (rev 5277) @@ -1 +1 @@ -java -classpath lib/antlr-3.0.1.jar:build Main $* +java -classpath lib/antlr-3.1.jar:build Main $* Modified: trunk/sandbox/wierzbicki/antlr/src/Main.java =================================================================== --- trunk/sandbox/wierzbicki/antlr/src/Main.java 2008-08-31 01:00:57 UTC (rev 5276) +++ trunk/sandbox/wierzbicki/antlr/src/Main.java 2008-09-01 04:03:21 UTC (rev 5277) @@ -20,7 +20,7 @@ PythonLexer lexer = new MyLexer(input); CommonTokenStream tokens = new CommonTokenStream(lexer); tokens.discardOffChannelTokens(true); - PythonTokenSource indentedSource = new PythonTokenSource(tokens); + PythonTokenSource indentedSource = new PythonTokenSource(tokens, "<test>"); tokens = new CommonTokenStream(indentedSource); //System.out.println("tokens="+tokens.getTokens()); PythonParser parser = new PythonParser(tokens); Modified: trunk/sandbox/wierzbicki/antlr/src/PythonTokenSource.java =================================================================== --- trunk/sandbox/wierzbicki/antlr/src/PythonTokenSource.java 2008-08-31 01:00:57 UTC (rev 5276) +++ trunk/sandbox/wierzbicki/antlr/src/PythonTokenSource.java 2008-09-01 04:03:21 UTC (rev 5277) @@ -70,184 +70,263 @@ February 2004 */ public class PythonTokenSource implements TokenSource { - public static final int MAX_INDENTS = 100; - public static final int FIRST_CHAR_POSITION = 0; + public static final int MAX_INDENTS = 100; + public static final int FIRST_CHAR_POSITION = 0; - /** The stack of indent levels (column numbers) */ - int[] indentStack = new int[MAX_INDENTS]; - /** stack pointer */ - int sp=-1; // grow upwards + /** The stack of indent levels (column numbers) */ + int[] indentStack = new int[MAX_INDENTS]; + /** stack pointer */ + int sp=-1; // grow upwards - /** The queue of tokens */ - Vector tokens = new Vector(); + /** The queue of tokens */ + Vector tokens = new Vector(); - /** We pull real tokens from this lexer */ - CommonTokenStream stream; + /** We pull real tokens from this lexer */ + CommonTokenStream stream; - int lastTokenAddedIndex = -1; + int lastTokenAddedIndex = -1; - public PythonTokenSource(PythonLexer lexer) { - } + String filename; + boolean inSingle; - public PythonTokenSource(CommonTokenStream stream) { - this.stream = stream; - // "state" of indent level is FIRST_CHAR_POSITION - push(FIRST_CHAR_POSITION); - } + public PythonTokenSource(PythonLexer lexer) { + } - /** From http://www.python.org/doc/2.2.3/ref/indentation.html - "Before the first line of the file is read, a single zero is - pushed on the stack; this will never be popped off again. The - numbers pushed on the stack will always be strictly increasing - from bottom to top. At the beginning of each logical line, the - line's indentation level is compared to the top of the - stack. If it is equal, nothing happens. 
If it is larger, it is - pushed on the stack, and one INDENT token is generated. If it - is smaller, it must be one of the numbers occurring on the - stack; all numbers on the stack that are larger are popped - off, and for each number popped off a DEDENT token is - generated. At the end of the file, a DEDENT token is generated - for each number remaining on the stack that is larger than - zero." + public PythonTokenSource(CommonTokenStream stream, String filename) { + this(stream, filename, false); + } - I use char position in line 0..n-1 instead. + public PythonTokenSource(CommonTokenStream stream, String filename, boolean single) { + this.stream = stream; + this.filename = filename; + this.inSingle = single; + // "state" of indent level is FIRST_CHAR_POSITION + push(FIRST_CHAR_POSITION); + } - The DEDENTS possibly needed at EOF are gracefully handled by forcing - EOF to have char pos 0 even though with UNIX it's hard to get EOF - at a non left edge. - */ - public Token nextToken() { - // if something in queue, just remove and return it - if ( tokens.size()>0 ) { - Token t = (Token)tokens.firstElement(); - tokens.removeElementAt(0); - // System.out.println(t); - return t; - } + /** From http://www.python.org/doc/2.2.3/ref/indentation.html - insertImaginaryIndentDedentTokens(); + "Before the first line of the file is read, a single zero is + pushed on the stack; this will never be popped off again. The + numbers pushed on the stack will always be strictly increasing + from bottom to top. At the beginning of each logical line, the + line's indentation level is compared to the top of the + stack. If it is equal, nothing happens. If it is larger, it is + pushed on the stack, and one INDENT token is generated. If it + is smaller, it must be one of the numbers occurring on the + stack; all numbers on the stack that are larger are popped + off, and for each number popped off a DEDENT token is + generated. At the end of the file, a DEDENT token is generated + for each number remaining on the stack that is larger than + zero." - return nextToken(); - } + I use char position in line 0..n-1 instead. - protected void insertImaginaryIndentDedentTokens() - { - Token t = stream.LT(1); - stream.consume(); + The DEDENTS possibly needed at EOF are gracefully handled by forcing + EOF to have char pos 0 even though with UNIX it's hard to get EOF + at a non left edge. 
+ */ + public Token nextToken() { + // if something in queue, just remove and return it + if (tokens.size() > 0) { + Token t = (Token)tokens.firstElement(); + tokens.removeElementAt(0); + //System.out.println(filename + t); + return t; + } - // if not a NEWLINE, doesn't signal indent/dedent work; just enqueue - if ( t.getType()!=PythonLexer.NEWLINE ) { - List hiddenTokens = stream.getTokens(lastTokenAddedIndex+1,t.getTokenIndex()-1); - if ( hiddenTokens!=null ) { - tokens.addAll(hiddenTokens); - } - lastTokenAddedIndex = t.getTokenIndex(); - tokens.addElement(t); - return; - } + insertImaginaryIndentDedentTokens(); - // save NEWLINE in the queue - //System.out.println("found newline: "+t+" stack is "+stackString()); - List hiddenTokens = stream.getTokens(lastTokenAddedIndex+1,t.getTokenIndex()-1); - if ( hiddenTokens!=null ) { - tokens.addAll(hiddenTokens); - } - lastTokenAddedIndex = t.getTokenIndex(); - tokens.addElement(t); + return nextToken(); + } - // grab first token of next line - t = stream.LT(1); - stream.consume(); + private void generateNewline(Token t) { + CommonToken newline = new CommonToken(PythonLexer.NEWLINE, "\n"); + newline.setLine(t.getLine()); + newline.setCharPositionInLine(t.getCharPositionInLine()); + tokens.addElement(newline); + } - hiddenTokens = stream.getTokens(lastTokenAddedIndex+1,t.getTokenIndex()-1); - if ( hiddenTokens!=null ) { - tokens.addAll(hiddenTokens); - } - lastTokenAddedIndex = t.getTokenIndex(); + protected void insertImaginaryIndentDedentTokens() { + Token t = stream.LT(1); + stream.consume(); - // compute cpos as the char pos of next non-WS token in line - int cpos = t.getCharPositionInLine(); // column dictates indent/dedent - if ( t.getType()==Token.EOF ) { - cpos = -1; // pretend EOF always happens at left edge - } - else if ( t.getType()==PythonLexer.LEADING_WS ) { - cpos = t.getText().length(); - } + if (t.getType() == Token.EOF) { + if (!inSingle) { + Token prev = stream.LT(-1); + if (prev == null || prev.getType() != PythonLexer.NEWLINE) { + generateNewline(t); + } + } - //System.out.println("next token is: "+t); + handleDedents(-1, (CommonToken)t); + enqueue(t); + } else if (t.getType() == PythonLexer.NEWLINE) { + // save NEWLINE in the queue + //System.out.println("found newline: "+t+" stack is "+stackString()); + enqueueHiddens(t); + tokens.addElement(t); + Token newline = t; - // compare to last indent level - int lastIndent = peek(); - //System.out.println("cpos, lastIndent = "+cpos+", "+lastIndent); - if ( cpos > lastIndent ) { // they indented; track and gen INDENT - push(cpos); - //System.out.println("push("+cpos+"): "+stackString()); - Token indent = new ClassicToken(PythonParser.INDENT,""); - indent.setCharPositionInLine(t.getCharPositionInLine()); - indent.setLine(t.getLine()); - tokens.addElement(indent); - } - else if ( cpos < lastIndent ) { // they dedented - // how far back did we dedent? 
- int prevIndex = findPreviousIndent(cpos); - //System.out.println("dedented; prevIndex of cpos="+cpos+" is "+prevIndex); - // generate DEDENTs for each indent level we backed up over - for (int d=sp-1; d>=prevIndex; d--) { - Token dedent = new ClassicToken(PythonParser.DEDENT,""); - dedent.setCharPositionInLine(t.getCharPositionInLine()); - dedent.setLine(t.getLine()); - tokens.addElement(dedent); - } - sp = prevIndex; // pop those off indent level - } - if ( t.getType()!=PythonLexer.LEADING_WS ) { // discard WS - tokens.addElement(t); - } - } + // grab first token of next line + t = stream.LT(1); + stream.consume(); - // T O K E N S T A C K M E T H O D S + enqueueHiddens(t); - protected void push(int i) { - if (sp>=MAX_INDENTS) { - throw new IllegalStateException("stack overflow"); - } - sp++; - indentStack[sp] = i; - } + // compute cpos as the char pos of next non-WS token in line + int cpos = t.getCharPositionInLine(); // column dictates indent/dedent + if (t.getType() == Token.EOF) { + cpos = -1; // pretend EOF always happens at left edge + } + else if (t.getType() == PythonLexer.LEADING_WS) { + Token next = stream.LT(1); + if (next != null && next.getType() == Token.EOF) { + stream.consume(); + return; + } else { + cpos = t.getText().length(); + } + } - protected int pop() { - if (sp<0) { - throw new IllegalStateException("stack underflow"); - } - int top = indentStack[sp]; - sp--; - return top; - } + //System.out.println("next token is: "+t); - protected int peek() { - return indentStack[sp]; - } + // compare to last indent level + int lastIndent = peek(); + //System.out.println("cpos, lastIndent = "+cpos+", "+lastIndent); + if (cpos > lastIndent) { // they indented; track and gen INDENT + handleIndents(cpos, (CommonToken)t); + } + else if (cpos < lastIndent) { // they dedented + handleDedents(cpos, (CommonToken)t); + } - /** Return the index on stack of previous indent level == i else -1 */ - protected int findPreviousIndent(int i) { - for (int j=sp-1; j>=0; j--) { - if ( indentStack[j]==i ) { - return j; - } - } - return FIRST_CHAR_POSITION; - } + if (t.getType() == Token.EOF && inSingle) { + String newlines = newline.getText(); + for(int i=1;i<newlines.length();i++) { + generateNewline(newline); + } + } - public String stackString() { - StringBuffer buf = new StringBuffer(); - for (int j=sp; j>=0; j--) { - buf.append(" "); - buf.append(indentStack[j]); - } - return buf.toString(); - } + if (t.getType() != PythonLexer.LEADING_WS) { // discard WS + tokens.addElement(t); + } + } else { + enqueue(t); + } + } + + private void enqueue(Token t) { + enqueueHiddens(t); + tokens.addElement(t); + } + + private void enqueueHiddens(Token t) { + if (inSingle && t.getType() == Token.EOF) { + if (stream.size() > lastTokenAddedIndex + 1) { + Token hidden = stream.get(lastTokenAddedIndex + 1); + if (hidden.getType() == PythonLexer.COMMENT) { + String text = hidden.getText(); + int i = text.indexOf("\n"); + while(i != -1) { + generateNewline(hidden); + i = text.indexOf("\n", i + 1); + } + } + } + } + List hiddenTokens = stream.getTokens(lastTokenAddedIndex + 1,t.getTokenIndex() - 1); + if (hiddenTokens != null) { + tokens.addAll(hiddenTokens); + } + lastTokenAddedIndex = t.getTokenIndex(); + } + + private void handleIndents(int cpos, CommonToken t) { + push(cpos); + //System.out.println("push("+cpos+"): "+stackString()); + Token indent = new CommonToken(PythonParser.INDENT,""); + indent.setCharPositionInLine(t.getCharPositionInLine()); + indent.setLine(t.getLine()); + tokens.addElement(indent); + } 
+ + private void handleDedents(int cpos, CommonToken t) { + // how far back did we dedent? + int prevIndex = findPreviousIndent(cpos, t); + //System.out.println("dedented; prevIndex of cpos="+cpos+" is "+prevIndex); + // generate DEDENTs for each indent level we backed up over + for (int d = sp - 1; d >= prevIndex; d--) { + CommonToken dedent = new CommonToken(PythonParser.DEDENT,""); + dedent.setCharPositionInLine(t.getCharPositionInLine()); + dedent.setLine(t.getLine()); + + //XXX: this will get messed up by comments. + dedent.setStartIndex(t.getStartIndex()); + dedent.setStopIndex(t.getStopIndex()); + + tokens.addElement(dedent); + } + sp = prevIndex; // pop those off indent level + } + + // T O K E N S T A C K M E T H O D S + + protected void push(int i) { + if (sp >= MAX_INDENTS) { + throw new IllegalStateException("stack overflow"); + } + sp++; + indentStack[sp] = i; + } + + protected int pop() { + if (sp<0) { + throw new IllegalStateException("stack underflow"); + } + int top = indentStack[sp]; + sp--; + return top; + } + + protected int peek() { + return indentStack[sp]; + } + + /** Return the index on stack of previous indent level == i else -1 */ + protected int findPreviousIndent(int i, Token t) { + for (int j = sp - 1; j >= 0; j--) { + if (indentStack[j] == i) { + return j; + } + } + //The -2 is for the special case of getCharPositionInLine in multiline str nodes. + if (i == -1 || i == -2) { + return FIRST_CHAR_POSITION; + } + /* ParseException p = new ParseException("unindent does not match any outer indentation level", t.getLine(), t.getCharPositionInLine()); + p.setType(Py.IndentationError); + throw p; + */ + throw new RuntimeException("unindent does not match any outer indentation level"); + } + + public String stackString() { + StringBuffer buf = new StringBuffer(); + for (int j = sp; j >= 0; j--) { + buf.append(" "); + buf.append(indentStack[j]); + } + return buf.toString(); + } + + //FIXME: needed this for the Antlr 3.1b interface change. + public String getSourceName() { + return filename; + } + } /* More example input / output pairs with code simplified to single chars This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |