From: <fwi...@us...> - 2008-09-01 04:03:33
Revision: 5277 http://jython.svn.sourceforge.net/jython/?rev=5277&view=rev Author: fwierzbicki Date: 2008-09-01 04:03:21 +0000 (Mon, 01 Sep 2008) Log Message: ----------- update "bare" grammar. Modified Paths: -------------- trunk/sandbox/wierzbicki/antlr/grammar/Python.g trunk/sandbox/wierzbicki/antlr/regr trunk/sandbox/wierzbicki/antlr/run trunk/sandbox/wierzbicki/antlr/src/Main.java trunk/sandbox/wierzbicki/antlr/src/PythonTokenSource.java Added Paths: ----------- trunk/sandbox/wierzbicki/antlr/lib/antlr-3.1.jar trunk/sandbox/wierzbicki/antlr/lib/antlr-runtime-3.1.jar trunk/sandbox/wierzbicki/antlr/lib/stringtemplate-3.2.jar Removed Paths: ------------- trunk/sandbox/wierzbicki/antlr/lib/antlr-3.0.1.jar trunk/sandbox/wierzbicki/antlr/lib/stringtemplate-3.1b1.jar Modified: trunk/sandbox/wierzbicki/antlr/grammar/Python.g =================================================================== --- trunk/sandbox/wierzbicki/antlr/grammar/Python.g 2008-08-31 01:00:57 UTC (rev 5276) +++ trunk/sandbox/wierzbicki/antlr/grammar/Python.g 2008-09-01 04:03:21 UTC (rev 5277) @@ -52,7 +52,16 @@ * * I (Terence) tested this by running it on the jython-2.1/Lib * directory of 40k lines of Python. - * + * + * REQUIRES ANTLR v3 + * + * + * Updated the original parser for Python 2.5 features. The parser has been + * altered to produce an AST - the AST work started from tne newcompiler + * grammar from Jim Baker. The current parsing and compiling strategy looks + * like this: + * + * Python source->Python.g->AST (org/python/parser/ast/*)->CodeCompiler(ASM)->.class */ grammar Python; @@ -72,60 +81,127 @@ int startPos=-1; } -single_input : NEWLINE - | simple_stmt - | compound_stmt NEWLINE - ; +single_input + : NEWLINE* EOF + | simple_stmt NEWLINE* EOF + | compound_stmt NEWLINE+ EOF + ; -file_input : (NEWLINE | stmt)* - ; +file_input + : (NEWLINE + | stmt + )* + ; -eval_input : (NEWLINE)* testlist (NEWLINE)* - ; +eval_input + : LEADING_WS? (NEWLINE)* testlist (NEWLINE)* EOF + ; -decorators: decorator+ - ; +dotted_attr + : NAME + ( (DOT NAME)+ + | + ) + ; -decorator: AT dotted_attr (LPAREN arglist? RPAREN)? NEWLINE - ; +//attr is here for Java compatibility. A Java foo.getIf() can be called from Jython as foo.if +// so we need to support any keyword as an attribute. +attr + : NAME + | AND + | AS + | ASSERT + | BREAK + | CLASS + | CONTINUE + | DEF + | DELETE + | ELIF + | EXCEPT + | EXEC + | FINALLY + | FROM + | FOR + | GLOBAL + | IF + | IMPORT + | IN + | IS + | LAMBDA + | NOT + | OR + | ORELSE + | PASS + | PRINT + | RAISE + | RETURN + | TRY + | WHILE + | WITH + | YIELD + ; -dotted_attr - : NAME (DOT NAME)* +decorator + : AT dotted_attr + ( LPAREN + ( arglist + | + ) + RPAREN + | + ) NEWLINE ; -funcdef : decorators? 'def' NAME parameters COLON suite - ; +decorators + : decorator+ + ; -parameters : LPAREN (varargslist)? RPAREN - ; +funcdef + : decorators? DEF NAME parameters COLON suite + ; -varargslist : defparameter (options {greedy=true;}:COMMA defparameter)* - (COMMA - ( STAR NAME (COMMA DOUBLESTAR NAME)? - | DOUBLESTAR NAME - )? - )? - | STAR NAME (COMMA DOUBLESTAR NAME)? - | DOUBLESTAR NAME - ; +parameters + : LPAREN + (varargslist + | + ) + RPAREN + ; -defparameter : fpdef (ASSIGN test)? - ; +defparameter + : fpdef (ASSIGN test)? + ; -fpdef : NAME - | LPAREN fplist RPAREN - ; +varargslist + : defparameter (options {greedy=true;}:COMMA defparameter)* + (COMMA + (STAR NAME (COMMA DOUBLESTAR NAME)? + | DOUBLESTAR NAME + )? + )? + | STAR NAME (COMMA DOUBLESTAR NAME)? 
+ | DOUBLESTAR NAME + ; -fplist : fpdef (options {greedy=true;}:COMMA fpdef)* (COMMA)? - ; +fpdef + : NAME + | LPAREN fplist RPAREN + ; -stmt : simple_stmt - | compound_stmt - ; +fplist + : fpdef + (options {greedy=true;}:COMMA fpdef)* (COMMA)? + ; -simple_stmt : small_stmt (options {greedy=true;}:SEMI small_stmt)* (SEMI)? NEWLINE - ; +stmt + : simple_stmt + | compound_stmt + ; +simple_stmt + : small_stmt (options {greedy=true;}:SEMI small_stmt)* (SEMI)? NEWLINE + ; + small_stmt : expr_stmt | print_stmt | del_stmt @@ -137,308 +213,515 @@ | assert_stmt ; -expr_stmt : testlist - ( augassign yield_expr - | augassign testlist - | assigns - )? - ; +expr_stmt + : ((testlist augassign) => lhs=testlist + ( (augassign yield_expr + ) + | (augassign testlist + ) + ) + | (testlist ASSIGN) => lhs=testlist + ( + | ((ASSIGN testlist)+ + ) + | ((ASSIGN yield_expr)+ + ) + ) + | lhs=testlist + ) + ; -assigns - : assign_testlist+ - | assign_yield+ +augassign + : PLUSEQUAL + | MINUSEQUAL + | STAREQUAL + | SLASHEQUAL + | PERCENTEQUAL + | AMPEREQUAL + | VBAREQUAL + | CIRCUMFLEXEQUAL + | LEFTSHIFTEQUAL + | RIGHTSHIFTEQUAL + | DOUBLESTAREQUAL + | DOUBLESLASHEQUAL ; -assign_testlist - : ASSIGN testlist - ; +print_stmt + : PRINT + (printlist + | RIGHTSHIFT printlist2 + | + ) + ; -assign_yield - : ASSIGN yield_expr +//not in CPython's Grammar file +printlist + : (test COMMA) => + test (options {k=2;}: COMMA test)* + (trailcomma=COMMA)? + | test ; -augassign : PLUSEQUAL - | MINUSEQUAL - | STAREQUAL - | SLASHEQUAL - | PERCENTEQUAL - | AMPEREQUAL - | VBAREQUAL - | CIRCUMFLEXEQUAL - | LEFTSHIFTEQUAL - | RIGHTSHIFTEQUAL - | DOUBLESTAREQUAL - | DOUBLESLASHEQUAL - ; +//XXX: would be nice if printlist and printlist2 could be merged. +//not in CPython's Grammar file +printlist2 + : (test COMMA test) => + test (options {k=2;}: COMMA test)* + (trailcomma=COMMA)? + | test + ; -print_stmt : 'print' (printlist | RIGHTSHIFT printlist)? - ; +del_stmt + : DELETE del_list + ; -printlist returns [boolean newline] - : test (options {k=2;}: COMMA test)* (COMMA)? +pass_stmt + : PASS ; +flow_stmt + : break_stmt + | continue_stmt + | return_stmt + | raise_stmt + | yield_stmt + ; -del_stmt : 'del' exprlist - ; +break_stmt + : BREAK + ; -pass_stmt : 'pass' - ; +continue_stmt + : CONTINUE + ; -flow_stmt : break_stmt - | continue_stmt - | return_stmt - | raise_stmt - | yield_stmt - ; +return_stmt + : RETURN + (testlist + | + ) + ; -break_stmt : 'break' - ; +yield_stmt + : yield_expr + ; -continue_stmt : 'continue' - ; +raise_stmt + : RAISE (test (COMMA test + (COMMA test)?)?)? + ; -return_stmt : 'return' (testlist)? - ; +import_stmt + : import_name + | import_from + ; -yield_stmt : yield_expr - ; +import_name + : IMPORT dotted_as_names + ; -raise_stmt: 'raise' (test (COMMA test (COMMA test)?)?)? - ; +import_from + : FROM (DOT* dotted_name | DOT+) IMPORT + (STAR + | import_as_names + | LPAREN import_as_names COMMA? RPAREN + ) + ; -import_stmt : import_name - | import_from - ; +import_as_names + : import_as_name (COMMA import_as_name)* + ; -import_name : 'import' dotted_as_names - ; +import_as_name + : name=NAME (AS asname=NAME)? + ; -import_from: 'from' (DOT* dotted_name | DOT+) 'import' - (STAR - | import_as_names - | LPAREN import_as_names RPAREN - ) - ; +dotted_as_name + : dotted_name (AS NAME)? + ; -import_as_names : import_as_name (COMMA import_as_name)* (COMMA)? - ; +dotted_as_names + : dotted_as_name (COMMA dotted_as_name)* + ; -import_as_name : NAME ('as' NAME)? 
- ; +dotted_name + : NAME (DOT attr)* + ; -dotted_as_name : dotted_name ('as' NAME)? - ; +global_stmt + : GLOBAL NAME (COMMA NAME)* + ; -dotted_as_names : dotted_as_name (COMMA dotted_as_name)* - ; -dotted_name : NAME (DOT NAME)* - ; +exec_stmt + : EXEC expr (IN test + (COMMA test)?)? + ; -global_stmt : 'global' NAME (COMMA NAME)* - ; +assert_stmt + : ASSERT test (COMMA test)? + ; -exec_stmt : 'exec' expr ('in' test (COMMA test)?)? - ; +compound_stmt + : if_stmt + | while_stmt + | for_stmt + | try_stmt + | with_stmt + | funcdef + | classdef + ; -assert_stmt : 'assert' test (COMMA test)? - ; +if_stmt + : IF test COLON suite elif_clause* + (ORELSE COLON suite)? + ; -compound_stmt : if_stmt - | while_stmt - | for_stmt - | try_stmt - | with_stmt - | funcdef - | classdef - ; +//not in CPython's Grammar file +elif_clause + : ELIF test COLON suite + ; -if_stmt: 'if' test COLON suite elif_clause* ('else' COLON suite)? - ; +while_stmt + : WHILE test COLON suite (ORELSE COLON suite)? + ; -elif_clause : 'elif' test COLON suite - ; +for_stmt + : FOR exprlist IN testlist COLON suite + (ORELSE COLON suite)? + ; -while_stmt : 'while' test COLON suite ('else' COLON suite)? - ; +try_stmt + : TRY COLON suite + ( except_clause+ (ORELSE COLON suite)? (FINALLY COLON suite)? + | FINALLY COLON suite + ) + ; -for_stmt : 'for' exprlist 'in' testlist COLON suite ('else' COLON suite)? - ; +with_stmt + : WITH test (with_var)? COLON suite + ; -try_stmt : 'try' COLON suite - ( except_clause+ ('else' COLON suite)? ('finally' COLON suite)? - | 'finally' COLON suite - ) - ; +with_var + : (AS | NAME) expr + ; -with_stmt: 'with' test (with_var)? COLON suite - ; +except_clause + : EXCEPT (test (COMMA test)?)? COLON suite + ; -with_var: ('as' | NAME) expr - ; +suite + : simple_stmt + | NEWLINE INDENT + (stmt + )+ DEDENT + ; -except_clause : 'except' (test (COMMA test)?)? COLON suite - ; +test + :or_test + ( (IF or_test ORELSE) => IF o2=or_test ORELSE e=test + | + ) + | lambdef + ; -suite : simple_stmt - | NEWLINE INDENT (stmt)+ DEDENT - ; +or_test + : left=and_test + ( (OR and_test + )+ + | + ) + ; -test: or_test - ( ('if' or_test 'else') => 'if' or_test 'else' test)? 
- | lambdef +and_test + : not_test + ( (AND not_test + )+ + | + ) ; -or_test : and_test (OR and_test)* - ; +not_test + : NOT nt=not_test + | comparison + ; -and_test : not_test (AND not_test)* - ; +comparison + : left=expr + ( ( comp_op expr + )+ + | + ) + ; -not_test : NOT not_test - | comparison - ; +comp_op + : LESS + | GREATER + | EQUAL + | GREATEREQUAL + | LESSEQUAL + | ALT_NOTEQUAL + | NOTEQUAL + | IN + | NOT IN + | IS + | IS NOT + ; -comparison: expr (comp_op expr)* - ; +expr + : left=xor_expr + ( (VBAR xor_expr + )+ + | + ) + ; -comp_op : LESS - | GREATER - | EQUAL - | GREATEREQUAL - | LESSEQUAL - | ALT_NOTEQUAL - | NOTEQUAL - | 'in' - | NOT 'in' - | 'is' - | 'is' NOT - ; +xor_expr + : left=and_expr + ( (CIRCUMFLEX and_expr + )+ + | + ) + ; -expr : xor_expr (VBAR xor_expr)* - ; +and_expr + : shift_expr + ( (AMPER shift_expr + )+ + | + ) + ; -xor_expr : and_expr (CIRCUMFLEX and_expr)* - ; +shift_expr + : left=arith_expr + ( ( shift_op arith_expr + )+ + | + ) + ; -and_expr : shift_expr (AMPER shift_expr)* - ; +shift_op + : LEFTSHIFT + | RIGHTSHIFT + ; -shift_expr : arith_expr ((LEFTSHIFT|RIGHTSHIFT) arith_expr)* - ; +arith_expr + : left=term + ( (arith_op term + )+ + | + ) + ; -arith_expr: term ((PLUS|MINUS) term)* - ; +arith_op + : PLUS + | MINUS + ; -term : factor ((STAR | SLASH | PERCENT | DOUBLESLASH ) factor)* - ; +term + : factor + ( (term_op factor + )+ + | + ) + ; -factor : PLUS factor - | MINUS factor - | TILDE factor - | power - ; +term_op + :STAR + |SLASH + |PERCENT + |DOUBLESLASH + ; -power : atom (trailer)* (options {greedy=true;}:DOUBLESTAR factor)? - ; +factor + : PLUS factor + | MINUS factor + | TILDE factor + | power + ; -atom : LPAREN - ( yield_expr - | testlist_gexp - )? - RPAREN - | LBRACK (listmaker)? RBRACK - | LCURLY (dictmaker)? RCURLY +power + : atom (trailer)* (options {greedy=true;}:DOUBLESTAR factor)? + ; + +atom + : LPAREN + ( yield_expr + | testlist_gexp + | + ) + RPAREN + | LBRACK + (listmaker + | + ) + RBRACK + | LCURLY + (dictmaker + | + ) + RCURLY | BACKQUOTE testlist BACKQUOTE | NAME | INT | LONGINT | FLOAT | COMPLEX - | (STRING)+ + | (STRING)+ ; -listmaker : test - ( list_for - | (options {greedy=true;}:COMMA test)* - ) (COMMA)? +listmaker + : test + (list_for + | (options {greedy=true;}:COMMA test)* + ) (COMMA)? ; testlist_gexp - : test ( (options {k=2;}: COMMA test)* (COMMA)? - | gen_for - ) - + : test + ( ((options {k=2;}: COMMA test)* (COMMA)? + ) + | (gen_for + ) + ) ; -lambdef: 'lambda' (varargslist)? COLON test - ; +lambdef + : LAMBDA (varargslist)? COLON test + ; -trailer : LPAREN (arglist)? RPAREN - | LBRACK subscriptlist RBRACK - | DOT NAME - ; +trailer + : LPAREN + (arglist + | + ) + RPAREN + | LBRACK subscriptlist RBRACK + | DOT attr + ; -subscriptlist : subscript (options {greedy=true;}:COMMA subscript)* (COMMA)? - ; +subscriptlist + : subscript (options {greedy=true;}:COMMA subscript)* (COMMA)? + ; -subscript : DOT DOT DOT - | test (COLON (test)? (sliceop)?)? - | COLON (test)? (sliceop)? - ; +subscript + : DOT DOT DOT + | (test COLON) + => test (COLON (test)? (sliceop)?)? + | (COLON) + => COLON (test)? (sliceop)? + | test + ; -sliceop : COLON (test)? - ; +sliceop + : COLON + (test + )? + ; -exprlist : expr (options {k=2;}: COMMA expr)* (COMMA)? - ; +exprlist + : (expr COMMA) => expr (options {k=2;}: COMMA expr)* (COMMA)? + | expr + ; +//not in CPython's Grammar file +del_list + : expr (options {k=2;}: COMMA expr)* (COMMA)? + ; + testlist - : test (options {k=2;}: COMMA test)* (COMMA)? 
+ : (test COMMA) + => test (options {k=2;}: COMMA test)* (COMMA)? + | test ; -dictmaker : test COLON test (options {k=2;}:COMMA test COLON test)* (COMMA)? - ; +dictmaker + : test COLON test + (options {k=2;}:COMMA test COLON test)* + (COMMA)? + ; -classdef: 'class' NAME (LPAREN testlist? RPAREN)? COLON suite - ; +classdef + : CLASS NAME (LPAREN testlist? RPAREN)? COLON suite + ; -arglist : argument (COMMA argument)* - ( COMMA - ( STAR test (COMMA DOUBLESTAR test)? - | DOUBLESTAR test - )? +arglist + : argument (COMMA argument)* + (COMMA + ( STAR test (COMMA DOUBLESTAR test)? + | DOUBLESTAR test + )? )? - | STAR test (COMMA DOUBLESTAR test)? - | DOUBLESTAR test - ; + | STAR test (COMMA DOUBLESTAR test)? + | DOUBLESTAR test + ; -argument : test ( (ASSIGN test) | gen_for)? - ; +argument + : t1=test + ((ASSIGN t2=test) + | gen_for + | + ) + ; -list_iter : list_for - | list_if - ; +list_iter + : list_for + | list_if + ; -list_for : 'for' exprlist 'in' testlist (list_iter)? - ; +list_for + : FOR exprlist IN testlist (list_iter)? + ; -list_if : 'if' test (list_iter)? - ; +list_if + : IF test (list_iter)? + ; -gen_iter: gen_for - | gen_if - ; +gen_iter + : gen_for + | gen_if + ; -gen_for: 'for' exprlist 'in' or_test gen_iter? - ; +gen_for + : FOR exprlist IN or_test gen_iter? + ; -gen_if: 'if' test gen_iter? - ; +gen_if + : IF test gen_iter? + ; -yield_expr : 'yield' testlist? - ; +yield_expr + : YIELD testlist? + ; +AS : 'as' ; +ASSERT : 'assert' ; +BREAK : 'break' ; +CLASS : 'class' ; +CONTINUE : 'continue' ; +DEF : 'def' ; +DELETE : 'del' ; +ELIF : 'elif' ; +EXCEPT : 'except' ; +EXEC : 'exec' ; +FINALLY : 'finally' ; +FROM : 'from' ; +FOR : 'for' ; +GLOBAL : 'global' ; +IF : 'if' ; +IMPORT : 'import' ; +IN : 'in' ; +IS : 'is' ; +LAMBDA : 'lambda' ; +ORELSE : 'else' ; +PASS : 'pass' ; +PRINT : 'print' ; +RAISE : 'raise' ; +RETURN : 'return' ; +TRY : 'try' ; +WHILE : 'while' ; +WITH : 'with' ; +YIELD : 'yield' ; + LPAREN : '(' {implicitLineJoiningLevel++;} ; RPAREN : ')' {implicitLineJoiningLevel--;} ; @@ -553,12 +836,12 @@ INT : // Hex '0' ('x' | 'X') ( '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' )+ | // Octal - '0' DIGITS* + '0' ( '0' .. '7' )* | '1'..'9' DIGITS* ; COMPLEX - : INT ('j'|'J') + : DIGITS+ ('j'|'J') | FLOAT ('j'|'J') ; @@ -573,12 +856,17 @@ * should make us exit loop not continue. */ STRING - : ('r'|'u'|'ur')? + : ('r'|'u'|'ur'|'R'|'U'|'UR'|'uR'|'Ur')? ( '\'\'\'' (options {greedy=false;}:TRIAPOS)* '\'\'\'' | '"""' (options {greedy=false;}:TRIQUOTE)* '"""' | '"' (ESC|~('\\'|'\n'|'"'))* '"' | '\'' (ESC|~('\\'|'\n'|'\''))* '\'' - ) + ) { + if (state.tokenStartLine != input.getLine()) { + state.tokenStartLine = input.getLine(); + state.tokenStartCharPositionInLine = -2; + } + } ; /** the two '"'? cause a warning -- is there a way to avoid that? */ @@ -604,7 +892,7 @@ */ CONTINUED_LINE : '\\' ('\r')? '\n' (' '|'\t')* { $channel=HIDDEN; } - ( nl=NEWLINE {emit(new ClassicToken(NEWLINE,nl.getText()));} + ( nl=NEWLINE {emit(new CommonToken(NEWLINE,nl.getText()));} | ) ; @@ -616,8 +904,11 @@ * Frank Wierzbicki added: Also ignore FORMFEEDS (\u000C). */ NEWLINE - : (('\u000C')?('\r')? '\n' )+ - {if ( startPos==0 || implicitLineJoiningLevel>0 ) +@init { + int newlines = 0; +} + : (('\u000C')?('\r')? '\n' {newlines++; } )+ { + if ( startPos==0 || implicitLineJoiningLevel>0 ) $channel=HIDDEN; } ; @@ -634,24 +925,42 @@ LEADING_WS @init { int spaces = 0; + int newlines = 0; } : {startPos==0}?=> ( {implicitLineJoiningLevel>0}? 
( ' ' | '\t' )+ {$channel=HIDDEN;} - | ( ' ' { spaces++; } - | '\t' { spaces += 8; spaces -= (spaces \% 8); } - )+ - { - // make a string of n spaces where n is column number - 1 - char[] indentation = new char[spaces]; - for (int i=0; i<spaces; i++) { - indentation[i] = ' '; - } - String s = new String(indentation); - emit(new ClassicToken(LEADING_WS,new String(indentation))); - } - // kill trailing newline if present and then ignore - ( ('\r')? '\n' {if (token!=null) token.setChannel(HIDDEN); else $channel=HIDDEN;})* - // {token.setChannel(99); } + | ( ' ' { spaces++; } + | '\t' { spaces += 8; spaces -= (spaces \% 8); } + )+ + ( ('\r')? '\n' {newlines++; } + )* { + if (input.LA(1) != -1) { + // make a string of n spaces where n is column number - 1 + char[] indentation = new char[spaces]; + for (int i=0; i<spaces; i++) { + indentation[i] = ' '; + } + CommonToken c = new CommonToken(LEADING_WS,new String(indentation)); + c.setLine(input.getLine()); + c.setCharPositionInLine(input.getCharPositionInLine()); + emit(c); + // kill trailing newline if present and then ignore + if (newlines != 0) { + if (state.token!=null) { + state.token.setChannel(HIDDEN); + } else { + $channel=HIDDEN; + } + } + } else { + // make a string of n newlines + char[] nls = new char[newlines]; + for (int i=0; i<newlines; i++) { + nls[i] = '\n'; + } + emit(new CommonToken(NEWLINE,new String(nls))); + } + } ) ; @@ -677,6 +986,6 @@ $channel=HIDDEN; } : {startPos==0}?=> (' '|'\t')* '#' (~'\n')* '\n'+ - | {startPos>0}?=> '#' (~'\n')* // let NEWLINE handle \n unless char pos==0 for '#' + | '#' (~'\n')* // let NEWLINE handle \n unless char pos==0 for '#' ; Property changes on: trunk/sandbox/wierzbicki/antlr/lib/antlr-3.1.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Property changes on: trunk/sandbox/wierzbicki/antlr/lib/antlr-runtime-3.1.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Property changes on: trunk/sandbox/wierzbicki/antlr/lib/stringtemplate-3.2.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: trunk/sandbox/wierzbicki/antlr/regr =================================================================== --- trunk/sandbox/wierzbicki/antlr/regr 2008-08-31 01:00:57 UTC (rev 5276) +++ trunk/sandbox/wierzbicki/antlr/regr 2008-09-01 04:03:21 UTC (rev 5277) @@ -7,18 +7,21 @@ ./run $RELEASE25/Lib/_MozillaCookieJar.py ./run $RELEASE25/Lib/_strptime.py ./run $RELEASE25/Lib/_threading_local.py +echo Lib/a ./run $RELEASE25/Lib/aifc.py ./run $RELEASE25/Lib/anydbm.py ./run $RELEASE25/Lib/asynchat.py ./run $RELEASE25/Lib/asyncore.py ./run $RELEASE25/Lib/atexit.py ./run $RELEASE25/Lib/audiodev.py +echo Lib/b ./run $RELEASE25/Lib/base64.py ./run $RELEASE25/Lib/BaseHTTPServer.py ./run $RELEASE25/Lib/Bastion.py ./run $RELEASE25/Lib/bdb.py ./run $RELEASE25/Lib/binhex.py ./run $RELEASE25/Lib/bisect.py +echo Lib/bsddb ./run $RELEASE25/Lib/bsddb/__init__.py ./run $RELEASE25/Lib/bsddb/db.py ./run $RELEASE25/Lib/bsddb/dbobj.py @@ -26,6 +29,7 @@ ./run $RELEASE25/Lib/bsddb/dbshelve.py ./run $RELEASE25/Lib/bsddb/dbtables.py ./run $RELEASE25/Lib/bsddb/dbutils.py +echo Lib/bsddb/test ./run $RELEASE25/Lib/bsddb/test/__init__.py ./run $RELEASE25/Lib/bsddb/test/test_1413192.py ./run $RELEASE25/Lib/bsddb/test/test_all.py @@ -562,6 +566,7 @@ ./run $RELEASE25/Lib/plat-linux2/TYPES.py echo Lib/plat-mac ./run 
$RELEASE25/Lib/plat-mac/aepack.py +#aetools uses 'as' not as a keyword #./run $RELEASE25/Lib/plat-mac/aetools.py ./run $RELEASE25/Lib/plat-mac/aetypes.py ./run $RELEASE25/Lib/plat-mac/applesingle.py @@ -677,6 +682,7 @@ ./run $RELEASE25/Lib/plat-mac/lib-scriptpackages/Netscape/Text.py ./run $RELEASE25/Lib/plat-mac/lib-scriptpackages/Netscape/WorldWideWeb_suite.py ./run $RELEASE25/Lib/plat-mac/lib-scriptpackages/StdSuites/__init__.py +#AppleScript_Suite.py uses 'as' not as a keyword #./run $RELEASE25/Lib/plat-mac/lib-scriptpackages/StdSuites/AppleScript_Suite.py ./run $RELEASE25/Lib/plat-mac/lib-scriptpackages/StdSuites/Macintosh_Connectivity_Clas.py ./run $RELEASE25/Lib/plat-mac/lib-scriptpackages/StdSuites/QuickDraw_Graphics_Suite.py @@ -722,6 +728,7 @@ ./run $RELEASE25/Lib/plat-sunos5/CDIO.py ./run $RELEASE25/Lib/plat-sunos5/DLFCN.py ./run $RELEASE25/Lib/plat-sunos5/IN.py +#STROPTS.py uses 'as' not as a keyword #./run $RELEASE25/Lib/plat-sunos5/STROPTS.py ./run $RELEASE25/Lib/plat-sunos5/SUNAUDIODEV.py ./run $RELEASE25/Lib/plat-sunos5/TYPES.py @@ -739,6 +746,7 @@ ./run $RELEASE25/Lib/py_compile.py ./run $RELEASE25/Lib/pyclbr.py ./run $RELEASE25/Lib/pydoc.py +echo Lib/q ./run $RELEASE25/Lib/Queue.py ./run $RELEASE25/Lib/quopri.py ./run $RELEASE25/Lib/random.py @@ -811,18 +819,15 @@ ./run $RELEASE25/Lib/test/crashers/bogus_code_obj.py ./run $RELEASE25/Lib/test/crashers/borrowed_ref_1.py ./run $RELEASE25/Lib/test/crashers/borrowed_ref_2.py -./run $RELEASE25/Lib/test/crashers/dangerous_subclassing.py ./run $RELEASE25/Lib/test/crashers/gc_inspection.py ./run $RELEASE25/Lib/test/crashers/infinite_rec_1.py ./run $RELEASE25/Lib/test/crashers/infinite_rec_2.py ./run $RELEASE25/Lib/test/crashers/infinite_rec_4.py ./run $RELEASE25/Lib/test/crashers/infinite_rec_5.py ./run $RELEASE25/Lib/test/crashers/loosing_dict_ref.py -./run $RELEASE25/Lib/test/crashers/modify_dict_attr.py ./run $RELEASE25/Lib/test/crashers/nasty_eq_vs_dict.py ./run $RELEASE25/Lib/test/crashers/recursion_limit_too_high.py ./run $RELEASE25/Lib/test/crashers/recursive_call.py -./run $RELEASE25/Lib/test/crashers/weakref_in_del.py ./run $RELEASE25/Lib/test/doctest_aliases.py ./run $RELEASE25/Lib/test/double_const.py echo Lib/test/f @@ -979,13 +984,13 @@ ./run $RELEASE25/Lib/test/test_gl.py ./run $RELEASE25/Lib/test/test_glob.py ./run $RELEASE25/Lib/test/test_global.py -#./run $RELEASE25/Lib/test/test_grammar.py +./run $RELEASE25/Lib/test/test_grammar.py ./run $RELEASE25/Lib/test/test_grp.py ./run $RELEASE25/Lib/test/test_gzip.py ./run $RELEASE25/Lib/test/test_hash.py ./run $RELEASE25/Lib/test/test_hashlib.py ./run $RELEASE25/Lib/test/test_heapq.py -#./run $RELEASE25/Lib/test/test_hexoct.py +./run $RELEASE25/Lib/test/test_hexoct.py ./run $RELEASE25/Lib/test/test_hmac.py ./run $RELEASE25/Lib/test/test_hotshot.py ./run $RELEASE25/Lib/test/test_htmllib.py @@ -1027,7 +1032,7 @@ ./run $RELEASE25/Lib/test/test_minidom.py ./run $RELEASE25/Lib/test/test_mmap.py ./run $RELEASE25/Lib/test/test_module.py -#./run $RELEASE25/Lib/test/test_multibytecodec.py +./run $RELEASE25/Lib/test/test_multibytecodec.py ./run $RELEASE25/Lib/test/test_multibytecodec_support.py ./run $RELEASE25/Lib/test/test_multifile.py ./run $RELEASE25/Lib/test/test_mutants.py @@ -1109,7 +1114,7 @@ ./run $RELEASE25/Lib/test/test_stringprep.py ./run $RELEASE25/Lib/test/test_strop.py ./run $RELEASE25/Lib/test/test_strptime.py -#./run $RELEASE25/Lib/test/test_struct.py +./run $RELEASE25/Lib/test/test_struct.py ./run $RELEASE25/Lib/test/test_structmembers.py ./run 
$RELEASE25/Lib/test/test_structseq.py ./run $RELEASE25/Lib/test/test_subprocess.py @@ -1196,8 +1201,8 @@ ./run $RELEASE25/Lib/types.py echo Lib/u ./run $RELEASE25/Lib/unittest.py -#./run $RELEASE25/Lib/urllib.py -#./run $RELEASE25/Lib/urllib2.py +./run $RELEASE25/Lib/urllib.py +./run $RELEASE25/Lib/urllib2.py ./run $RELEASE25/Lib/urlparse.py ./run $RELEASE25/Lib/user.py ./run $RELEASE25/Lib/UserDict.py Modified: trunk/sandbox/wierzbicki/antlr/run =================================================================== --- trunk/sandbox/wierzbicki/antlr/run 2008-08-31 01:00:57 UTC (rev 5276) +++ trunk/sandbox/wierzbicki/antlr/run 2008-09-01 04:03:21 UTC (rev 5277) @@ -1 +1 @@ -java -classpath lib/antlr-3.0.1.jar:build Main $* +java -classpath lib/antlr-3.1.jar:build Main $* Modified: trunk/sandbox/wierzbicki/antlr/src/Main.java =================================================================== --- trunk/sandbox/wierzbicki/antlr/src/Main.java 2008-08-31 01:00:57 UTC (rev 5276) +++ trunk/sandbox/wierzbicki/antlr/src/Main.java 2008-09-01 04:03:21 UTC (rev 5277) @@ -20,7 +20,7 @@ PythonLexer lexer = new MyLexer(input); CommonTokenStream tokens = new CommonTokenStream(lexer); tokens.discardOffChannelTokens(true); - PythonTokenSource indentedSource = new PythonTokenSource(tokens); + PythonTokenSource indentedSource = new PythonTokenSource(tokens, "<test>"); tokens = new CommonTokenStream(indentedSource); //System.out.println("tokens="+tokens.getTokens()); PythonParser parser = new PythonParser(tokens); Modified: trunk/sandbox/wierzbicki/antlr/src/PythonTokenSource.java =================================================================== --- trunk/sandbox/wierzbicki/antlr/src/PythonTokenSource.java 2008-08-31 01:00:57 UTC (rev 5276) +++ trunk/sandbox/wierzbicki/antlr/src/PythonTokenSource.java 2008-09-01 04:03:21 UTC (rev 5277) @@ -70,184 +70,263 @@ February 2004 */ public class PythonTokenSource implements TokenSource { - public static final int MAX_INDENTS = 100; - public static final int FIRST_CHAR_POSITION = 0; + public static final int MAX_INDENTS = 100; + public static final int FIRST_CHAR_POSITION = 0; - /** The stack of indent levels (column numbers) */ - int[] indentStack = new int[MAX_INDENTS]; - /** stack pointer */ - int sp=-1; // grow upwards + /** The stack of indent levels (column numbers) */ + int[] indentStack = new int[MAX_INDENTS]; + /** stack pointer */ + int sp=-1; // grow upwards - /** The queue of tokens */ - Vector tokens = new Vector(); + /** The queue of tokens */ + Vector tokens = new Vector(); - /** We pull real tokens from this lexer */ - CommonTokenStream stream; + /** We pull real tokens from this lexer */ + CommonTokenStream stream; - int lastTokenAddedIndex = -1; + int lastTokenAddedIndex = -1; - public PythonTokenSource(PythonLexer lexer) { - } + String filename; + boolean inSingle; - public PythonTokenSource(CommonTokenStream stream) { - this.stream = stream; - // "state" of indent level is FIRST_CHAR_POSITION - push(FIRST_CHAR_POSITION); - } + public PythonTokenSource(PythonLexer lexer) { + } - /** From http://www.python.org/doc/2.2.3/ref/indentation.html - "Before the first line of the file is read, a single zero is - pushed on the stack; this will never be popped off again. The - numbers pushed on the stack will always be strictly increasing - from bottom to top. At the beginning of each logical line, the - line's indentation level is compared to the top of the - stack. If it is equal, nothing happens. 
If it is larger, it is - pushed on the stack, and one INDENT token is generated. If it - is smaller, it must be one of the numbers occurring on the - stack; all numbers on the stack that are larger are popped - off, and for each number popped off a DEDENT token is - generated. At the end of the file, a DEDENT token is generated - for each number remaining on the stack that is larger than - zero." + public PythonTokenSource(CommonTokenStream stream, String filename) { + this(stream, filename, false); + } - I use char position in line 0..n-1 instead. + public PythonTokenSource(CommonTokenStream stream, String filename, boolean single) { + this.stream = stream; + this.filename = filename; + this.inSingle = single; + // "state" of indent level is FIRST_CHAR_POSITION + push(FIRST_CHAR_POSITION); + } - The DEDENTS possibly needed at EOF are gracefully handled by forcing - EOF to have char pos 0 even though with UNIX it's hard to get EOF - at a non left edge. - */ - public Token nextToken() { - // if something in queue, just remove and return it - if ( tokens.size()>0 ) { - Token t = (Token)tokens.firstElement(); - tokens.removeElementAt(0); - // System.out.println(t); - return t; - } + /** From http://www.python.org/doc/2.2.3/ref/indentation.html - insertImaginaryIndentDedentTokens(); + "Before the first line of the file is read, a single zero is + pushed on the stack; this will never be popped off again. The + numbers pushed on the stack will always be strictly increasing + from bottom to top. At the beginning of each logical line, the + line's indentation level is compared to the top of the + stack. If it is equal, nothing happens. If it is larger, it is + pushed on the stack, and one INDENT token is generated. If it + is smaller, it must be one of the numbers occurring on the + stack; all numbers on the stack that are larger are popped + off, and for each number popped off a DEDENT token is + generated. At the end of the file, a DEDENT token is generated + for each number remaining on the stack that is larger than + zero." - return nextToken(); - } + I use char position in line 0..n-1 instead. - protected void insertImaginaryIndentDedentTokens() - { - Token t = stream.LT(1); - stream.consume(); + The DEDENTS possibly needed at EOF are gracefully handled by forcing + EOF to have char pos 0 even though with UNIX it's hard to get EOF + at a non left edge. 
+ */ + public Token nextToken() { + // if something in queue, just remove and return it + if (tokens.size() > 0) { + Token t = (Token)tokens.firstElement(); + tokens.removeElementAt(0); + //System.out.println(filename + t); + return t; + } - // if not a NEWLINE, doesn't signal indent/dedent work; just enqueue - if ( t.getType()!=PythonLexer.NEWLINE ) { - List hiddenTokens = stream.getTokens(lastTokenAddedIndex+1,t.getTokenIndex()-1); - if ( hiddenTokens!=null ) { - tokens.addAll(hiddenTokens); - } - lastTokenAddedIndex = t.getTokenIndex(); - tokens.addElement(t); - return; - } + insertImaginaryIndentDedentTokens(); - // save NEWLINE in the queue - //System.out.println("found newline: "+t+" stack is "+stackString()); - List hiddenTokens = stream.getTokens(lastTokenAddedIndex+1,t.getTokenIndex()-1); - if ( hiddenTokens!=null ) { - tokens.addAll(hiddenTokens); - } - lastTokenAddedIndex = t.getTokenIndex(); - tokens.addElement(t); + return nextToken(); + } - // grab first token of next line - t = stream.LT(1); - stream.consume(); + private void generateNewline(Token t) { + CommonToken newline = new CommonToken(PythonLexer.NEWLINE, "\n"); + newline.setLine(t.getLine()); + newline.setCharPositionInLine(t.getCharPositionInLine()); + tokens.addElement(newline); + } - hiddenTokens = stream.getTokens(lastTokenAddedIndex+1,t.getTokenIndex()-1); - if ( hiddenTokens!=null ) { - tokens.addAll(hiddenTokens); - } - lastTokenAddedIndex = t.getTokenIndex(); + protected void insertImaginaryIndentDedentTokens() { + Token t = stream.LT(1); + stream.consume(); - // compute cpos as the char pos of next non-WS token in line - int cpos = t.getCharPositionInLine(); // column dictates indent/dedent - if ( t.getType()==Token.EOF ) { - cpos = -1; // pretend EOF always happens at left edge - } - else if ( t.getType()==PythonLexer.LEADING_WS ) { - cpos = t.getText().length(); - } + if (t.getType() == Token.EOF) { + if (!inSingle) { + Token prev = stream.LT(-1); + if (prev == null || prev.getType() != PythonLexer.NEWLINE) { + generateNewline(t); + } + } - //System.out.println("next token is: "+t); + handleDedents(-1, (CommonToken)t); + enqueue(t); + } else if (t.getType() == PythonLexer.NEWLINE) { + // save NEWLINE in the queue + //System.out.println("found newline: "+t+" stack is "+stackString()); + enqueueHiddens(t); + tokens.addElement(t); + Token newline = t; - // compare to last indent level - int lastIndent = peek(); - //System.out.println("cpos, lastIndent = "+cpos+", "+lastIndent); - if ( cpos > lastIndent ) { // they indented; track and gen INDENT - push(cpos); - //System.out.println("push("+cpos+"): "+stackString()); - Token indent = new ClassicToken(PythonParser.INDENT,""); - indent.setCharPositionInLine(t.getCharPositionInLine()); - indent.setLine(t.getLine()); - tokens.addElement(indent); - } - else if ( cpos < lastIndent ) { // they dedented - // how far back did we dedent? 
- int prevIndex = findPreviousIndent(cpos); - //System.out.println("dedented; prevIndex of cpos="+cpos+" is "+prevIndex); - // generate DEDENTs for each indent level we backed up over - for (int d=sp-1; d>=prevIndex; d--) { - Token dedent = new ClassicToken(PythonParser.DEDENT,""); - dedent.setCharPositionInLine(t.getCharPositionInLine()); - dedent.setLine(t.getLine()); - tokens.addElement(dedent); - } - sp = prevIndex; // pop those off indent level - } - if ( t.getType()!=PythonLexer.LEADING_WS ) { // discard WS - tokens.addElement(t); - } - } + // grab first token of next line + t = stream.LT(1); + stream.consume(); - // T O K E N S T A C K M E T H O D S + enqueueHiddens(t); - protected void push(int i) { - if (sp>=MAX_INDENTS) { - throw new IllegalStateException("stack overflow"); - } - sp++; - indentStack[sp] = i; - } + // compute cpos as the char pos of next non-WS token in line + int cpos = t.getCharPositionInLine(); // column dictates indent/dedent + if (t.getType() == Token.EOF) { + cpos = -1; // pretend EOF always happens at left edge + } + else if (t.getType() == PythonLexer.LEADING_WS) { + Token next = stream.LT(1); + if (next != null && next.getType() == Token.EOF) { + stream.consume(); + return; + } else { + cpos = t.getText().length(); + } + } - protected int pop() { - if (sp<0) { - throw new IllegalStateException("stack underflow"); - } - int top = indentStack[sp]; - sp--; - return top; - } + //System.out.println("next token is: "+t); - protected int peek() { - return indentStack[sp]; - } + // compare to last indent level + int lastIndent = peek(); + //System.out.println("cpos, lastIndent = "+cpos+", "+lastIndent); + if (cpos > lastIndent) { // they indented; track and gen INDENT + handleIndents(cpos, (CommonToken)t); + } + else if (cpos < lastIndent) { // they dedented + handleDedents(cpos, (CommonToken)t); + } - /** Return the index on stack of previous indent level == i else -1 */ - protected int findPreviousIndent(int i) { - for (int j=sp-1; j>=0; j--) { - if ( indentStack[j]==i ) { - return j; - } - } - return FIRST_CHAR_POSITION; - } + if (t.getType() == Token.EOF && inSingle) { + String newlines = newline.getText(); + for(int i=1;i<newlines.length();i++) { + generateNewline(newline); + } + } - public String stackString() { - StringBuffer buf = new StringBuffer(); - for (int j=sp; j>=0; j--) { - buf.append(" "); - buf.append(indentStack[j]); - } - return buf.toString(); - } + if (t.getType() != PythonLexer.LEADING_WS) { // discard WS + tokens.addElement(t); + } + } else { + enqueue(t); + } + } + + private void enqueue(Token t) { + enqueueHiddens(t); + tokens.addElement(t); + } + + private void enqueueHiddens(Token t) { + if (inSingle && t.getType() == Token.EOF) { + if (stream.size() > lastTokenAddedIndex + 1) { + Token hidden = stream.get(lastTokenAddedIndex + 1); + if (hidden.getType() == PythonLexer.COMMENT) { + String text = hidden.getText(); + int i = text.indexOf("\n"); + while(i != -1) { + generateNewline(hidden); + i = text.indexOf("\n", i + 1); + } + } + } + } + List hiddenTokens = stream.getTokens(lastTokenAddedIndex + 1,t.getTokenIndex() - 1); + if (hiddenTokens != null) { + tokens.addAll(hiddenTokens); + } + lastTokenAddedIndex = t.getTokenIndex(); + } + + private void handleIndents(int cpos, CommonToken t) { + push(cpos); + //System.out.println("push("+cpos+"): "+stackString()); + Token indent = new CommonToken(PythonParser.INDENT,""); + indent.setCharPositionInLine(t.getCharPositionInLine()); + indent.setLine(t.getLine()); + tokens.addElement(indent); + } 
+ + private void handleDedents(int cpos, CommonToken t) { + // how far back did we dedent? + int prevIndex = findPreviousIndent(cpos, t); + //System.out.println("dedented; prevIndex of cpos="+cpos+" is "+prevIndex); + // generate DEDENTs for each indent level we backed up over + for (int d = sp - 1; d >= prevIndex; d--) { + CommonToken dedent = new CommonToken(PythonParser.DEDENT,""); + dedent.setCharPositionInLine(t.getCharPositionInLine()); + dedent.setLine(t.getLine()); + + //XXX: this will get messed up by comments. + dedent.setStartIndex(t.getStartIndex()); + dedent.setStopIndex(t.getStopIndex()); + + tokens.addElement(dedent); + } + sp = prevIndex; // pop those off indent level + } + + // T O K E N S T A C K M E T H O D S + + protected void push(int i) { + if (sp >= MAX_INDENTS) { + throw new IllegalStateException("stack overflow"); + } + sp++; + indentStack[sp] = i; + } + + protected int pop() { + if (sp<0) { + throw new IllegalStateException("stack underflow"); + } + int top = indentStack[sp]; + sp--; + return top; + } + + protected int peek() { + return indentStack[sp]; + } + + /** Return the index on stack of previous indent level == i else -1 */ + protected int findPreviousIndent(int i, Token t) { + for (int j = sp - 1; j >= 0; j--) { + if (indentStack[j] == i) { + return j; + } + } + //The -2 is for the special case of getCharPositionInLine in multiline str nodes. + if (i == -1 || i == -2) { + return FIRST_CHAR_POSITION; + } + /* ParseException p = new ParseException("unindent does not match any outer indentation level", t.getLine(), t.getCharPositionInLine()); + p.setType(Py.IndentationError); + throw p; + */ + throw new RuntimeException("unindent does not match any outer indentation level"); + } + + public String stackString() { + StringBuffer buf = new StringBuffer(); + for (int j = sp; j >= 0; j--) { + buf.append(" "); + buf.append(indentStack[j]); + } + return buf.toString(); + } + + //FIXME: needed this for the Antlr 3.1b interface change. + public String getSourceName() { + return filename; + } + } /* More example input / output pairs with code simplified to single chars This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |