[Jython-checkins] SF.net SVN: jython:[5079] branches/asm

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 5079
          http://jython.svn.sourceforge.net/jython/?rev=5079&view=rev
Author:   fwierzbicki
Date:     2008-08-05 16:55:18 +0000 (Tue, 05 Aug 2008)

Log Message:
-----------
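Reworked lexing of newlines and EOF.  Broke up
PythonTokenSource.insertImaginaryIndentDedentTokens into smaller helper
methods (generateNewline, enqueue, handleIndents, handleDedents).  Removed the
inSingle flag from the parser and lexer members in Python.g and made it a
constructor argument of PythonTokenSource instead.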

Modified Paths:
--------------
    branches/asm/grammar/Python.g
    branches/asm/src/org/python/antlr/InteractiveParser.java
    branches/asm/src/org/python/antlr/PythonTokenSource.java

Modified: branches/asm/grammar/Python.g
===================================================================

--- branches/asm/grammar/Python.g	2008-08-04 21:34:24 UTC (rev 5078)
+++ branches/asm/grammar/Python.g	2008-08-05 16:55:18 UTC (rev 5079)
@@ -182,9 +182,6 @@
 } 
 
 @members {
-    //XXX: only used for single_input -- seems kludgy.
-    public boolean inSingle = false;
-
     boolean debugOn = false;
 
     private ErrorHandler errorHandler;
@@ -443,8 +440,6 @@
 //and the same one in the parser.
 private ErrorHandler errorHandler;
 
-//XXX: Hopefully we can remove inSingle when we get PyCF_DONT_IMPLY_DEDENT support.
-public boolean inSingle = false;
 int implicitLineJoiningLevel = 0;
 int startPos=-1;
 
@@ -489,7 +484,7 @@
 }
 
 //single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
-single_input : NEWLINE -> ^(Interactive)
+single_input : NEWLINE? -> ^(Interactive)
              | simple_stmt -> ^(Interactive simple_stmt)
              | compound_stmt NEWLINE -> ^(Interactive compound_stmt)
              ;
@@ -1349,12 +1344,11 @@
  *  Frank Wierzbicki added: Also ignore FORMFEEDS (\u000C).
  */
 NEWLINE
-    :   {inSingle}? => (('\u000C')?('\r')? '\n' )
-            {if (implicitLineJoiningLevel>0 )
-                $channel=HIDDEN;
-            }
-    |   (('\u000C')?('\r')? '\n' )+
-        {if ( startPos==0 || implicitLineJoiningLevel>0 )
+@init {
+    int newlines = 0;
+}
+    :   (('\u000C')?('\r')? '\n' {newlines++; } )+ {
+         if ( startPos==0 || implicitLineJoiningLevel>0 )
             $channel=HIDDEN;
         }
     ;
@@ -1371,30 +1365,42 @@
 LEADING_WS
 @init {
     int spaces = 0;
+    int newlines = 0;
 }
     :   {startPos==0}?=>
         (   {implicitLineJoiningLevel>0}? ( ' ' | '\t' )+ {$channel=HIDDEN;}
         |    (     ' '  { spaces++; }
              |    '\t' { spaces += 8; spaces -= (spaces \% 8); }
              )+
-            {
-               // make a string of n spaces where n is column number - 1
-               char[] indentation = new char[spaces];
-               for (int i=0; i<spaces; i++) {
-                   indentation[i] = ' ';
-               }
-               if (input.LA(1) != -1) {
-                   CommonToken c = new CommonToken(LEADING_WS,new String(indentation));
-                   c.setLine(input.getLine());
-                   c.setCharPositionInLine(input.getCharPositionInLine());
-                   emit(c);
-               } else {
-                   emit(new CommonToken(LEADING_WS,""));
-               }
-            }
-            // kill trailing newline if present and then ignore
-            ( ('\r')? '\n' {if (state.token!=null) state.token.setChannel(HIDDEN); else $channel=HIDDEN;})*
-           // {state.token.setChannel(99); }
+             ( ('\r')? '\n' {newlines++; }
+             )* {
+                   if (input.LA(1) != -1) {
+                       // make a string of n spaces where n is column number - 1
+                       char[] indentation = new char[spaces];
+                       for (int i=0; i<spaces; i++) {
+                           indentation[i] = ' ';
+                       }
+                       CommonToken c = new CommonToken(LEADING_WS,new String(indentation));
+                       c.setLine(input.getLine());
+                       c.setCharPositionInLine(input.getCharPositionInLine());
+                       emit(c);
+                       // kill trailing newline if present and then ignore
+                       if (newlines != 0) {
+                           if (state.token!=null) {
+                               state.token.setChannel(HIDDEN);
+                           } else {
+                               $channel=HIDDEN;
+                           }
+                       }
+                   } else {
+                       // make a string of n newlines
+                       char[] nls = new char[newlines];
+                       for (int i=0; i<newlines; i++) {
+                           nls[i] = '\n';
+                       }
+                       emit(new CommonToken(NEWLINE,new String(nls)));
+                   }
+                }
         )
     ;
 

Modified: branches/asm/src/org/python/antlr/InteractiveParser.java
===================================================================
--- branches/asm/src/org/python/antlr/InteractiveParser.java	2008-08-04 21:34:24 UTC (rev 5078)
+++ branches/asm/src/org/python/antlr/InteractiveParser.java	2008-08-05 16:55:18 UTC (rev 5079)
@@ -41,15 +41,12 @@
         modType tree = null;
         PythonLexer lexer = new PyLexer(new NoCloseReaderStream(bufreader));
         lexer.setErrorHandler(errorHandler);
-        //XXX: Hopefully we can remove inSingle when we get PyCF_DONT_IMPLY_DEDENT support.
-        lexer.inSingle = true;
         CommonTokenStream tokens = new CommonTokenStream(lexer);
         tokens.discardOffChannelTokens(true);
-        PythonTokenSource indentedSource = new PythonTokenSource(tokens, filename);
+        PythonTokenSource indentedSource = new PythonTokenSource(tokens, filename, true);
         tokens = new CommonTokenStream(indentedSource);
         PythonParser parser = new PythonParser(tokens);
         parser.setErrorHandler(errorHandler);
-        parser.inSingle = true;
         parser.setTreeAdaptor(new PythonTreeAdaptor());
 
         try {

Modified: branches/asm/src/org/python/antlr/PythonTokenSource.java
===================================================================
--- branches/asm/src/org/python/antlr/PythonTokenSource.java	2008-08-04 21:34:24 UTC (rev 5078)
+++ branches/asm/src/org/python/antlr/PythonTokenSource.java	2008-08-05 16:55:18 UTC (rev 5079)
@@ -91,13 +91,20 @@
     int lastTokenAddedIndex = -1;
 
     String filename;
+    boolean inSingle;
 
     public PythonTokenSource(PythonLexer lexer) {
     }
 
+
     public PythonTokenSource(CommonTokenStream stream, String filename) {
+        this(stream, filename, false);
+    }
+
+    public PythonTokenSource(CommonTokenStream stream, String filename, boolean single) {
         this.stream = stream;
         this.filename = filename;
+        this.inSingle = single;
         // "state" of indent level is FIRST_CHAR_POSITION
         push(FIRST_CHAR_POSITION);
     }
@@ -129,7 +136,7 @@
         if (tokens.size() > 0) {
             Token t = (Token)tokens.firstElement();
             tokens.removeElementAt(0);
-            //System.out.println(t);
+            //System.out.println(filename + t);
             return t;
         }
 
@@ -138,32 +145,36 @@
         return nextToken();
     }
 
+    private void generateNewline(Token t) {
+        // Imaginary newline before EOF
+        CommonToken newline = new CommonToken(PythonLexer.NEWLINE, "\n");
+        newline.setLine(t.getLine());
+        newline.setCharPositionInLine(t.getCharPositionInLine());
+        tokens.addElement(newline);
+    }
+
     protected void insertImaginaryIndentDedentTokens() {
         Token t = stream.LT(1);
         stream.consume();
 
-        // if the current token is not a NEWLINE or EOF, it doesn't signal indent/dedent work; just enqueue
-        if (t.getType() != PythonLexer.NEWLINE && t.getType() != PythonLexer.EOF) {
-            List hiddenTokens = stream.getTokens(lastTokenAddedIndex + 1,t.getTokenIndex() - 1);
-            if (hiddenTokens != null) {
-                tokens.addAll(hiddenTokens);
+        if (t.getType() == Token.EOF) {
+            Token prev = stream.LT(-1);
+            if (!inSingle && (prev == null || prev.getType() != PythonLexer.NEWLINE)) {
+                generateNewline(t);
             }
-            lastTokenAddedIndex = t.getTokenIndex();
-            tokens.addElement(t);
-            return;
-        }
 
-        CommonToken newline;
-        if (t.getType() == PythonLexer.NEWLINE) {
+            handleDedents(-1, (CommonToken)t);
+            enqueue(t);
+        } else if (t.getType() == PythonLexer.NEWLINE) {
             // save NEWLINE in the queue
             //System.out.println("found newline: "+t+" stack is "+stackString());
-            newline = (CommonToken)t;
             List hiddenTokens = stream.getTokens(lastTokenAddedIndex + 1,t.getTokenIndex() - 1);
             if (hiddenTokens!=null) {
                 tokens.addAll(hiddenTokens);
             }
             lastTokenAddedIndex = t.getTokenIndex();
             tokens.addElement(t);
+            Token newline = t;
 
             // grab first token of next line
             t = stream.LT(1);
@@ -174,63 +185,85 @@
                 tokens.addAll(hiddenTokens);
             }
             lastTokenAddedIndex = t.getTokenIndex();
-        } else {
-            // Imaginary newline before EOF
-            newline = new CommonToken(PythonLexer.NEWLINE, "\n");
-            newline.setLine(t.getLine());
-            newline.setCharPositionInLine(t.getCharPositionInLine());
-            //XXX: this is where lsoto had this...
-            //tokens.addElement(newline);
-        }
 
-        // compute cpos as the char pos of next non-WS token in line
-        int cpos = t.getCharPositionInLine(); // column dictates indent/dedent
-        if (t.getType() == Token.EOF) {
-            cpos = -1; // pretend EOF always happens at left edge
-        }
-        else if (t.getType() == PythonLexer.LEADING_WS) {
-            cpos = t.getText().length();
-        }
+            // compute cpos as the char pos of next non-WS token in line
+            int cpos = t.getCharPositionInLine(); // column dictates indent/dedent
+            if (t.getType() == Token.EOF) {
+                cpos = -1; // pretend EOF always happens at left edge
+            }
+            else if (t.getType() == PythonLexer.LEADING_WS) {
+                Token next = stream.LT(1);
+                if (next != null && next.getType() == Token.EOF) {
+                    stream.consume();
+                    return;
+                } else {
+                    cpos = t.getText().length();
+                }
+            }
 
-        //System.out.println("next token is: "+t);
+            //System.out.println("next token is: "+t);
 
-        // compare to last indent level
-        int lastIndent = peek();
-        //System.out.println("cpos, lastIndent = "+cpos+", "+lastIndent);
-        if (cpos > lastIndent) { // they indented; track and gen INDENT
-            push(cpos);
-            //System.out.println("push("+cpos+"): "+stackString());
-            Token indent = new CommonToken(PythonParser.INDENT,"");
-            indent.setCharPositionInLine(t.getCharPositionInLine());
-            indent.setLine(t.getLine());
-            tokens.addElement(indent);
-        }
-        else if (cpos < lastIndent) { // they dedented
-            // how far back did we dedent?
-            int prevIndex = findPreviousIndent(cpos, t);
-            //System.out.println("dedented; prevIndex of cpos="+cpos+" is "+prevIndex);
-            // generate DEDENTs for each indent level we backed up over
-            for (int d = sp - 1; d >= prevIndex; d--) {
-                CommonToken dedent = new CommonToken(PythonParser.DEDENT,"");
-                dedent.setCharPositionInLine(t.getCharPositionInLine());
-                dedent.setLine(t.getLine());
+            // compare to last indent level
+            int lastIndent = peek();
+            //System.out.println("cpos, lastIndent = "+cpos+", "+lastIndent);
+            if (cpos > lastIndent) { // they indented; track and gen INDENT
+                handleIndents(cpos, (CommonToken)t);
+            }
+            else if (cpos < lastIndent) { // they dedented
+                handleDedents(cpos, (CommonToken)t);
+            }
 
-                //XXX: this will get messed up by comments.
-                dedent.setStartIndex(newline.getStartIndex());
-                dedent.setStopIndex(newline.getStopIndex());
+            if (t.getType() == Token.EOF && inSingle) {
+                String newlines = newline.getText();
+                for(int i=1;i<newlines.length();i++) {
+                    generateNewline(newline);
+                }
+            }
 
-                tokens.addElement(dedent);
+            if (t.getType() != PythonLexer.LEADING_WS) { // discard WS
+                tokens.addElement(t);
             }
-            sp = prevIndex; // pop those off indent level
+
+        } else {
+            enqueue(t);
         }
-        //XXX: make sure lsoto's stuff isn't broken by this...
-        if (t.getType() == PythonLexer.EOF) {
-            tokens.addElement(newline);
+    }
+    
+    private void enqueue(Token t) {
+        List hiddenTokens = stream.getTokens(lastTokenAddedIndex + 1,t.getTokenIndex() - 1);
+        if (hiddenTokens != null) {
+            tokens.addAll(hiddenTokens);
         }
+        lastTokenAddedIndex = t.getTokenIndex();
+        tokens.addElement(t);
+    }
 
-        if (t.getType() != PythonLexer.LEADING_WS) { // discard WS
-            tokens.addElement(t);
+    private void handleIndents(int cpos, CommonToken t) {
+        push(cpos);
+        //System.out.println("push("+cpos+"): "+stackString());
+        Token indent = new CommonToken(PythonParser.INDENT,"");
+        indent.setCharPositionInLine(t.getCharPositionInLine());
+        indent.setLine(t.getLine());
+        tokens.addElement(indent);
+    }
+
+    private void handleDedents(int cpos, CommonToken t) {
+        // how far back did we dedent?
+        int prevIndex = findPreviousIndent(cpos, t);
+        //System.out.println("dedented; prevIndex of cpos="+cpos+" is "+prevIndex);
+        // generate DEDENTs for each indent level we backed up over
+        for (int d = sp - 1; d >= prevIndex; d--) {
+            CommonToken dedent = new CommonToken(PythonParser.DEDENT,"");
+            dedent.setCharPositionInLine(t.getCharPositionInLine());
+            dedent.setLine(t.getLine());
+
+            //XXX: this will get messed up by comments.
+            dedent.setStartIndex(t.getStartIndex());
+            dedent.setStopIndex(t.getStopIndex());
+
+            tokens.addElement(dedent);
         }
+        sp = prevIndex; // pop those off indent level
     }
 
     //  T O K E N  S T A C K  M E T H O D S


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.