[Jython-checkins] SF.net SVN: jython:[7268] trunk/sandbox/wierzbicki/antlr

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 7268
          http://jython.svn.sourceforge.net/jython/?rev=7268&view=rev
Author:   fwierzbicki
Date:     2011-03-26 16:21:37 +0000 (Sat, 26 Mar 2011)

Log Message:
-----------
Update to the latest ANTLR (3.3 runtime) for sandbox testing, plus other updates.

Modified Paths:
--------------
    trunk/sandbox/wierzbicki/antlr/build.xml
    trunk/sandbox/wierzbicki/antlr/grammar/Python.g
    trunk/sandbox/wierzbicki/antlr/run
    trunk/sandbox/wierzbicki/antlr/src/Main.java
    trunk/sandbox/wierzbicki/antlr/src/PythonTokenSource.java

Removed Paths:
-------------
    trunk/sandbox/wierzbicki/antlr/lib/antlr-2.7.7.jar
    trunk/sandbox/wierzbicki/antlr/lib/antlr-3.1.jar
    trunk/sandbox/wierzbicki/antlr/lib/antlr-runtime-3.1.jar
    trunk/sandbox/wierzbicki/antlr/lib/stringtemplate-3.2.jar

Modified: trunk/sandbox/wierzbicki/antlr/build.xml
===================================================================

--- trunk/sandbox/wierzbicki/antlr/build.xml	2011-03-25 15:25:07 UTC (rev 7267)
+++ trunk/sandbox/wierzbicki/antlr/build.xml	2011-03-26 16:21:37 UTC (rev 7268)
@@ -8,22 +8,22 @@
     </path>
 
     <target name="init">
-        <mkdir dir="build/gensrc/org/python/antlr/ast"/>
+        <mkdir dir="build/gensrc"/>
     </target>
 
     <target name="antlr_gen" depends="init">
         <java classname="org.antlr.Tool">
             <arg value="-fo"/>
-            <arg path="build/gensrc/org/python/antlr"/>
+            <arg path="build/gensrc"/>
             <arg value="-lib"/>
-            <arg path="build/gensrc/org/python/antlr"/>
+            <arg path="build/gensrc"/>
             <arg file="grammar/Python.g"/>
             <classpath refid="main.classpath"/>
         </java>
         
         <!-- copy the .tokens to /grammar, for usage in ANTLRWorks -->
         <copy todir="grammar" preservelastmodified="true">
-            <fileset dir="build/gensrc/org/python/antlr">
+            <fileset dir="build/gensrc">
                 <include name="Python.tokens" />
             </fileset>
         </copy>

Modified: trunk/sandbox/wierzbicki/antlr/grammar/Python.g
===================================================================
--- trunk/sandbox/wierzbicki/antlr/grammar/Python.g	2011-03-25 15:25:07 UTC (rev 7267)
+++ trunk/sandbox/wierzbicki/antlr/grammar/Python.g	2011-03-26 16:21:37 UTC (rev 7268)
@@ -69,6 +69,7 @@
 tokens {
     INDENT;
     DEDENT;
+    TRAILBACKSLASH; //For dangling backslashes when partial parsing.
 }
 
 @lexer::members {

Deleted: trunk/sandbox/wierzbicki/antlr/lib/antlr-2.7.7.jar
===================================================================
(Binary files differ)

Deleted: trunk/sandbox/wierzbicki/antlr/lib/antlr-3.1.jar
===================================================================
(Binary files differ)

Deleted: trunk/sandbox/wierzbicki/antlr/lib/antlr-runtime-3.1.jar
===================================================================
(Binary files differ)

Deleted: trunk/sandbox/wierzbicki/antlr/lib/stringtemplate-3.2.jar
===================================================================
(Binary files differ)

Modified: trunk/sandbox/wierzbicki/antlr/run
===================================================================
--- trunk/sandbox/wierzbicki/antlr/run	2011-03-25 15:25:07 UTC (rev 7267)
+++ trunk/sandbox/wierzbicki/antlr/run	2011-03-26 16:21:37 UTC (rev 7268)
@@ -1 +1 @@
-java -classpath lib/antlr-3.1.jar:build Main /home/frank/svn/python/release26-maint
+java -classpath lib/antlr-runtime-3.3.jar:build Main /home/frank/svn/python/release26-maint

Modified: trunk/sandbox/wierzbicki/antlr/src/Main.java
===================================================================
--- trunk/sandbox/wierzbicki/antlr/src/Main.java	2011-03-25 15:25:07 UTC (rev 7267)
+++ trunk/sandbox/wierzbicki/antlr/src/Main.java	2011-03-26 16:21:37 UTC (rev 7268)
@@ -42,7 +42,7 @@
         CharStream input = new ANTLRFileStream(path);
         PythonLexer lexer = new MyLexer(input);
         CommonTokenStream tokens = new CommonTokenStream(lexer);
-        tokens.discardOffChannelTokens(true);
+        //tokens.discardOffChannelTokens(true);
         PythonTokenSource indentedSource = new PythonTokenSource(tokens, "<test>");
         tokens = new CommonTokenStream(indentedSource);
         PythonParser parser = new PythonParser(tokens);

Modified: trunk/sandbox/wierzbicki/antlr/src/PythonTokenSource.java
===================================================================
--- trunk/sandbox/wierzbicki/antlr/src/PythonTokenSource.java	2011-03-25 15:25:07 UTC (rev 7267)
+++ trunk/sandbox/wierzbicki/antlr/src/PythonTokenSource.java	2011-03-26 16:21:37 UTC (rev 7268)
@@ -79,7 +79,7 @@
     int sp=-1; // grow upwards
 
     /** The queue of tokens */
-    Vector tokens = new Vector();
+    Vector<Token> tokens = new Vector<Token>();
 
     /** We pull real tokens from this lexer */
     CommonTokenStream stream;
@@ -130,7 +130,7 @@
     public Token nextToken() {
         // if something in queue, just remove and return it
         if (tokens.size() > 0) {
-            Token t = (Token)tokens.firstElement();
+            Token t = tokens.firstElement();
             tokens.removeElementAt(0);
             //System.out.println(filename + t);
             return t;
@@ -142,25 +142,43 @@
     }
 
     private void generateNewline(Token t) {
+        //System.out.println("generating newline from token: " + t);
         CommonToken newline = new CommonToken(PythonLexer.NEWLINE, "\n");
         newline.setLine(t.getLine());
         newline.setCharPositionInLine(t.getCharPositionInLine());
         tokens.addElement(newline);
     }
 
+    private void handleEOF(CommonToken eof, CommonToken prev) {
+        //System.out.println("processing eof with token: " + prev);
+        if (prev != null) {
+            eof.setStartIndex(prev.getStopIndex());
+            eof.setStopIndex(prev.getStopIndex());
+            eof.setLine(prev.getLine());
+            eof.setCharPositionInLine(prev.getCharPositionInLine());
+        }
+    }
+
     protected void insertImaginaryIndentDedentTokens() {
         Token t = stream.LT(1);
         stream.consume();
 
         if (t.getType() == Token.EOF) {
+            Token prev = stream.LT(-1);
+            handleEOF((CommonToken)t, (CommonToken)prev);
             if (!inSingle) {
-                Token prev = stream.LT(-1);
-                if (prev == null || prev.getType() != PythonLexer.NEWLINE) {
+                if (prev == null) {
                     generateNewline(t);
+                } else if (prev.getType() == PythonLexer.LEADING_WS) {
+                    handleDedents(-1, (CommonToken)t);
+                    generateNewline(t);
+                } else if (prev.getType() != PythonLexer.NEWLINE) {
+                    generateNewline(t);
+                    handleDedents(-1, (CommonToken)t);
                 }
+            } else {
+                handleDedents(-1, (CommonToken)t);
             }
-
-            handleDedents(-1, (CommonToken)t);
             enqueue(t);
         } else if (t.getType() == PythonLexer.NEWLINE) {
             // save NEWLINE in the queue
@@ -173,11 +191,12 @@
             t = stream.LT(1);
             stream.consume();
 
-            enqueueHiddens(t);
+            List<Token> commentedNewlines = enqueueHiddens(t);
 
             // compute cpos as the char pos of next non-WS token in line
             int cpos = t.getCharPositionInLine(); // column dictates indent/dedent
             if (t.getType() == Token.EOF) {
+                handleEOF((CommonToken)t, (CommonToken)newline);
                 cpos = -1; // pretend EOF always happens at left edge
             }
             else if (t.getType() == PythonLexer.LEADING_WS) {
@@ -207,6 +226,9 @@
                 for(int i=1;i<newlines.length();i++) {
                     generateNewline(newline);
                 }
+                for (Token c : commentedNewlines) {
+                    generateNewline(c);
+                }
             }
 
             if (t.getType() != PythonLexer.LEADING_WS) { // discard WS
@@ -223,7 +245,8 @@
         tokens.addElement(t);
     }
 
-    private void enqueueHiddens(Token t) {
+    private List<Token> enqueueHiddens(Token t) {
+        List<Token> newlines = new ArrayList<Token>();
         if (inSingle && t.getType() == Token.EOF) {
             if (stream.size() > lastTokenAddedIndex + 1) {
                 Token hidden = stream.get(lastTokenAddedIndex + 1);
@@ -231,25 +254,28 @@
                     String text = hidden.getText();
                     int i = text.indexOf("\n");
                     while(i != -1) {
-                        generateNewline(hidden);
+                        newlines.add(hidden);
                         i = text.indexOf("\n", i + 1);
                     }
                 }
             }
         }
-        List hiddenTokens = stream.getTokens(lastTokenAddedIndex + 1,t.getTokenIndex() - 1);
+        List<Token> hiddenTokens = stream.getTokens(lastTokenAddedIndex + 1,t.getTokenIndex() - 1);
         if (hiddenTokens != null) {
             tokens.addAll(hiddenTokens);
         }
         lastTokenAddedIndex = t.getTokenIndex();
+        return newlines;
     }
 
     private void handleIndents(int cpos, CommonToken t) {
         push(cpos);
         //System.out.println("push("+cpos+"): "+stackString());
-        Token indent = new CommonToken(PythonParser.INDENT,"");
+        CommonToken indent = new CommonToken(PythonParser.INDENT,"");
         indent.setCharPositionInLine(t.getCharPositionInLine());
         indent.setLine(t.getLine());
+        indent.setStartIndex(t.getStartIndex() - 1);
+        indent.setStopIndex(t.getStartIndex() - 1);
         tokens.addElement(indent);
     }
 
@@ -263,9 +289,8 @@
             dedent.setCharPositionInLine(t.getCharPositionInLine());
             dedent.setLine(t.getLine());
 
-            //XXX: this will get messed up by comments.
-            dedent.setStartIndex(t.getStartIndex());
-            dedent.setStopIndex(t.getStopIndex());
+            dedent.setStartIndex(t.getStartIndex() - 1);
+            dedent.setStopIndex(t.getStartIndex() - 1);
 
             tokens.addElement(dedent);
         }
@@ -306,11 +331,8 @@
         if (i == -1 || i == -2) {
             return FIRST_CHAR_POSITION;
         }
-        /* ParseException p = new ParseException("unindent does not match any outer indentation level", t.getLine(), t.getCharPositionInLine());
-        p.setType(Py.IndentationError);
+        RuntimeException p = new RuntimeException("unindent does not match any outer indentation level");//, t.getLine(), t.getCharPositionInLine());
         throw p;
-        */
-        throw new RuntimeException("unindent does not match any outer indentation level");
     }
 
     public String stackString() {
@@ -322,7 +344,6 @@
         return buf.toString();
     }
 
-    //FIXME: needed this for the Antlr 3.1b interface change.
     public String getSourceName() {
         return filename;
     }


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.