[Jython-checkins] SF.net SVN: jython: [4667] branches/asm

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Revision: 4667
          http://jython.svn.sourceforge.net/jython/?rev=4667&view=rev
Author:   fwierzbicki
Date:     2008-06-18 13:21:10 -0700 (Wed, 18 Jun 2008)

Log Message:
-----------
Incremental improvement on interactive mode.

Modified Paths:
--------------
    branches/asm/grammar/PythonPartial.g
    branches/asm/src/org/python/antlr/InteractiveParser.java
    branches/asm/src/org/python/antlr/PythonPartialTokenSource.java
    branches/asm/src/org/python/core/ParserFacade.java

Added Paths:
-----------
    branches/asm/src/org/python/antlr/NoCloseReaderStream.java

Modified: branches/asm/grammar/PythonPartial.g
===================================================================

--- branches/asm/grammar/PythonPartial.g	2008-06-18 20:01:28 UTC (rev 4666)
+++ branches/asm/grammar/PythonPartial.g	2008-06-18 20:21:10 UTC (rev 4667)
@@ -75,6 +75,14 @@
 } 
 
 @members {
+    boolean debugOn = false;
+
+    private void debug(String message) {
+        if (debugOn) {
+            System.out.println(message);
+        }
+    }
+
     protected void mismatch(IntStream input, int ttype, BitSet follow) throws RecognitionException {
         throw new MismatchedTokenException(ttype, input);
     }
@@ -82,12 +90,14 @@
         throw e;
     }
 
+    /*
 	protected Object recoverFromMismatchedToken(IntStream input, int ttype, BitSet follow)
 		throws RecognitionException
 	{
         mismatch(input, ttype, follow);
         return null;
     }
+    */
 
 	public void reportError(RecognitionException e) {
 		System.err.print("[REPORTING] ");
@@ -154,7 +164,7 @@
 }
 
 single_input : NEWLINE
-             | simple_stmt {System.out.println("matched simple_stmt");}
+             | simple_stmt {debug("matched simple_stmt");}
              | compound_stmt NEWLINE?
              ;
 
@@ -212,7 +222,7 @@
            | assert_stmt
            ;
 
-expr_stmt : testlist {System.out.println("matched expr_stmt");}
+expr_stmt : testlist {debug("matched expr_stmt");}
             ( augassign yield_expr
             | augassign testlist
             | assigns
@@ -358,12 +368,12 @@
       | NEWLINE ((INDENT (stmt)+ (DEDENT|ENDMARK))|ENDMARK)
       ;
 
-test: or_test {System.out.println("matched test: or_test");} 
+test: or_test {debug("matched test: or_test");} 
     ( ('if' or_test 'else') => 'if' or_test 'else' test)?
     | lambdef
     ;
 
-or_test : and_test (OR and_test)* {System.out.println("matched or_test");} 
+or_test : and_test (OR and_test)* {debug("matched or_test");} 
         ;
 
 and_test : not_test (AND not_test)*
@@ -389,7 +399,7 @@
         | 'is' NOT
         ;
 
-expr : xor_expr (VBAR xor_expr)* {System.out.println("matched expr");}
+expr : xor_expr (VBAR xor_expr)* {debug("matched expr");}
      ;
 
 xor_expr : and_expr (CIRCUMFLEX and_expr)*
@@ -429,7 +439,7 @@
      | LONGINT
      | FLOAT
      | COMPLEX
-     | (STRING)+ {System.out.println("matched STRING");} 
+     | (STRING)+ {debug("matched STRING");} 
      ;
 
 listmaker : test 
@@ -468,7 +478,7 @@
          ;
 
 testlist
-    : test (options {k=2;}: COMMA test)* (COMMA)? {System.out.println("matched testlist");}
+    : test (options {k=2;}: COMMA test)* (COMMA)? {debug("matched testlist");}
     ;
 
 dictmaker : test COLON test (options {k=2;}:COMMA test COLON test)* (COMMA)?

Modified: branches/asm/src/org/python/antlr/InteractiveParser.java
===================================================================
--- branches/asm/src/org/python/antlr/InteractiveParser.java	2008-06-18 20:01:28 UTC (rev 4666)
+++ branches/asm/src/org/python/antlr/InteractiveParser.java	2008-06-18 20:21:10 UTC (rev 4667)
@@ -1,5 +1,8 @@
 package org.python.antlr;
 
+import java.io.BufferedReader;
+import java.io.IOException;
+
 import org.antlr.runtime.CharStream;
 import org.antlr.runtime.CommonToken;
 import org.antlr.runtime.CommonTokenStream;
@@ -16,7 +19,7 @@
 
 public class InteractiveParser {
 
-    private CharStream charStream;
+    private BufferedReader bufreader;
 
     //Extract superclass from this and the other XParsers.
     public static class PyLexer extends PythonLexer {
@@ -30,34 +33,24 @@
         }
     }
 
-    public InteractiveParser(CharStream cs) {
-        this.charStream = cs;
-    }
+    public static class PPLexer extends PythonPartialLexer {
+        public PPLexer(CharStream lexer) {
+            super(lexer);
+        }
 
-    public modType partialParse() {
-        /*
-        CPython codeop exploits that with CPython parser adding newlines
-        to a partial valid sentence move the reported error position,
-        this is not true for our parser, so we need a different approach:
-        we check whether all sentence tokens have been consumed or
-        the remaining ones fullfill lookahead expectations. See:
-        PythonGrammar.partial_valid_sentence (def in python.jjt)
-
-        FJW: the above comment needs to be changed when the current partial
-        parse strategy gels.
-        */
-        try {
-            return parse();
-        } catch (ParseException e) {
-            //FIXME: This needs plenty of tuning, this just calls all errors
-            //partial matches.
-            return null;
+        public Token nextToken() {
+            startPos = getCharPositionInLine();
+            return super.nextToken();
         }
     }
-            
-    public modType parse() {
+
+    public InteractiveParser(BufferedReader br) {
+        this.bufreader = br;
+    }
+
+    public modType parse() throws IOException {
         modType tree = null;
-        PythonLexer lexer = new PyLexer(this.charStream);
+        PythonLexer lexer = new PyLexer(new NoCloseReaderStream(bufreader));
         CommonTokenStream tokens = new CommonTokenStream(lexer);
         tokens.discardOffChannelTokens(true);
         PythonTokenSource indentedSource = new PythonTokenSource(tokens);
@@ -76,5 +69,5 @@
             //     generated code.
         }
         return tree;
-    } 
+    }
 }

Added: branches/asm/src/org/python/antlr/NoCloseReaderStream.java
===================================================================
--- branches/asm/src/org/python/antlr/NoCloseReaderStream.java	                        (rev 0)
+++ branches/asm/src/org/python/antlr/NoCloseReaderStream.java	2008-06-18 20:21:10 UTC (rev 4667)
@@ -0,0 +1,81 @@
+/*
+ [The "BSD licence"]
+ Copyright (c) 2005-2008 Terence Parr
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+    derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+package org.python.antlr;
+
+import java.io.*;
+
+import org.antlr.runtime.ANTLRStringStream;
+
+//This is the same as ANTLRReaderStream except it does not close the Reader.
+//Used for interactive mode where we may need to parse twice.
+public class NoCloseReaderStream extends ANTLRStringStream {
+	public static final int READ_BUFFER_SIZE = 1024;
+	public static final int INITIAL_BUFFER_SIZE = 1024;
+
+	public NoCloseReaderStream(Reader r) throws IOException {
+		this(r, INITIAL_BUFFER_SIZE, READ_BUFFER_SIZE);
+	}
+
+	public NoCloseReaderStream(Reader r, int size) throws IOException {
+		this(r, size, READ_BUFFER_SIZE);
+	}
+
+	public NoCloseReaderStream(Reader r, int size, int readChunkSize) throws IOException {
+		load(r, size, readChunkSize);
+	}
+
+	public void load(Reader r, int size, int readChunkSize)
+		throws IOException
+	{
+		if ( r==null ) {
+			return;
+		}
+		if ( size<=0 ) {
+			size = INITIAL_BUFFER_SIZE;
+		}
+		if ( readChunkSize<=0 ) {
+			readChunkSize = READ_BUFFER_SIZE;
+		}
+		data = new char[size];
+		// read all the data in chunks of readChunkSize
+		int numRead=0;
+		int p = 0;
+		do {
+			if ( p+readChunkSize > data.length ) { // overflow?
+				char[] newdata = new char[data.length*2]; // resize
+				System.arraycopy(data, 0, newdata, 0, data.length);
+				data = newdata;
+			}
+			numRead = r.read(data, p, readChunkSize);
+			p += numRead;
+		} while (numRead!=-1); // while not EOF
+		// set the actual size of the data available;
+		// EOF subtracted one above in p+=numRead; add one back
+		super.n = p+1;
+	}
+}

Modified: branches/asm/src/org/python/antlr/PythonPartialTokenSource.java
===================================================================
--- branches/asm/src/org/python/antlr/PythonPartialTokenSource.java	2008-06-18 20:01:28 UTC (rev 4666)
+++ branches/asm/src/org/python/antlr/PythonPartialTokenSource.java	2008-06-18 20:21:10 UTC (rev 4667)
@@ -68,7 +68,7 @@
         if ( tokens.size()>0 ) {
             Token t = (Token)tokens.firstElement();
             tokens.removeElementAt(0);
-            System.out.println(t);
+            //System.out.println(t);
             return t;
         }
 
@@ -115,7 +115,6 @@
         // compute cpos as the char pos of next non-WS token in line
         int cpos = t.getCharPositionInLine(); // column dictates indent/dedent
         if ( t.getType()==Token.EOF ) {
-            System.out.println("EOF!!!");
             atEnd = true;
 
             Token em = new ClassicToken(PythonPartialParser.ENDMARK,"");

Modified: branches/asm/src/org/python/core/ParserFacade.java
===================================================================
--- branches/asm/src/org/python/core/ParserFacade.java	2008-06-18 20:01:28 UTC (rev 4666)
+++ branches/asm/src/org/python/core/ParserFacade.java	2008-06-18 20:21:10 UTC (rev 4667)
@@ -14,16 +14,21 @@
 
 import org.antlr.runtime.ANTLRReaderStream;
 import org.antlr.runtime.CharStream;
+import org.antlr.runtime.CommonTokenStream;
 import org.python.antlr.ExpressionParser;
 import org.python.antlr.InteractiveParser;
 import org.python.antlr.LeadingSpaceSkippingStream;
 import org.python.antlr.ParseException;
 import org.python.antlr.ModuleParser;
+import org.python.antlr.NoCloseReaderStream;
 import org.python.antlr.PythonParser;
 import org.python.antlr.PythonTree;
 import org.python.core.util.StringUtil;
 import org.python.antlr.IParserHost;
 import org.python.antlr.PythonTree;
+import org.python.antlr.PythonPartialLexer;
+import org.python.antlr.PythonPartialParser;
+import org.python.antlr.PythonPartialTokenSource;
 import org.python.antlr.ast.modType;
 
 /**
@@ -70,8 +75,14 @@
                 col = node.getCharPositionInLine();
             }
             String text=getLine(reader, line);
-            return new PySyntaxError(e.getMessage(), line, col,
-                                     text, filename);
+            String msg = e.getMessage();
+            if (msg == null) {
+                msg = "XXX: missing msg";
+            }
+            if (text == null) {
+                text = "XXX: missing text";
+            }
+            return new PySyntaxError(msg, line, col, text, filename);
         }
         else return Py.JavaError(t);
     }
@@ -85,26 +96,23 @@
                                 String kind,
                                 String filename,
                                 CompilerFlags cflags) {
-        CharStream cs = null;
         BufferedInputStream bstream = new BufferedInputStream(stream);
-        //FIXME: definite NPE potential here -- do we even need prepBufreader
-        //       now?
+        //FIXME: npe?
         BufferedReader bufreader = null;
         modType node = null;
         try {
             if (kind.equals("eval")) {
                 bufreader = prepBufreader(new LeadingSpaceSkippingStream(bstream), cflags);
-                cs = new ANTLRReaderStream(bufreader);
+                CharStream cs = new ANTLRReaderStream(bufreader);
                 ExpressionParser e = new ExpressionParser(cs);
                 node = e.parse();
             } else if (kind.equals("single")) {
                 bufreader = prepBufreader(bstream, cflags);
-                cs = new ANTLRReaderStream(bufreader);
-                InteractiveParser i = new InteractiveParser(cs);
-                node = i.partialParse();
+                InteractiveParser i = new InteractiveParser(bufreader);
+                node = i.parse();
             } else if (kind.equals("exec")) {
                 bufreader = prepBufreader(bstream, cflags);
-                cs = new ANTLRReaderStream(bufreader);
+                CharStream cs = new ANTLRReaderStream(bufreader);
                 ModuleParser g = new ModuleParser(cs);
                 node = g.file_input();
             } else {
@@ -121,43 +129,50 @@
                                        String filename,
                                        CompilerFlags cflags,
                                        boolean stdprompt) {
-        CharStream cs = null;
-        //FIXME: definite NPE potential here -- do we even need prepBufreader
-        //       now?
+        modType node = null;
+        //FIXME: npe?
         BufferedReader bufreader = null;
-        modType node = null;
-        if (kind.equals("single")) {
-            ByteArrayInputStream bi = new ByteArrayInputStream(
-                    StringUtil.toBytes(string));
-            BufferedInputStream bstream = new BufferedInputStream(bi);
-            try {
+        try {
+            if (kind.equals("single")) {
+                ByteArrayInputStream bi = new ByteArrayInputStream(
+                        StringUtil.toBytes(string));
+                BufferedInputStream bstream = bstream = new BufferedInputStream(bi);
                 bufreader = prepBufreader(bstream, cflags);
-                cs = new ANTLRReaderStream(bufreader);
-            } catch (IOException io) {
-                //FIXME:
+                InteractiveParser i = new InteractiveParser(bufreader);
+                node = i.parse();
+            } else {
+                throw Py.ValueError("parse kind must be eval, exec, " + "or single");
             }
-            InteractiveParser i = new InteractiveParser(cs);
-            node = i.partialParse();
-        } else {
-            throw Py.ValueError("parse kind must be eval, exec, " + "or single");
+        } catch (Throwable t) {
+            PyException p = fixParseError(bufreader, t, filename);
+            if (validPartialSentence(bufreader)) {
+                return null;
+            }
+            throw p;
         }
         return node;
     }
 
-    private static BufferedReader prepBufreader(InputStream istream,
-                                                CompilerFlags cflags) throws IOException {
-        int nbytes;
+    private static boolean validPartialSentence(BufferedReader bufreader) {
         try {
-            nbytes = istream.available();
+            bufreader.reset();
+            CharStream cs = new NoCloseReaderStream(bufreader);
+            PythonPartialLexer lexer = new InteractiveParser.PPLexer(cs);
+            CommonTokenStream tokens = new CommonTokenStream(lexer);
+            tokens.discardOffChannelTokens(true);
+            PythonPartialTokenSource indentedSource = new PythonPartialTokenSource(tokens);
+            tokens = new CommonTokenStream(indentedSource);
+            PythonPartialParser parser = new PythonPartialParser(tokens);
+            parser.single_input();
+        } catch (Exception e) {
+            return false;
         }
-        catch (IOException ioe1) {
-            nbytes = 10000;
-        }
-        if (nbytes <= 0)
-            nbytes = 10000;
-        if (nbytes > 100000)
-            nbytes = 100000;
+        return true;
+    }
 
+
+    private static BufferedReader prepBufreader(InputStream istream,
+                                                CompilerFlags cflags) throws IOException {
         String encoding = readEncoding(istream);
         if(encoding == null && cflags != null && cflags.encoding != null) {
             encoding = cflags.encoding;
@@ -183,7 +198,7 @@
         
         BufferedReader bufreader = new BufferedReader(reader);
         
-        bufreader.mark(nbytes);
+        bufreader.mark(100000);
         return bufreader;
     }
 


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.