From: <fwi...@us...> - 2008-06-18 20:21:13
|
Revision: 4667 http://jython.svn.sourceforge.net/jython/?rev=4667&view=rev Author: fwierzbicki Date: 2008-06-18 13:21:10 -0700 (Wed, 18 Jun 2008) Log Message: ----------- Incremental improvement on interactive mode. Modified Paths: -------------- branches/asm/grammar/PythonPartial.g branches/asm/src/org/python/antlr/InteractiveParser.java branches/asm/src/org/python/antlr/PythonPartialTokenSource.java branches/asm/src/org/python/core/ParserFacade.java Added Paths: ----------- branches/asm/src/org/python/antlr/NoCloseReaderStream.java Modified: branches/asm/grammar/PythonPartial.g =================================================================== --- branches/asm/grammar/PythonPartial.g 2008-06-18 20:01:28 UTC (rev 4666) +++ branches/asm/grammar/PythonPartial.g 2008-06-18 20:21:10 UTC (rev 4667) @@ -75,6 +75,14 @@ } @members { + boolean debugOn = false; + + private void debug(String message) { + if (debugOn) { + System.out.println(message); + } + } + protected void mismatch(IntStream input, int ttype, BitSet follow) throws RecognitionException { throw new MismatchedTokenException(ttype, input); } @@ -82,12 +90,14 @@ throw e; } + /* protected Object recoverFromMismatchedToken(IntStream input, int ttype, BitSet follow) throws RecognitionException { mismatch(input, ttype, follow); return null; } + */ public void reportError(RecognitionException e) { System.err.print("[REPORTING] "); @@ -154,7 +164,7 @@ } single_input : NEWLINE - | simple_stmt {System.out.println("matched simple_stmt");} + | simple_stmt {debug("matched simple_stmt");} | compound_stmt NEWLINE? ; @@ -212,7 +222,7 @@ | assert_stmt ; -expr_stmt : testlist {System.out.println("matched expr_stmt");} +expr_stmt : testlist {debug("matched expr_stmt");} ( augassign yield_expr | augassign testlist | assigns @@ -358,12 +368,12 @@ | NEWLINE ((INDENT (stmt)+ (DEDENT|ENDMARK))|ENDMARK) ; -test: or_test {System.out.println("matched test: or_test");} +test: or_test {debug("matched test: or_test");} ( ('if' or_test 'else') => 'if' or_test 'else' test)? | lambdef ; -or_test : and_test (OR and_test)* {System.out.println("matched or_test");} +or_test : and_test (OR and_test)* {debug("matched or_test");} ; and_test : not_test (AND not_test)* @@ -389,7 +399,7 @@ | 'is' NOT ; -expr : xor_expr (VBAR xor_expr)* {System.out.println("matched expr");} +expr : xor_expr (VBAR xor_expr)* {debug("matched expr");} ; xor_expr : and_expr (CIRCUMFLEX and_expr)* @@ -429,7 +439,7 @@ | LONGINT | FLOAT | COMPLEX - | (STRING)+ {System.out.println("matched STRING");} + | (STRING)+ {debug("matched STRING");} ; listmaker : test @@ -468,7 +478,7 @@ ; testlist - : test (options {k=2;}: COMMA test)* (COMMA)? {System.out.println("matched testlist");} + : test (options {k=2;}: COMMA test)* (COMMA)? {debug("matched testlist");} ; dictmaker : test COLON test (options {k=2;}:COMMA test COLON test)* (COMMA)? Modified: branches/asm/src/org/python/antlr/InteractiveParser.java =================================================================== --- branches/asm/src/org/python/antlr/InteractiveParser.java 2008-06-18 20:01:28 UTC (rev 4666) +++ branches/asm/src/org/python/antlr/InteractiveParser.java 2008-06-18 20:21:10 UTC (rev 4667) @@ -1,5 +1,8 @@ package org.python.antlr; +import java.io.BufferedReader; +import java.io.IOException; + import org.antlr.runtime.CharStream; import org.antlr.runtime.CommonToken; import org.antlr.runtime.CommonTokenStream; @@ -16,7 +19,7 @@ public class InteractiveParser { - private CharStream charStream; + private BufferedReader bufreader; //Extract superclass from this and the other XParsers. public static class PyLexer extends PythonLexer { @@ -30,34 +33,24 @@ } } - public InteractiveParser(CharStream cs) { - this.charStream = cs; - } + public static class PPLexer extends PythonPartialLexer { + public PPLexer(CharStream lexer) { + super(lexer); + } - public modType partialParse() { - /* - CPython codeop exploits that with CPython parser adding newlines - to a partial valid sentence move the reported error position, - this is not true for our parser, so we need a different approach: - we check whether all sentence tokens have been consumed or - the remaining ones fullfill lookahead expectations. See: - PythonGrammar.partial_valid_sentence (def in python.jjt) - - FJW: the above comment needs to be changed when the current partial - parse strategy gels. - */ - try { - return parse(); - } catch (ParseException e) { - //FIXME: This needs plenty of tuning, this just calls all errors - //partial matches. - return null; + public Token nextToken() { + startPos = getCharPositionInLine(); + return super.nextToken(); } } - - public modType parse() { + + public InteractiveParser(BufferedReader br) { + this.bufreader = br; + } + + public modType parse() throws IOException { modType tree = null; - PythonLexer lexer = new PyLexer(this.charStream); + PythonLexer lexer = new PyLexer(new NoCloseReaderStream(bufreader)); CommonTokenStream tokens = new CommonTokenStream(lexer); tokens.discardOffChannelTokens(true); PythonTokenSource indentedSource = new PythonTokenSource(tokens); @@ -76,5 +69,5 @@ // generated code. } return tree; - } + } } Added: branches/asm/src/org/python/antlr/NoCloseReaderStream.java =================================================================== --- branches/asm/src/org/python/antlr/NoCloseReaderStream.java (rev 0) +++ branches/asm/src/org/python/antlr/NoCloseReaderStream.java 2008-06-18 20:21:10 UTC (rev 4667) @@ -0,0 +1,81 @@ +/* + [The "BSD licence"] + Copyright (c) 2005-2008 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +package org.python.antlr; + +import java.io.*; + +import org.antlr.runtime.ANTLRStringStream; + +//This is the same as ANTLRReaderStream except it does not close the Reader. +//Used for interactive mode where we may need to parse twice. +public class NoCloseReaderStream extends ANTLRStringStream { + public static final int READ_BUFFER_SIZE = 1024; + public static final int INITIAL_BUFFER_SIZE = 1024; + + public NoCloseReaderStream(Reader r) throws IOException { + this(r, INITIAL_BUFFER_SIZE, READ_BUFFER_SIZE); + } + + public NoCloseReaderStream(Reader r, int size) throws IOException { + this(r, size, READ_BUFFER_SIZE); + } + + public NoCloseReaderStream(Reader r, int size, int readChunkSize) throws IOException { + load(r, size, readChunkSize); + } + + public void load(Reader r, int size, int readChunkSize) + throws IOException + { + if ( r==null ) { + return; + } + if ( size<=0 ) { + size = INITIAL_BUFFER_SIZE; + } + if ( readChunkSize<=0 ) { + readChunkSize = READ_BUFFER_SIZE; + } + data = new char[size]; + // read all the data in chunks of readChunkSize + int numRead=0; + int p = 0; + do { + if ( p+readChunkSize > data.length ) { // overflow? + char[] newdata = new char[data.length*2]; // resize + System.arraycopy(data, 0, newdata, 0, data.length); + data = newdata; + } + numRead = r.read(data, p, readChunkSize); + p += numRead; + } while (numRead!=-1); // while not EOF + // set the actual size of the data available; + // EOF subtracted one above in p+=numRead; add one back + super.n = p+1; + } +} Modified: branches/asm/src/org/python/antlr/PythonPartialTokenSource.java =================================================================== --- branches/asm/src/org/python/antlr/PythonPartialTokenSource.java 2008-06-18 20:01:28 UTC (rev 4666) +++ branches/asm/src/org/python/antlr/PythonPartialTokenSource.java 2008-06-18 20:21:10 UTC (rev 4667) @@ -68,7 +68,7 @@ if ( tokens.size()>0 ) { Token t = (Token)tokens.firstElement(); tokens.removeElementAt(0); - System.out.println(t); + //System.out.println(t); return t; } @@ -115,7 +115,6 @@ // compute cpos as the char pos of next non-WS token in line int cpos = t.getCharPositionInLine(); // column dictates indent/dedent if ( t.getType()==Token.EOF ) { - System.out.println("EOF!!!"); atEnd = true; Token em = new ClassicToken(PythonPartialParser.ENDMARK,""); Modified: branches/asm/src/org/python/core/ParserFacade.java =================================================================== --- branches/asm/src/org/python/core/ParserFacade.java 2008-06-18 20:01:28 UTC (rev 4666) +++ branches/asm/src/org/python/core/ParserFacade.java 2008-06-18 20:21:10 UTC (rev 4667) @@ -14,16 +14,21 @@ import org.antlr.runtime.ANTLRReaderStream; import org.antlr.runtime.CharStream; +import org.antlr.runtime.CommonTokenStream; import org.python.antlr.ExpressionParser; import org.python.antlr.InteractiveParser; import org.python.antlr.LeadingSpaceSkippingStream; import org.python.antlr.ParseException; import org.python.antlr.ModuleParser; +import org.python.antlr.NoCloseReaderStream; import org.python.antlr.PythonParser; import org.python.antlr.PythonTree; import org.python.core.util.StringUtil; import org.python.antlr.IParserHost; import org.python.antlr.PythonTree; +import org.python.antlr.PythonPartialLexer; +import org.python.antlr.PythonPartialParser; +import org.python.antlr.PythonPartialTokenSource; import org.python.antlr.ast.modType; /** @@ -70,8 +75,14 @@ col = node.getCharPositionInLine(); } String text=getLine(reader, line); - return new PySyntaxError(e.getMessage(), line, col, - text, filename); + String msg = e.getMessage(); + if (msg == null) { + msg = "XXX: missing msg"; + } + if (text == null) { + text = "XXX: missing text"; + } + return new PySyntaxError(msg, line, col, text, filename); } else return Py.JavaError(t); } @@ -85,26 +96,23 @@ String kind, String filename, CompilerFlags cflags) { - CharStream cs = null; BufferedInputStream bstream = new BufferedInputStream(stream); - //FIXME: definite NPE potential here -- do we even need prepBufreader - // now? + //FIMXE: npe? BufferedReader bufreader = null; modType node = null; try { if (kind.equals("eval")) { bufreader = prepBufreader(new LeadingSpaceSkippingStream(bstream), cflags); - cs = new ANTLRReaderStream(bufreader); + CharStream cs = new ANTLRReaderStream(bufreader); ExpressionParser e = new ExpressionParser(cs); node = e.parse(); } else if (kind.equals("single")) { bufreader = prepBufreader(bstream, cflags); - cs = new ANTLRReaderStream(bufreader); - InteractiveParser i = new InteractiveParser(cs); - node = i.partialParse(); + InteractiveParser i = new InteractiveParser(bufreader); + node = i.parse(); } else if (kind.equals("exec")) { bufreader = prepBufreader(bstream, cflags); - cs = new ANTLRReaderStream(bufreader); + CharStream cs = new ANTLRReaderStream(bufreader); ModuleParser g = new ModuleParser(cs); node = g.file_input(); } else { @@ -121,43 +129,50 @@ String filename, CompilerFlags cflags, boolean stdprompt) { - CharStream cs = null; - //FIXME: definite NPE potential here -- do we even need prepBufreader - // now? + modType node = null; + //FIMXE: npe? BufferedReader bufreader = null; - modType node = null; - if (kind.equals("single")) { - ByteArrayInputStream bi = new ByteArrayInputStream( - StringUtil.toBytes(string)); - BufferedInputStream bstream = new BufferedInputStream(bi); - try { + try { + if (kind.equals("single")) { + ByteArrayInputStream bi = new ByteArrayInputStream( + StringUtil.toBytes(string)); + BufferedInputStream bstream = bstream = new BufferedInputStream(bi); bufreader = prepBufreader(bstream, cflags); - cs = new ANTLRReaderStream(bufreader); - } catch (IOException io) { - //FIXME: + InteractiveParser i = new InteractiveParser(bufreader); + node = i.parse(); + } else { + throw Py.ValueError("parse kind must be eval, exec, " + "or single"); } - InteractiveParser i = new InteractiveParser(cs); - node = i.partialParse(); - } else { - throw Py.ValueError("parse kind must be eval, exec, " + "or single"); + } catch (Throwable t) { + PyException p = fixParseError(bufreader, t, filename); + if (validPartialSentence(bufreader)) { + return null; + } + throw p; } return node; } - private static BufferedReader prepBufreader(InputStream istream, - CompilerFlags cflags) throws IOException { - int nbytes; + private static boolean validPartialSentence(BufferedReader bufreader) { try { - nbytes = istream.available(); + bufreader.reset(); + CharStream cs = new NoCloseReaderStream(bufreader); + PythonPartialLexer lexer = new InteractiveParser.PPLexer(cs); + CommonTokenStream tokens = new CommonTokenStream(lexer); + tokens.discardOffChannelTokens(true); + PythonPartialTokenSource indentedSource = new PythonPartialTokenSource(tokens); + tokens = new CommonTokenStream(indentedSource); + PythonPartialParser parser = new PythonPartialParser(tokens); + parser.single_input(); + } catch (Exception e) { + return false; } - catch (IOException ioe1) { - nbytes = 10000; - } - if (nbytes <= 0) - nbytes = 10000; - if (nbytes > 100000) - nbytes = 100000; + return true; + } + + private static BufferedReader prepBufreader(InputStream istream, + CompilerFlags cflags) throws IOException { String encoding = readEncoding(istream); if(encoding == null && cflags != null && cflags.encoding != null) { encoding = cflags.encoding; @@ -183,7 +198,7 @@ BufferedReader bufreader = new BufferedReader(reader); - bufreader.mark(nbytes); + bufreader.mark(100000); return bufreader; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |