From: <fwi...@us...> - 2008-07-31 16:15:04
|
Revision: 5028 http://jython.svn.sourceforge.net/jython/?rev=5028&view=rev Author: fwierzbicki Date: 2008-07-31 16:14:55 +0000 (Thu, 31 Jul 2008) Log Message: ----------- Adding a Jython specific version of CommonErrorNode to use when there are errors in the AST but we want to continue parsing (for now this is only of use by external tools, but I'm considering making it an option since the information can be useful). Thanks to Allan Davis for the original version of CommonErrorNode. I've refactored his version so that PythonErrorNode does not repeat much of CommonErrorNode and put the creation logic into PythonTreeAdaptor. I have also added an interface ErrorHandler that allows the error handling in antlr to be customized. FailFastHandler implements the default behavior of dying on the first parse error. I also made the parse error output more useful, and factored a BaseParser out of the org/python/antlr/*Parser classes. Modified Paths: -------------- branches/asm/grammar/Python.g branches/asm/grammar/PythonPartial.g branches/asm/grammar/PythonWalker.g branches/asm/src/org/python/antlr/ExpressionParser.java branches/asm/src/org/python/antlr/InteractiveParser.java branches/asm/src/org/python/antlr/ModuleParser.java branches/asm/src/org/python/antlr/ParseException.java branches/asm/src/org/python/antlr/PythonTree.java branches/asm/src/org/python/antlr/PythonTreeAdaptor.java Added Paths: ----------- branches/asm/src/org/python/antlr/BaseParser.java branches/asm/src/org/python/antlr/ErrorHandler.java branches/asm/src/org/python/antlr/FailFastHandler.java branches/asm/src/org/python/antlr/PythonErrorNode.java Modified: branches/asm/grammar/Python.g =================================================================== --- branches/asm/grammar/Python.g 2008-07-31 16:03:59 UTC (rev 5027) +++ branches/asm/grammar/Python.g 2008-07-31 16:14:55 UTC (rev 5028) @@ -182,16 +182,19 @@ } @members { - //If you want to use antlr's default error recovery mechanisms change this - //and 
the same one in the lexer to true. - public boolean antlrErrorHandling = false; - //XXX: only used for single_input -- seems kludgy. public boolean inSingle = false; - private boolean seenSingleOuterSuite = false; boolean debugOn = false; + private ErrorHandler errorHandler; + + private boolean seenSingleOuterSuite = false; + + public void setErrorHandler(ErrorHandler eh) { + this.errorHandler = eh; + } + private void debug(String message) { if (debugOn) { System.out.println(message); @@ -398,7 +401,7 @@ protected void mismatch(IntStream input, int ttype, BitSet follow) throws RecognitionException { - if (antlrErrorHandling) { + if (errorHandler.isRecoverable()) { super.mismatch(input, ttype, follow); } else { throw new MismatchedTokenException(ttype, input); @@ -408,7 +411,7 @@ protected Object recoverFromMismatchedToken(IntStream input, int ttype, BitSet follow) throws RecognitionException { - if (antlrErrorHandling) { + if (errorHandler.isRecoverable()) { return super.recoverFromMismatchedToken(input, ttype, follow); } mismatch(input, ttype, follow); @@ -419,13 +422,9 @@ @rulecatch { catch (RecognitionException re) { - if (antlrErrorHandling) { - reportError(re); - recover(input,re); - retval.tree = (PythonTree)adaptor.errorNode(input, retval.start, input.LT(-1), re); - } else { - throw new ParseException(re); - } + errorHandler.reportError(this, re); + errorHandler.recover(this, input,re); + retval.tree = (PythonTree)adaptor.errorNode(input, retval.start, input.LT(-1), re); } } @@ -440,44 +439,55 @@ * 4] */ -//If you want to use antlr's default error recovery mechanisms change this -//and the same one in the parser to true. -public boolean antlrErrorHandling = false; +//If you want to use another error recovery mechanisms change this +//and the same one in the parser. +private ErrorHandler errorHandler; //XXX: Hopefully we can remove inSingle when we get PyCF_DONT_IMPLY_DEDENT support. 
public boolean inSingle = false; int implicitLineJoiningLevel = 0; int startPos=-1; - public Token nextToken() { - if (antlrErrorHandling) { - return super.nextToken(); - } - while (true) { - state.token = null; - state.channel = Token.DEFAULT_CHANNEL; - state.tokenStartCharIndex = input.index(); - state.tokenStartCharPositionInLine = input.getCharPositionInLine(); - state.tokenStartLine = input.getLine(); - state.text = null; - if ( input.LA(1)==CharStream.EOF ) { - return Token.EOF_TOKEN; - } - try { - mTokens(); - if ( state.token==null ) { - emit(); - } - else if ( state.token==Token.SKIP_TOKEN ) { - continue; - } - return state.token; - } - catch (RecognitionException re) { - throw new ParseException(re); - } - } + public void setErrorHandler(ErrorHandler eh) { + this.errorHandler = eh; } + + /** + * Taken directly from antlr's Lexer.java -- needs to be re-integrated every time + * we upgrade from Antlr (need to consider a Lexer subclass, though the issue would + * remain). + */ + public Token nextToken() { + while (true) { + state.token = null; + state.channel = Token.DEFAULT_CHANNEL; + state.tokenStartCharIndex = input.index(); + state.tokenStartCharPositionInLine = input.getCharPositionInLine(); + state.tokenStartLine = input.getLine(); + state.text = null; + if ( input.LA(1)==CharStream.EOF ) { + return Token.EOF_TOKEN; + } + try { + mTokens(); + if ( state.token==null ) { + emit(); + } + else if ( state.token==Token.SKIP_TOKEN ) { + continue; + } + return state.token; + } + catch (NoViableAltException nva) { + errorHandler.reportError(this, nva); + errorHandler.recover(this, nva); // throw out current char and try again + } + catch (RecognitionException re) { + errorHandler.reportError(this, re); + // match() routine has already called recover() + } + } + } } //single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE Modified: branches/asm/grammar/PythonPartial.g =================================================================== --- 
branches/asm/grammar/PythonPartial.g 2008-07-31 16:03:59 UTC (rev 5027) +++ branches/asm/grammar/PythonPartial.g 2008-07-31 16:14:55 UTC (rev 5028) @@ -165,7 +165,7 @@ return state.token; } catch (RecognitionException re) { - throw new ParseException(re); + throw new ParseException("failed partial", re); } } } Modified: branches/asm/grammar/PythonWalker.g =================================================================== --- branches/asm/grammar/PythonWalker.g 2008-07-31 16:03:59 UTC (rev 5027) +++ branches/asm/grammar/PythonWalker.g 2008-07-31 16:14:55 UTC (rev 5028) @@ -90,7 +90,13 @@ } @members { boolean debugOn = false; + private ErrorHandler errorHandler; + public void setErrorHandler(ErrorHandler eh) { + this.errorHandler = eh; + } + + public void debug(String message) { if (debugOn) { System.out.println(message); @@ -283,8 +289,9 @@ } @rulecatch { -catch (RecognitionException r) { - throw new ParseException(r); +catch (RecognitionException re) { + errorHandler.reportError(this, re); + errorHandler.recover(this, input,re); } } Added: branches/asm/src/org/python/antlr/BaseParser.java =================================================================== --- branches/asm/src/org/python/antlr/BaseParser.java (rev 0) +++ branches/asm/src/org/python/antlr/BaseParser.java 2008-07-31 16:14:55 UTC (rev 5028) @@ -0,0 +1,36 @@ +package org.python.antlr; + +import org.antlr.runtime.CharStream; +import org.antlr.runtime.CommonToken; +import org.antlr.runtime.CommonTokenStream; +import org.antlr.runtime.RecognitionException; +import org.antlr.runtime.Token; +import org.antlr.runtime.tree.CommonTree; +import org.antlr.runtime.tree.CommonTreeAdaptor; +import org.antlr.runtime.tree.CommonTreeNodeStream; +import org.antlr.runtime.tree.Tree; +import org.antlr.runtime.tree.TreeAdaptor; +import org.python.antlr.ast.modType; +import org.python.antlr.ast.Module; +import org.python.antlr.ast.stmtType; + +public class BaseParser { + public static class PyLexer extends PythonLexer { + 
public PyLexer(CharStream lexer) { + super(lexer); + } + + public Token nextToken() { + startPos = getCharPositionInLine(); + return super.nextToken(); + } + } + + protected CharStream charStream; + protected boolean partial; + protected ErrorHandler errorHandler = new FailFastHandler(); + + public void setAntlrErrorHandler(ErrorHandler eh) { + this.errorHandler = eh; + } +} Added: branches/asm/src/org/python/antlr/ErrorHandler.java =================================================================== --- branches/asm/src/org/python/antlr/ErrorHandler.java (rev 0) +++ branches/asm/src/org/python/antlr/ErrorHandler.java 2008-07-31 16:14:55 UTC (rev 5028) @@ -0,0 +1,12 @@ +package org.python.antlr; + +import org.antlr.runtime.BaseRecognizer; +import org.antlr.runtime.IntStream; +import org.antlr.runtime.RecognitionException; + +interface ErrorHandler { + void reportError(BaseRecognizer br, RecognitionException re); + void recover(BaseRecognizer br, RecognitionException re); + void recover(BaseRecognizer br, IntStream input, RecognitionException re); + boolean isRecoverable(); +} Modified: branches/asm/src/org/python/antlr/ExpressionParser.java =================================================================== --- branches/asm/src/org/python/antlr/ExpressionParser.java 2008-07-31 16:03:59 UTC (rev 5027) +++ branches/asm/src/org/python/antlr/ExpressionParser.java 2008-07-31 16:14:55 UTC (rev 5028) @@ -14,22 +14,8 @@ import org.python.antlr.ast.Module; import org.python.antlr.ast.stmtType; -public class ExpressionParser { +public class ExpressionParser extends BaseParser { - private CharStream charStream; - - //Extract superclass from this and the other XParsers. 
- public static class PyLexer extends PythonLexer { - public PyLexer(CharStream lexer) { - super(lexer); - } - - public Token nextToken() { - startPos = getCharPositionInLine(); - return super.nextToken(); - } - } - public ExpressionParser(CharStream cs) { this.charStream = cs; } @@ -37,11 +23,13 @@ public modType parse() { modType tree = null; PythonLexer lexer = new PyLexer(this.charStream); + lexer.setErrorHandler(errorHandler); CommonTokenStream tokens = new CommonTokenStream(lexer); tokens.discardOffChannelTokens(true); PythonTokenSource indentedSource = new PythonTokenSource(tokens); tokens = new CommonTokenStream(indentedSource); PythonParser parser = new PythonParser(tokens); + parser.setErrorHandler(errorHandler); parser.setTreeAdaptor(new PythonTreeAdaptor()); try { @@ -49,6 +37,7 @@ CommonTreeNodeStream nodes = new CommonTreeNodeStream((Tree)r.tree); nodes.setTokenStream(tokens); PythonWalker walker = new PythonWalker(nodes); + walker.setErrorHandler(errorHandler); tree = walker.expression(); } catch (RecognitionException e) { //XXX: this can't happen. 
Need to strip the throws from antlr Added: branches/asm/src/org/python/antlr/FailFastHandler.java =================================================================== --- branches/asm/src/org/python/antlr/FailFastHandler.java (rev 0) +++ branches/asm/src/org/python/antlr/FailFastHandler.java 2008-07-31 16:14:55 UTC (rev 5028) @@ -0,0 +1,31 @@ +package org.python.antlr; + +import org.antlr.runtime.BaseRecognizer; +import org.antlr.runtime.IntStream; +import org.antlr.runtime.RecognitionException; + +public class FailFastHandler implements ErrorHandler { + private BaseRecognizer recognizer; + + public void reportError(BaseRecognizer br, RecognitionException re) { + throw new ParseException(message(br,re), re); + } + + public void recover(BaseRecognizer br, RecognitionException re) { + throw new ParseException(message(br,re), re); + } + + public void recover(BaseRecognizer br, IntStream input, RecognitionException re) { + throw new ParseException(message(br,re), re); + } + + public boolean isRecoverable() { + return false; + } + + private String message(BaseRecognizer br, RecognitionException re) { + String hdr = br.getErrorHeader(re); + String msg = br.getErrorMessage(re, br.getTokenNames()); + return hdr+" "+msg; + } +} Modified: branches/asm/src/org/python/antlr/InteractiveParser.java =================================================================== --- branches/asm/src/org/python/antlr/InteractiveParser.java 2008-07-31 16:03:59 UTC (rev 5027) +++ branches/asm/src/org/python/antlr/InteractiveParser.java 2008-07-31 16:14:55 UTC (rev 5028) @@ -17,22 +17,10 @@ import org.python.antlr.ast.Module; import org.python.antlr.ast.stmtType; -public class InteractiveParser { +public class InteractiveParser extends BaseParser { private BufferedReader bufreader; - //Extract superclass from this and the other XParsers. 
- public static class PyLexer extends PythonLexer { - public PyLexer(CharStream lexer) { - super(lexer); - } - - public Token nextToken() { - startPos = getCharPositionInLine(); - return super.nextToken(); - } - } - public static class PPLexer extends PythonPartialLexer { public PPLexer(CharStream lexer) { super(lexer); @@ -51,6 +39,7 @@ public modType parse() throws IOException { modType tree = null; PythonLexer lexer = new PyLexer(new NoCloseReaderStream(bufreader)); + lexer.setErrorHandler(errorHandler); //XXX: Hopefully we can remove inSingle when we get PyCF_DONT_IMPLY_DEDENT support. lexer.inSingle = true; CommonTokenStream tokens = new CommonTokenStream(lexer); @@ -58,6 +47,7 @@ PythonTokenSource indentedSource = new PythonTokenSource(tokens); tokens = new CommonTokenStream(indentedSource); PythonParser parser = new PythonParser(tokens); + parser.setErrorHandler(errorHandler); parser.inSingle = true; parser.setTreeAdaptor(new PythonTreeAdaptor()); @@ -66,6 +56,7 @@ CommonTreeNodeStream nodes = new CommonTreeNodeStream((Tree)r.tree); nodes.setTokenStream(tokens); PythonWalker walker = new PythonWalker(nodes); + walker.setErrorHandler(errorHandler); tree = walker.interactive(); } catch (RecognitionException e) { //I am only throwing ParseExceptions, but "throws RecognitionException" still gets Modified: branches/asm/src/org/python/antlr/ModuleParser.java =================================================================== --- branches/asm/src/org/python/antlr/ModuleParser.java 2008-07-31 16:03:59 UTC (rev 5027) +++ branches/asm/src/org/python/antlr/ModuleParser.java 2008-07-31 16:14:55 UTC (rev 5028) @@ -14,22 +14,7 @@ import org.python.antlr.ast.Module; import org.python.antlr.ast.stmtType; -public class ModuleParser { - public static class PyLexer extends PythonLexer { - public PyLexer(CharStream lexer) { - super(lexer); - } - - public Token nextToken() { - startPos = getCharPositionInLine(); - return super.nextToken(); - } - } - - - private CharStream 
charStream; - private boolean partial; - +public class ModuleParser extends BaseParser { public ModuleParser(CharStream cs) { this(cs, false); } @@ -42,17 +27,20 @@ public modType file_input() { modType tree = null; PythonLexer lexer = new PyLexer(this.charStream); + lexer.setErrorHandler(errorHandler); CommonTokenStream tokens = new CommonTokenStream(lexer); tokens.discardOffChannelTokens(true); PythonTokenSource indentedSource = new PythonTokenSource(tokens); tokens = new CommonTokenStream(indentedSource); PythonParser parser = new PythonParser(tokens); + parser.setErrorHandler(errorHandler); parser.setTreeAdaptor(new PythonTreeAdaptor()); try { PythonParser.file_input_return r = parser.file_input(); CommonTreeNodeStream nodes = new CommonTreeNodeStream((Tree)r.tree); nodes.setTokenStream(tokens); PythonWalker walker = new PythonWalker(nodes); + walker.setErrorHandler(errorHandler); tree = walker.module(); if (tree == null) { //XXX: seems like I should be able to get antlr to give me an empty Module instead @@ -66,4 +54,5 @@ return tree; } + } Modified: branches/asm/src/org/python/antlr/ParseException.java =================================================================== --- branches/asm/src/org/python/antlr/ParseException.java 2008-07-31 16:03:59 UTC (rev 5027) +++ branches/asm/src/org/python/antlr/ParseException.java 2008-07-31 16:14:55 UTC (rev 5028) @@ -25,8 +25,8 @@ super(message); } - public ParseException(RecognitionException r) { - super(getErrorMessage(r)); + public ParseException(String message, RecognitionException r) { + super(message); input = r.input; index = r.index; token = r.token; @@ -37,120 +37,4 @@ approximateLineInfo = r.approximateLineInfo; } - /** - * getErrorMessage is a modified version of org.antlr.runtime.BaseRecognizer's - * method of the same name from * antlr-3.1 beta1. When we upgrade we should - * make sure to remain consistent. 
- */ - private static String getErrorMessage(RecognitionException e) { - String msg = e.getMessage(); - String tokenNames[] = PythonParser.tokenNames; - if ( e instanceof UnwantedTokenException ) { - UnwantedTokenException ute = (UnwantedTokenException)e; - String tokenName="<unknown>"; - if ( ute.expecting== Token.EOF ) { - tokenName = "EOF"; - } - else { - tokenName = tokenNames[ute.expecting]; - } - msg = "extraneous input "+getTokenErrorDisplay(ute.getUnexpectedToken())+ - " expecting "+tokenName; - } - else if ( e instanceof MissingTokenException ) { - MissingTokenException mte = (MissingTokenException)e; - String tokenName="<unknown>"; - if ( mte.expecting== Token.EOF ) { - tokenName = "EOF"; - } - else { - tokenName = tokenNames[mte.expecting]; - } - msg = "missing "+tokenName+" at "+getTokenErrorDisplay(e.token); - } - if ( e instanceof MismatchedTokenException ) { - MismatchedTokenException mte = (MismatchedTokenException)e; - String tokenName="<unknown>"; - if ( mte.expecting== Token.EOF ) { - tokenName = "EOF"; - } - else { - tokenName = tokenNames[mte.expecting]; - } - msg = "mismatched input "+getTokenErrorDisplay(e.token)+ - " expecting "+tokenName; - } - else if ( e instanceof MismatchedTreeNodeException ) { - MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e; - String tokenName="<unknown>"; - if ( mtne.expecting==Token.EOF ) { - tokenName = "EOF"; - } - else { - tokenName = tokenNames[mtne.expecting]; - } - msg = "mismatched tree node: "+mtne.node+ - " expecting "+tokenName; - } - else if ( e instanceof NoViableAltException ) { - NoViableAltException nvae = (NoViableAltException)e; - // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" - // and "(decision="+nvae.decisionNumber+") and - // "state "+nvae.stateNumber - msg = "no viable alternative at input "+getTokenErrorDisplay(e.token); - } - else if ( e instanceof EarlyExitException ) { - EarlyExitException eee = (EarlyExitException)e; - // for development, 
can add "(decision="+eee.decisionNumber+")" - msg = "required (...)+ loop did not match anything at input "+ - getTokenErrorDisplay(e.token); - } - else if ( e instanceof MismatchedSetException ) { - MismatchedSetException mse = (MismatchedSetException)e; - msg = "mismatched input "+getTokenErrorDisplay(e.token)+ - " expecting set "+mse.expecting; - } - else if ( e instanceof MismatchedNotSetException ) { - MismatchedNotSetException mse = (MismatchedNotSetException)e; - msg = "mismatched input "+getTokenErrorDisplay(e.token)+ - " expecting set "+mse.expecting; - } - else if ( e instanceof FailedPredicateException ) { - FailedPredicateException fpe = (FailedPredicateException)e; - msg = "rule "+fpe.ruleName+" failed predicate: {"+ - fpe.predicateText+"}?"; - } - return msg; - } - - private static String getTokenErrorDisplay(Token t) { - if (t == null) { - return ""; - } else { - return t.getText(); - } - } - - public int getOffset() { - if (input != null) { - if (input instanceof CharStream) { - return index; - } else { - return ((CommonToken)token).getStartIndex(); - } - } - return -1; - } - - public String info() { - return "info: " + - "input:" + input + - "\nindex:" + index + - "\ntoken:" + token + - "\nnode:" + node + - "\nc:" + c + - "\nline:" + line + - "\ncharPositionInLine:" + charPositionInLine + - "\napproximateLineInfo:" + approximateLineInfo; - } } Added: branches/asm/src/org/python/antlr/PythonErrorNode.java =================================================================== --- branches/asm/src/org/python/antlr/PythonErrorNode.java (rev 0) +++ branches/asm/src/org/python/antlr/PythonErrorNode.java 2008-07-31 16:14:55 UTC (rev 5028) @@ -0,0 +1,38 @@ +package org.python.antlr; + +import org.antlr.runtime.Token; +import org.antlr.runtime.TokenStream; +import org.antlr.runtime.RecognitionException; +import org.antlr.runtime.tree.CommonErrorNode; + +/** A node representing erroneous token range in token stream + */ +public class PythonErrorNode extends 
PythonTree { + + private CommonErrorNode errorNode; + + public PythonErrorNode(TokenStream input, Token start, Token stop, + RecognitionException e) { + this.errorNode = new CommonErrorNode(input, start, stop, e); + } + + public PythonErrorNode(CommonErrorNode errorNode){ + this.errorNode = errorNode; + } + + public boolean isNil() { + return errorNode.isNil(); + } + + public int getType() { + return errorNode.getType(); + } + + public String getText() { + return errorNode.getText(); + } + + public String toString() { + return errorNode.toString(); + } +} Modified: branches/asm/src/org/python/antlr/PythonTree.java =================================================================== --- branches/asm/src/org/python/antlr/PythonTree.java 2008-07-31 16:03:59 UTC (rev 5027) +++ branches/asm/src/org/python/antlr/PythonTree.java 2008-07-31 16:14:55 UTC (rev 5028) @@ -30,6 +30,12 @@ /** What index is this node in the child list? Range: 0..n-1 */ public int childIndex = -1; + /** + * The empty constructor is intended only for use by PythonErrorNode. 
+ */ + public PythonTree() { + } + public PythonTree(int ttype, Token t) { CommonToken c = new CommonToken(ttype, t.getText()); c.setLine(t.getLine()); Modified: branches/asm/src/org/python/antlr/PythonTreeAdaptor.java =================================================================== --- branches/asm/src/org/python/antlr/PythonTreeAdaptor.java 2008-07-31 16:03:59 UTC (rev 5027) +++ branches/asm/src/org/python/antlr/PythonTreeAdaptor.java 2008-07-31 16:14:55 UTC (rev 5028) @@ -1,7 +1,9 @@ package org.python.antlr; import org.antlr.runtime.CommonToken; +import org.antlr.runtime.RecognitionException; import org.antlr.runtime.Token; +import org.antlr.runtime.TokenStream; import org.antlr.runtime.tree.CommonTreeAdaptor; public class PythonTreeAdaptor extends CommonTreeAdaptor { @@ -45,6 +47,14 @@ return new PythonTree(token); } + public Object errorNode(TokenStream input, Token start, Token stop, + RecognitionException e) + { + PythonErrorNode t = new PythonErrorNode(input, start, stop, e); + //System.out.println("returning error node '"+t+"' @index="+input.index()); + return t; + } + public Object dupNode(Object t) { if (t == null) { return null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |