From: <fwi...@us...> - 2008-08-05 16:55:25
|
Revision: 5079 http://jython.svn.sourceforge.net/jython/?rev=5079&view=rev Author: fwierzbicki Date: 2008-08-05 16:55:18 +0000 (Tue, 05 Aug 2008) Log Message: ----------- Reworked lexing of newlines and EOF. Broke up PythonTokenSource into more useful methods. Removed inSingle from Python.g and added it to PythonTokenSource. Modified Paths: -------------- branches/asm/grammar/Python.g branches/asm/src/org/python/antlr/InteractiveParser.java branches/asm/src/org/python/antlr/PythonTokenSource.java Modified: branches/asm/grammar/Python.g =================================================================== --- branches/asm/grammar/Python.g 2008-08-04 21:34:24 UTC (rev 5078) +++ branches/asm/grammar/Python.g 2008-08-05 16:55:18 UTC (rev 5079) @@ -182,9 +182,6 @@ } @members { - //XXX: only used for single_input -- seems kludgy. - public boolean inSingle = false; - boolean debugOn = false; private ErrorHandler errorHandler; @@ -443,8 +440,6 @@ //and the same one in the parser. private ErrorHandler errorHandler; -//XXX: Hopefully we can remove inSingle when we get PyCF_DONT_IMPLY_DEDENT support. -public boolean inSingle = false; int implicitLineJoiningLevel = 0; int startPos=-1; @@ -489,7 +484,7 @@ } //single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -single_input : NEWLINE -> ^(Interactive) +single_input : NEWLINE? -> ^(Interactive) | simple_stmt -> ^(Interactive simple_stmt) | compound_stmt NEWLINE -> ^(Interactive compound_stmt) ; @@ -1349,12 +1344,11 @@ * Frank Wierzbicki added: Also ignore FORMFEEDS (\u000C). */ NEWLINE - : {inSingle}? => (('\u000C')?('\r')? '\n' ) - {if (implicitLineJoiningLevel>0 ) - $channel=HIDDEN; - } - | (('\u000C')?('\r')? '\n' )+ - {if ( startPos==0 || implicitLineJoiningLevel>0 ) +@init { + int newlines = 0; +} + : (('\u000C')?('\r')? '\n' {newlines++; } )+ { + if ( startPos==0 || implicitLineJoiningLevel>0 ) $channel=HIDDEN; } ; @@ -1371,30 +1365,42 @@ LEADING_WS @init { int spaces = 0; + int newlines = 0; } : {startPos==0}?=> ( {implicitLineJoiningLevel>0}? ( ' ' | '\t' )+ {$channel=HIDDEN;} | ( ' ' { spaces++; } | '\t' { spaces += 8; spaces -= (spaces \% 8); } )+ - { - // make a string of n spaces where n is column number - 1 - char[] indentation = new char[spaces]; - for (int i=0; i<spaces; i++) { - indentation[i] = ' '; - } - if (input.LA(1) != -1) { - CommonToken c = new CommonToken(LEADING_WS,new String(indentation)); - c.setLine(input.getLine()); - c.setCharPositionInLine(input.getCharPositionInLine()); - emit(c); - } else { - emit(new CommonToken(LEADING_WS,"")); - } - } - // kill trailing newline if present and then ignore - ( ('\r')? '\n' {if (state.token!=null) state.token.setChannel(HIDDEN); else $channel=HIDDEN;})* - // {state.token.setChannel(99); } + ( ('\r')? '\n' {newlines++; } + )* { + if (input.LA(1) != -1) { + // make a string of n spaces where n is column number - 1 + char[] indentation = new char[spaces]; + for (int i=0; i<spaces; i++) { + indentation[i] = ' '; + } + CommonToken c = new CommonToken(LEADING_WS,new String(indentation)); + c.setLine(input.getLine()); + c.setCharPositionInLine(input.getCharPositionInLine()); + emit(c); + // kill trailing newline if present and then ignore + if (newlines != 0) { + if (state.token!=null) { + state.token.setChannel(HIDDEN); + } else { + $channel=HIDDEN; + } + } + } else { + // make a string of n newlines + char[] nls = new char[newlines]; + for (int i=0; i<newlines; i++) { + nls[i] = '\n'; + } + emit(new CommonToken(NEWLINE,new String(nls))); + } + } ) ; Modified: branches/asm/src/org/python/antlr/InteractiveParser.java =================================================================== --- branches/asm/src/org/python/antlr/InteractiveParser.java 2008-08-04 21:34:24 UTC (rev 5078) +++ branches/asm/src/org/python/antlr/InteractiveParser.java 2008-08-05 16:55:18 UTC (rev 5079) @@ -41,15 +41,12 @@ modType tree = null; PythonLexer lexer = new PyLexer(new NoCloseReaderStream(bufreader)); lexer.setErrorHandler(errorHandler); - //XXX: Hopefully we can remove inSingle when we get PyCF_DONT_IMPLY_DEDENT support. - lexer.inSingle = true; CommonTokenStream tokens = new CommonTokenStream(lexer); tokens.discardOffChannelTokens(true); - PythonTokenSource indentedSource = new PythonTokenSource(tokens, filename); + PythonTokenSource indentedSource = new PythonTokenSource(tokens, filename, true); tokens = new CommonTokenStream(indentedSource); PythonParser parser = new PythonParser(tokens); parser.setErrorHandler(errorHandler); - parser.inSingle = true; parser.setTreeAdaptor(new PythonTreeAdaptor()); try { Modified: branches/asm/src/org/python/antlr/PythonTokenSource.java =================================================================== --- branches/asm/src/org/python/antlr/PythonTokenSource.java 2008-08-04 21:34:24 UTC (rev 5078) +++ branches/asm/src/org/python/antlr/PythonTokenSource.java 2008-08-05 16:55:18 UTC (rev 5079) @@ -91,13 +91,20 @@ int lastTokenAddedIndex = -1; String filename; + boolean inSingle; public PythonTokenSource(PythonLexer lexer) { } + public PythonTokenSource(CommonTokenStream stream, String filename) { + this(stream, filename, false); + } + + public PythonTokenSource(CommonTokenStream stream, String filename, boolean single) { this.stream = stream; this.filename = filename; + this.inSingle = single; // "state" of indent level is FIRST_CHAR_POSITION push(FIRST_CHAR_POSITION); } @@ -129,7 +136,7 @@ if (tokens.size() > 0) { Token t = (Token)tokens.firstElement(); tokens.removeElementAt(0); - //System.out.println(t); + //System.out.println(filename + t); return t; } @@ -138,32 +145,36 @@ return nextToken(); } + private void generateNewline(Token t) { + // Imaginary newline before EOF + CommonToken newline = new CommonToken(PythonLexer.NEWLINE, "\n"); + newline.setLine(t.getLine()); + newline.setCharPositionInLine(t.getCharPositionInLine()); + tokens.addElement(newline); + } + protected void insertImaginaryIndentDedentTokens() { Token t = stream.LT(1); stream.consume(); - // if the current token is not a NEWLINE or EOF, it doesn't signal indent/dedent work; just enqueue - if (t.getType() != PythonLexer.NEWLINE && t.getType() != PythonLexer.EOF) { - List hiddenTokens = stream.getTokens(lastTokenAddedIndex + 1,t.getTokenIndex() - 1); - if (hiddenTokens != null) { - tokens.addAll(hiddenTokens); + if (t.getType() == Token.EOF) { + Token prev = stream.LT(-1); + if (!inSingle && (prev == null || prev.getType() != PythonLexer.NEWLINE)) { + generateNewline(t); } - lastTokenAddedIndex = t.getTokenIndex(); - tokens.addElement(t); - return; - } - CommonToken newline; - if (t.getType() == PythonLexer.NEWLINE) { + handleDedents(-1, (CommonToken)t); + enqueue(t); + } else if (t.getType() == PythonLexer.NEWLINE) { // save NEWLINE in the queue //System.out.println("found newline: "+t+" stack is "+stackString()); - newline = (CommonToken)t; List hiddenTokens = stream.getTokens(lastTokenAddedIndex + 1,t.getTokenIndex() - 1); if (hiddenTokens!=null) { tokens.addAll(hiddenTokens); } lastTokenAddedIndex = t.getTokenIndex(); tokens.addElement(t); + Token newline = t; // grab first token of next line t = stream.LT(1); @@ -174,63 +185,85 @@ tokens.addAll(hiddenTokens); } lastTokenAddedIndex = t.getTokenIndex(); - } else { - // Imaginary newline before EOF - newline = new CommonToken(PythonLexer.NEWLINE, "\n"); - newline.setLine(t.getLine()); - newline.setCharPositionInLine(t.getCharPositionInLine()); - //XXX: this is where lsoto had this... - //tokens.addElement(newline); - } - // compute cpos as the char pos of next non-WS token in line - int cpos = t.getCharPositionInLine(); // column dictates indent/dedent - if (t.getType() == Token.EOF) { - cpos = -1; // pretend EOF always happens at left edge - } - else if (t.getType() == PythonLexer.LEADING_WS) { - cpos = t.getText().length(); - } + // compute cpos as the char pos of next non-WS token in line + int cpos = t.getCharPositionInLine(); // column dictates indent/dedent + if (t.getType() == Token.EOF) { + cpos = -1; // pretend EOF always happens at left edge + } + else if (t.getType() == PythonLexer.LEADING_WS) { + Token next = stream.LT(1); + if (next != null && next.getType() == Token.EOF) { + stream.consume(); + return; + } else { + cpos = t.getText().length(); + } + } - //System.out.println("next token is: "+t); + //System.out.println("next token is: "+t); - // compare to last indent level - int lastIndent = peek(); - //System.out.println("cpos, lastIndent = "+cpos+", "+lastIndent); - if (cpos > lastIndent) { // they indented; track and gen INDENT - push(cpos); - //System.out.println("push("+cpos+"): "+stackString()); - Token indent = new CommonToken(PythonParser.INDENT,""); - indent.setCharPositionInLine(t.getCharPositionInLine()); - indent.setLine(t.getLine()); - tokens.addElement(indent); - } - else if (cpos < lastIndent) { // they dedented - // how far back did we dedent? - int prevIndex = findPreviousIndent(cpos, t); - //System.out.println("dedented; prevIndex of cpos="+cpos+" is "+prevIndex); - // generate DEDENTs for each indent level we backed up over - for (int d = sp - 1; d >= prevIndex; d--) { - CommonToken dedent = new CommonToken(PythonParser.DEDENT,""); - dedent.setCharPositionInLine(t.getCharPositionInLine()); - dedent.setLine(t.getLine()); + // compare to last indent level + int lastIndent = peek(); + //System.out.println("cpos, lastIndent = "+cpos+", "+lastIndent); + if (cpos > lastIndent) { // they indented; track and gen INDENT + handleIndents(cpos, (CommonToken)t); + } + else if (cpos < lastIndent) { // they dedented + handleDedents(cpos, (CommonToken)t); + } - //XXX: this will get messed up by comments. - dedent.setStartIndex(newline.getStartIndex()); - dedent.setStopIndex(newline.getStopIndex()); + if (t.getType() == Token.EOF && inSingle) { + String newlines = newline.getText(); + for(int i=1;i<newlines.length();i++) { + generateNewline(newline); + } + } - tokens.addElement(dedent); + if (t.getType() != PythonLexer.LEADING_WS) { // discard WS + tokens.addElement(t); } - sp = prevIndex; // pop those off indent level + + } else { + enqueue(t); } - //XXX: make sure lsoto's stuff isn't broken by this... - if (t.getType() == PythonLexer.EOF) { - tokens.addElement(newline); + } + + private void enqueue(Token t) { + List hiddenTokens = stream.getTokens(lastTokenAddedIndex + 1,t.getTokenIndex() - 1); + if (hiddenTokens != null) { + tokens.addAll(hiddenTokens); } + lastTokenAddedIndex = t.getTokenIndex(); + tokens.addElement(t); + } - if (t.getType() != PythonLexer.LEADING_WS) { // discard WS - tokens.addElement(t); + private void handleIndents(int cpos, CommonToken t) { + push(cpos); + //System.out.println("push("+cpos+"): "+stackString()); + Token indent = new CommonToken(PythonParser.INDENT,""); + indent.setCharPositionInLine(t.getCharPositionInLine()); + indent.setLine(t.getLine()); + tokens.addElement(indent); + } + + private void handleDedents(int cpos, CommonToken t) { + // how far back did we dedent? + int prevIndex = findPreviousIndent(cpos, t); + //System.out.println("dedented; prevIndex of cpos="+cpos+" is "+prevIndex); + // generate DEDENTs for each indent level we backed up over + for (int d = sp - 1; d >= prevIndex; d--) { + CommonToken dedent = new CommonToken(PythonParser.DEDENT,""); + dedent.setCharPositionInLine(t.getCharPositionInLine()); + dedent.setLine(t.getLine()); + + //XXX: this will get messed up by comments. + dedent.setStartIndex(t.getStartIndex()); + dedent.setStopIndex(t.getStopIndex()); + + tokens.addElement(dedent); } + sp = prevIndex; // pop those off indent level } // T O K E N S T A C K M E T H O D S This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |