[Jython-checkins] SF.net SVN: jython:[6509] trunk/jython

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 6509
          http://jython.svn.sourceforge.net/jython/?rev=6509&view=rev
Author:   fwierzbicki
Date:     2009-07-05 18:07:53 +0000 (Sun, 05 Jul 2009)

Log Message:
-----------
PythonPartial now has its own lexer.

Modified Paths:
--------------
    trunk/jython/grammar/PythonPartial.g
    trunk/jython/src/org/python/antlr/BaseParser.java
    trunk/jython/src/org/python/core/ParserFacade.java
    trunk/jython/tests/java/org/python/antlr/PythonPartialTester.java

Modified: trunk/jython/grammar/PythonPartial.g
===================================================================

--- trunk/jython/grammar/PythonPartial.g	2009-07-05 14:55:44 UTC (rev 6508)
+++ trunk/jython/grammar/PythonPartial.g	2009-07-05 18:07:53 UTC (rev 6509)
@@ -59,7 +59,7 @@
  *
  */
 
-parser grammar PythonPartial;
+grammar PythonPartial;
 
 options {
     tokenVocab=Python;
@@ -90,6 +90,78 @@
 }
 }
 
+@lexer::header { 
+package org.python.antlr;
+}
+
+@lexer::members {
+/** Handles context-sensitive lexing of implicit line joining such as
+ *  the case where newline is ignored in cases like this:
+ *  a = [3,
+ *       4]
+ */
+
+//For use in partial parsing.
+public boolean eofWhileNested = false;
+public boolean partial = false;
+
+int implicitLineJoiningLevel = 0;
+int startPos=-1;
+
+//If you want to use another error recovery mechanism change this
+//and the same one in the parser.
+private ErrorHandler errorHandler;
+
+    public void setErrorHandler(ErrorHandler eh) {
+        this.errorHandler = eh;
+    }
+
+    /** 
+     *  Taken directly from antlr's Lexer.java -- needs to be re-integrated every time
+     *  we upgrade from Antlr (need to consider a Lexer subclass, though the issue would
+     *  remain).
+     */
+    public Token nextToken() {
+        while (true) {
+            state.token = null;
+            state.channel = Token.DEFAULT_CHANNEL;
+            state.tokenStartCharIndex = input.index();
+            state.tokenStartCharPositionInLine = input.getCharPositionInLine();
+            state.tokenStartLine = input.getLine();
+            state.text = null;
+            if ( input.LA(1)==CharStream.EOF ) {
+                if (implicitLineJoiningLevel > 0) {
+                    eofWhileNested = true;
+                }
+                return Token.EOF_TOKEN;
+            }
+            try {
+                mTokens();
+                if ( state.token==null ) {
+                    emit();
+                }
+                else if ( state.token==Token.SKIP_TOKEN ) {
+                    continue;
+                }
+                return state.token;
+            } catch (NoViableAltException nva) {
+                errorHandler.reportError(this, nva);
+                errorHandler.recover(this, nva); // throw out current char and try again
+            } catch (FailedPredicateException fp) {
+                //XXX: added this for failed STRINGPART -- the FailedPredicateException
+                //     hides a NoViableAltException.  This should be the only
+                //     FailedPredicateException that gets thrown by the lexer.
+                errorHandler.reportError(this, fp);
+                errorHandler.recover(this, fp); // throw out current char and try again
+            } catch (RecognitionException re) {
+                errorHandler.reportError(this, re);
+                // match() routine has already called recover()
+            }
+        }
+    }
+}
+
+
 //single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
 single_input
 
@@ -934,3 +1006,318 @@
    
     ;
 
+AS        : 'as' ;
+ASSERT    : 'assert' ;
+BREAK     : 'break' ;
+CLASS     : 'class' ;
+CONTINUE  : 'continue' ;
+DEF       : 'def' ;
+DELETE    : 'del' ;
+ELIF      : 'elif' ;
+EXCEPT    : 'except' ;
+EXEC      : 'exec' ;
+FINALLY   : 'finally' ;
+FROM      : 'from' ;
+FOR       : 'for' ;
+GLOBAL    : 'global' ;
+IF        : 'if' ;
+IMPORT    : 'import' ;
+IN        : 'in' ;
+IS        : 'is' ;
+LAMBDA    : 'lambda' ;
+ORELSE    : 'else' ;
+PASS      : 'pass'  ;
+PRINT     : 'print' ;
+RAISE     : 'raise' ;
+RETURN    : 'return' ;
+TRY       : 'try' ;
+WHILE     : 'while' ;
+WITH      : 'with' ;
+YIELD     : 'yield' ;
+
+LPAREN    : '(' {implicitLineJoiningLevel++;} ;
+
+RPAREN    : ')' {implicitLineJoiningLevel--;} ;
+
+LBRACK    : '[' {implicitLineJoiningLevel++;} ;
+
+RBRACK    : ']' {implicitLineJoiningLevel--;} ;
+
+COLON     : ':' ;
+
+COMMA    : ',' ;
+
+SEMI    : ';' ;
+
+PLUS    : '+' ;
+
+MINUS    : '-' ;
+
+STAR    : '*' ;
+
+SLASH    : '/' ;
+
+VBAR    : '|' ;
+
+AMPER    : '&' ;
+
+LESS    : '<' ;
+
+GREATER    : '>' ;
+
+ASSIGN    : '=' ;
+
+PERCENT    : '%' ;
+
+BACKQUOTE    : '`' ;
+
+LCURLY    : '{' {implicitLineJoiningLevel++;} ;
+
+RCURLY    : '}' {implicitLineJoiningLevel--;} ;
+
+CIRCUMFLEX    : '^' ;
+
+TILDE    : '~' ;
+
+EQUAL    : '==' ;
+
+NOTEQUAL    : '!=' ;
+
+ALT_NOTEQUAL: '<>' ;
+
+LESSEQUAL    : '<=' ;
+
+LEFTSHIFT    : '<<' ;
+
+GREATEREQUAL    : '>=' ;
+
+RIGHTSHIFT    : '>>' ;
+
+PLUSEQUAL    : '+=' ;
+
+MINUSEQUAL    : '-=' ;
+
+DOUBLESTAR    : '**' ;
+
+STAREQUAL    : '*=' ;
+
+DOUBLESLASH    : '//' ;
+
+SLASHEQUAL    : '/=' ;
+
+VBAREQUAL    : '|=' ;
+
+PERCENTEQUAL    : '%=' ;
+
+AMPEREQUAL    : '&=' ;
+
+CIRCUMFLEXEQUAL    : '^=' ;
+
+LEFTSHIFTEQUAL    : '<<=' ;
+
+RIGHTSHIFTEQUAL    : '>>=' ;
+
+DOUBLESTAREQUAL    : '**=' ;
+
+DOUBLESLASHEQUAL    : '//=' ;
+
+DOT : '.' ;
+
+AT : '@' ;
+
+AND : 'and' ;
+
+OR : 'or' ;
+
+NOT : 'not' ;
+
+FLOAT
+    :   '.' DIGITS (Exponent)?
+    |   DIGITS '.' Exponent
+    |   DIGITS ('.' (DIGITS (Exponent)?)? | Exponent)
+    ;
+
+LONGINT
+    :   INT ('l'|'L')
+    ;
+
+fragment
+Exponent
+    :    ('e' | 'E') ( '+' | '-' )? DIGITS
+    ;
+
+INT :   // Hex
+        '0' ('x' | 'X') ( '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' )+
+    |   // Octal
+        '0'  ( '0' .. '7' )*
+    |   '1'..'9' DIGITS*
+    ;
+
+COMPLEX
+    :   DIGITS+ ('j'|'J')
+    |   FLOAT ('j'|'J')
+    ;
+
+fragment
+DIGITS : ( '0' .. '9' )+ ;
+
+NAME:    ( 'a' .. 'z' | 'A' .. 'Z' | '_')
+        ( 'a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9' )*
+    ;
+
+/** Match various string types.  Note that greedy=false implies '''
+ *  should make us exit loop not continue.
+ */
+STRING
+    :   ('r'|'u'|'ur'|'R'|'U'|'UR'|'uR'|'Ur')?
+        (   '\'\'\'' (options {greedy=false;}:TRIAPOS)* '\'\'\''
+        |   '"""' (options {greedy=false;}:TRIQUOTE)* '"""'
+        |   '"' (ESC|~('\\'|'\n'|'"'))* '"'
+        |   '\'' (ESC|~('\\'|'\n'|'\''))* '\''
+        ) {
+           if (state.tokenStartLine != input.getLine()) {
+               state.tokenStartLine = input.getLine();
+               state.tokenStartCharPositionInLine = -2;
+           }
+        }
+    ;
+
+STRINGPART
+    : ('r'|'u'|'ur'|'R'|'U'|'UR'|'uR'|'Ur')?
+        (   '\'\'\'' ~('\'\'\'')*
+        |   '"""' ~('"""')*
+        )
+    ;
+
+/** the two '"'? cause a warning -- is there a way to avoid that? */
+fragment
+TRIQUOTE
+    : '"'? '"'? (ESC|~('\\'|'"'))+
+    ;
+
+/** the two '\''? cause a warning -- is there a way to avoid that? */
+fragment
+TRIAPOS
+    : '\''? '\''? (ESC|~('\\'|'\''))+
+    ;
+
+fragment
+ESC
+    :    '\\' .
+    ;
+
+/** Consume a newline and any whitespace at start of next line
+ *  unless the next line contains only white space, in that case
+ *  emit a newline.
+ */
+CONTINUED_LINE
+    :    '\\' ('\r')? '\n' (' '|'\t')*  { $channel=HIDDEN; }
+         ( c1=COMMENT
+         | nl=NEWLINE
+         |
+         ) {
+               if (input.LA(1) == -1) {
+                   emit(new CommonToken(TRAILBACKSLASH,"\\"));
+               }
+           }
+    ;
+
+/** Treat a sequence of blank lines as a single blank line.  If
+ *  nested within a (..), {..}, or [..], then ignore newlines.
+ *  If the first newline starts in column one, they are to be ignored.
+ *
+ *  Frank Wierzbicki added: Also ignore FORMFEEDS (\u000C).
+ */
+NEWLINE
+@init {
+    int newlines = 0;
+}
+    :   (('\u000C')?('\r')? '\n' {newlines++; } )+ {
+         if ( startPos==0 || implicitLineJoiningLevel>0 )
+            $channel=HIDDEN;
+        }
+    ;
+
+WS  :    {startPos>0}?=> (' '|'\t'|'\u000C')+ {$channel=HIDDEN;}
+    ;
+    
+/** Grab everything before a real symbol.  Then if newline, kill it
+ *  as this is a blank line.  If whitespace followed by comment, kill it
+ *  as it's a comment on a line by itself.
+ *
+ *  Ignore leading whitespace when nested in [..], (..), {..}.
+ */
+LEADING_WS
+@init {
+    int spaces = 0;
+    int newlines = 0;
+}
+    :   {startPos==0}?=>
+        (   {implicitLineJoiningLevel>0}? ( ' ' | '\t' )+ {$channel=HIDDEN;}
+        |    (     ' '  { spaces++; }
+             |    '\t' { spaces += 8; spaces -= (spaces \% 8); }
+             )+
+             ( ('\r')? '\n' {newlines++; }
+             )* {
+                   if (input.LA(1) != -1 || newlines == 0) {
+                       // make a string of n spaces where n is column number - 1
+                       char[] indentation = new char[spaces];
+                       for (int i=0; i<spaces; i++) {
+                           indentation[i] = ' ';
+                       }
+                       CommonToken c = new CommonToken(LEADING_WS,new String(indentation));
+                       c.setLine(input.getLine());
+                       c.setCharPositionInLine(input.getCharPositionInLine());
+                       c.setStartIndex(input.index() - 1);
+                       c.setStopIndex(input.index() - 1);
+                       emit(c);
+                       // kill trailing newline if present and then ignore
+                       if (newlines != 0) {
+                           if (state.token!=null) {
+                               state.token.setChannel(HIDDEN);
+                           } else {
+                               $channel=HIDDEN;
+                           }
+                       }
+                   } else {
+                       // make a string of n newlines
+                       char[] nls = new char[newlines];
+                       for (int i=0; i<newlines; i++) {
+                           nls[i] = '\n';
+                       }
+                       CommonToken c = new CommonToken(NEWLINE,new String(nls));
+                       c.setLine(input.getLine());
+                       c.setCharPositionInLine(input.getCharPositionInLine());
+                       c.setStartIndex(input.index() - 1);
+                       c.setStopIndex(input.index() - 1);
+                       emit(c);
+                   }
+                }
+        )
+    ;
+
+/** Comments not on line by themselves are turned into newlines.
+
+    b = a # end of line comment
+
+    or
+
+    a = [1, # weird
+         2]
+
+    This rule is invoked directly by nextToken when the comment is in
+    first column or when comment is on end of nonwhitespace line.
+
+    Only match \n here if we didn't start on left edge; let NEWLINE return that.
+    Kill newlines if we live on a line by ourselves
+    
+    Consume any leading whitespace if it starts on left edge.
+ */
+COMMENT
+@init {
+    $channel=HIDDEN;
+}
+    :    {startPos==0}?=> (' '|'\t')* '#' (~'\n')* '\n'+
+    |    '#' (~'\n')* // let NEWLINE handle \n unless char pos==0 for '#'
+    ;
+

Modified: trunk/jython/src/org/python/antlr/BaseParser.java
===================================================================
--- trunk/jython/src/org/python/antlr/BaseParser.java	2009-07-05 14:55:44 UTC (rev 6508)
+++ trunk/jython/src/org/python/antlr/BaseParser.java	2009-07-05 18:07:53 UTC (rev 6509)
@@ -41,7 +41,18 @@
             return super.nextToken();
         }
     }
-    
+
+    public static class PyPartialLexer extends PythonPartialLexer {
+        public PyPartialLexer(CharStream lexer) {
+            super(lexer);
+        }
+
+        public Token nextToken() {
+            startPos = getCharPositionInLine();
+            return super.nextToken();
+        }
+    }
+     
     private CharStream charStream(boolean single) {
         return charStream;
     }

Modified: trunk/jython/src/org/python/core/ParserFacade.java
===================================================================
--- trunk/jython/src/org/python/core/ParserFacade.java	2009-07-05 14:55:44 UTC (rev 6508)
+++ trunk/jython/src/org/python/core/ParserFacade.java	2009-07-05 18:07:53 UTC (rev 6509)
@@ -25,7 +25,8 @@
 import org.python.antlr.NoCloseReaderStream;
 import org.python.antlr.ParseException;
 import org.python.antlr.PythonLexer;
-import org.python.antlr.PythonPartial;
+import org.python.antlr.PythonPartialLexer;
+import org.python.antlr.PythonPartialParser;
 import org.python.antlr.PythonTokenSource;
 import org.python.antlr.PythonTree;
 import org.python.antlr.base.mod;
@@ -183,16 +184,15 @@
     }
 
     private static boolean validPartialSentence(BufferedReader bufreader, CompileMode kind, String filename) {
-        PythonLexer lexer = null;
+        PythonPartialLexer lexer = null;
         try {
             bufreader.reset();
             CharStream cs = new NoCloseReaderStream(bufreader);
-            lexer = new BaseParser.PyLexer(cs);
-            lexer.partial = true;
+            lexer = new BaseParser.PyPartialLexer(cs);
             CommonTokenStream tokens = new CommonTokenStream(lexer);
             PythonTokenSource indentedSource = new PythonTokenSource(tokens, filename);
             tokens = new CommonTokenStream(indentedSource);
-            PythonPartial parser = new PythonPartial(tokens);
+            PythonPartialParser parser = new PythonPartialParser(tokens);
             switch (kind) {
             case single:
                 parser.single_input();

Modified: trunk/jython/tests/java/org/python/antlr/PythonPartialTester.java
===================================================================
--- trunk/jython/tests/java/org/python/antlr/PythonPartialTester.java	2009-07-05 14:55:44 UTC (rev 6508)
+++ trunk/jython/tests/java/org/python/antlr/PythonPartialTester.java	2009-07-05 18:07:53 UTC (rev 6509)
@@ -14,12 +14,12 @@
         try {
             PythonTree result = null;
             CharStream input = new ANTLRFileStream(args[0]);
-            PythonLexer lexer = new BaseParser.PyLexer(input);
+            PythonPartialLexer lexer = new BaseParser.PyPartialLexer(input);
             CommonTokenStream tokens = new CommonTokenStream(lexer);
             //PythonTokenSource indentedSource = new PythonTokenSource(tokens);
             PythonTokenSource indentedSource = new PythonTokenSource(tokens, "<test>");
             tokens = new CommonTokenStream(indentedSource);
-            PythonPartial parser = new PythonPartial(tokens);
+            PythonPartialParser parser = new PythonPartialParser(tokens);
             parser.single_input();
             System.out.println("SUCCEED");
         } catch (ParseException e) {


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.