[Jython-checkins] SF.net SVN: jython:[4988] branches/asm

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 4988
          http://jython.svn.sourceforge.net/jython/?rev=4988&view=rev
Author:   fwierzbicki
Date:     2008-07-23 01:58:01 +0000 (Wed, 23 Jul 2008)

Log Message:
-----------
Made Str offsets better match CPython.
PythonTree now extends BaseTree (too much of CommonTree was getting changed anyway).
astdump.py repaired so that it is a useful testing ground for ast comparisons.

Modified Paths:
--------------
    branches/asm/ast/astdump.py
    branches/asm/grammar/Python.g
    branches/asm/src/org/python/antlr/PythonTree.java

Modified: branches/asm/ast/astdump.py
===================================================================

--- branches/asm/ast/astdump.py	2008-07-23 01:21:32 UTC (rev 4987)
+++ branches/asm/ast/astdump.py	2008-07-23 01:58:01 UTC (rev 4988)
@@ -29,9 +29,11 @@
         pyfiles = [code_path]
 
     for pyfile in pyfiles:
-        print "%s to %s" % (pyfile, output_dir)
         import pprint
-        fh = open(makepath(os.path.join(output_dir, pyfile)), 'w')
+        path = pyfile.split(os.path.sep)
+        print "%s to %s: %s" % (pyfile, output_dir, os.path.join(output_dir, *path))
+        fh = open(makepath(os.path.join(output_dir, *path)), 'w')
+        print fh
         pprint.pprint(astview.tree(pyfile), fh)
 
 if __name__ == '__main__':

Modified: branches/asm/grammar/Python.g
===================================================================
--- branches/asm/grammar/Python.g	2008-07-23 01:21:32 UTC (rev 4987)
+++ branches/asm/grammar/Python.g	2008-07-23 01:58:01 UTC (rev 4988)
@@ -393,8 +393,7 @@
     }
 
     Token extractStringToken(List s) {
-        //XXX: really we want the *last* one.
-        return (Token)s.get(0);
+        return (Token)s.get(s.size() - 1);
     }
 
  
@@ -956,7 +955,7 @@
      | LONGINT -> ^(NumTok<Num>[$LONGINT, makeInt($LONGINT)])
      | FLOAT -> ^(NumTok<Num>[$FLOAT, makeFloat($FLOAT)])
      | COMPLEX -> ^(NumTok<Num>[$COMPLEX, makeComplex($COMPLEX)])
-     | (S+=STRING)+ {debug("S+: " + $S);} 
+     | (S+=STRING)+ 
     -> ^(StrTok<Str>[extractStringToken($S), extractStrings($S)])
      ;
 
@@ -1299,7 +1298,12 @@
         |   '"""' (options {greedy=false;}:TRIQUOTE)* '"""'
         |   '"' (ESC|~('\\'|'\n'|'"'))* '"'
         |   '\'' (ESC|~('\\'|'\n'|'\''))* '\''
-        )
+        ) {
+           if (state.tokenStartLine != input.getLine()) {
+               state.tokenStartLine = input.getLine();
+               state.tokenStartCharPositionInLine = -2;
+           }
+        }
     ;
 
 /** the two '"'? cause a warning -- is there a way to avoid that? */

Modified: branches/asm/src/org/python/antlr/PythonTree.java
===================================================================
--- branches/asm/src/org/python/antlr/PythonTree.java	2008-07-23 01:21:32 UTC (rev 4987)
+++ branches/asm/src/org/python/antlr/PythonTree.java	2008-07-23 01:58:01 UTC (rev 4988)
@@ -1,7 +1,7 @@
 package org.python.antlr;
 
 import org.antlr.runtime.tree.BaseTree;
-import org.antlr.runtime.tree.CommonTree;
+import org.antlr.runtime.tree.Tree;
 import org.antlr.runtime.CommonToken;
 import org.antlr.runtime.Token;
 
@@ -10,14 +10,27 @@
 
 import org.python.antlr.ast.VisitorIF;
 
-public class PythonTree extends CommonTree implements AST {
+public class PythonTree extends BaseTree implements AST {
 
     public boolean from_future_checked = false;
     private int charStartIndex = -1;
     private int charStopIndex = -1;
 
+	/** A single token is the payload */
+	public Token token;
+
+	/** What token indexes bracket all tokens associated with this node
+	 *  and below?
+	 */
+	protected int startIndex=-1, stopIndex=-1;
+
+	/** Who is the parent node of this node; if null, implies node is root */
+	public PythonTree parent;
+
+	/** What index is this node in the child list? Range: 0..n-1 */
+	public int childIndex = -1;
+
     public PythonTree(int ttype, Token t) {
-        super();
         CommonToken c = new CommonToken(ttype, t.getText());
         c.setLine(t.getLine());
         c.setTokenIndex(t.getTokenIndex());
@@ -28,16 +41,109 @@
         token = c;
     }
 
-    public PythonTree(Token token) {
-        super(token);
+    public PythonTree(Token t) {
+        this.token = t;
     }
 
     public PythonTree(PythonTree node) {
-        super(node);
+		super(node);
+		token = node.token;
+		startIndex = node.startIndex;
+		stopIndex = node.stopIndex;
         charStartIndex = node.getCharStartIndex();
         charStopIndex = node.getCharStopIndex();
     }
+	
+	public Token getToken() {
+		return token;
+	}
 
+	public Tree dupNode() {
+		return new PythonTree(this);
+	}
+
+	public boolean isNil() {
+		return token==null;
+	}
+
+	public int getType() {
+		if (token==null) {
+			return Token.INVALID_TOKEN_TYPE;
+		}
+		return token.getType();
+	}
+
+	public String getText() {
+		if (token==null) {
+			return null;
+		}
+		return token.getText();
+	}
+
+	public int getLine() {
+		if (token==null || token.getLine()==0) {
+			if ( getChildCount()>0 ) {
+				return getChild(0).getLine();
+			}
+			return 0;
+		}
+		return token.getLine();
+	}
+
+	public int getCharPositionInLine() {
+		if (token==null || token.getCharPositionInLine()==-1) {
+			if (getChildCount()>0) {
+				return getChild(0).getCharPositionInLine();
+			}
+			return 0;
+		} else if (token != null && token.getCharPositionInLine() == -2) {
+            //XXX: yucky fix because CPython's ast uses -1 as a real value
+            //     for char pos in certain circumstances (for example, the
+            //     char pos of multi-line strings).  I would just use -1,
+            //     but ANTLR is using -1 in special ways also.
+            return -1;
+        }
+		return token.getCharPositionInLine();
+	}
+
+	public int getTokenStartIndex() {
+		if ( startIndex==-1 && token!=null ) {
+			return token.getTokenIndex();
+		}
+		return startIndex;
+	}
+
+	public void setTokenStartIndex(int index) {
+		startIndex = index;
+	}
+
+	public int getTokenStopIndex() {
+		if ( stopIndex==-1 && token!=null ) {
+			return token.getTokenIndex();
+		}
+		return stopIndex;
+	}
+
+	public void setTokenStopIndex(int index) {
+		stopIndex = index;
+	}
+
+	public int getChildIndex() {
+		return childIndex;
+	}
+
+	public Tree getParent() {
+		return parent;
+	}
+
+	public void setParent(Tree t) {
+		this.parent = (PythonTree)t;
+	}
+
+	public void setChildIndex(int index) {
+		this.childIndex = index;
+	}
+
     public int getCharStartIndex() {
         if (charStartIndex == -1 && token != null) {
             if (token instanceof CommonToken) {
@@ -54,13 +160,23 @@
         charStartIndex  = index;
     }
 
+    /*
+     * Adding one to stopIndex from Tokens.  ANTLR defines the char position as
+     * being the array index of the actual characters. Most tools these days
+     * define document offsets as the positions between the characters.  If you
+     * imagine drawing little boxes around each character and think of the
+     * numbers as pointing to either the left or right side of a character's
+     * box, then 0 is before the first character - and in a Document of 10
+     * characters, position 10 is after the last character.
+     */
     public int getCharStopIndex() {
+
         if (charStopIndex == -1 && token != null) {
             if (token instanceof CommonToken) {
-                return ((CommonToken)token).getStopIndex();
+                return ((CommonToken)token).getStopIndex() + 1;
             }
             if (token instanceof ImaginaryToken) {
-                return ((ImaginaryToken)token).getStopIndex();
+                return ((ImaginaryToken)token).getStopIndex() + 1;
             }
         }
         return charStopIndex;
@@ -74,6 +190,13 @@
         if (isNil()) {
             return "None";
         }
+		if ( getType()==Token.INVALID_TOKEN_TYPE ) {
+			return "<errornode>";
+		}
+		if ( token==null ) {
+			return null;
+		}
+
         return token.getText() + "(" + this.getLine() + "," + this.getCharPositionInLine() + ")";
     }
 


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.