[Jython-checkins] SF.net SVN: jython:[5551] trunk/jython

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Revision: 5551
          http://jython.svn.sourceforge.net/jython/?rev=5551&view=rev
Author:   pjenvey
Date:     2008-11-07 01:04:00 +0000 (Fri, 07 Nov 2008)

Log Message:
-----------
o Make the parser aware of the source encoding, which fixes str handling
when an encoding is specified. Fixes test_doctest.
o Mark unicode source with the PyCF_SOURCE_IS_UTF8 flag instead of using the
Py.compile_flags byte[] variant. Refactor __builtin__.compile.
o Simplify ParserFacade by re-encoding unicode input to bytes; this avoids
having two prepBufReader code paths (InputStreams vs. Readers). It is less
efficient, but CPython does the same thing anyway.

Modified Paths:
--------------
    trunk/jython/Lib/test/test_builtin_jy.py
    trunk/jython/Lib/test/test_str_jy.py
    trunk/jython/Lib/test/test_unicode_jy.py
    trunk/jython/grammar/Python.g
    trunk/jython/src/org/python/antlr/BaseParser.java
    trunk/jython/src/org/python/antlr/ExpressionParser.java
    trunk/jython/src/org/python/antlr/GrammarActions.java
    trunk/jython/src/org/python/antlr/InteractiveParser.java
    trunk/jython/src/org/python/antlr/ModuleParser.java
    trunk/jython/src/org/python/core/ParserFacade.java
    trunk/jython/src/org/python/core/Py.java
    trunk/jython/src/org/python/core/__builtin__.java

Modified: trunk/jython/Lib/test/test_builtin_jy.py
===================================================================

--- trunk/jython/Lib/test/test_builtin_jy.py	2008-11-06 22:37:32 UTC (rev 5550)
+++ trunk/jython/Lib/test/test_builtin_jy.py	2008-11-07 01:04:00 UTC (rev 5551)
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 import sys
 import unittest
 import test.test_support
@@ -120,6 +121,36 @@
     def test_round_non_float(self):
         self.assertEqual(round(self.Foo(), 1), 3.1)
 
+class ExecEvalTest(unittest.TestCase):
+
+    bom = '\xef\xbb\xbf'
+    
+    def test_eval_bom(self):
+        self.assertEqual(eval(self.bom + '"foo"'), 'foo')
+        # Actual BOM ignored, so causes a SyntaxError
+        self.assertRaises(SyntaxError, eval,
+                          self.bom.decode('iso-8859-1') + '"foo"')
+
+    def test_parse_str_eval(self):
+        foo = 'föö'
+        for code in ("'%s'" % foo.decode('utf-8'),
+                     "# coding: utf-8\n'%s'" % foo,
+                     "%s'%s'" % (self.bom, foo)):
+            mod = compile(code, 'foo.py', 'eval')
+            bar = eval(mod)
+            self.assertEqual(foo, bar)
+            bar = eval(code)
+            self.assertEqual(foo, bar)
+
+    def test_parse_str_exec(self):
+        foo = 'föö'
+        for code in ("a = '%s'" % foo.decode('utf-8'),
+                     "# coding: utf-8\na = '%s'" % foo,
+                     "%sa = '%s'" % (self.bom, foo)):
+            ns = {}
+            exec code in ns
+            self.assertEqual(foo, ns['a'])
+
 def test_main():
     test.test_support.run_unittest(BuiltinTest,
                                    LoopTest,
@@ -129,7 +160,8 @@
                                    ReturnTest,
                                    ReprTest,
                                    CallableTest,
-                                   ConversionTest)
+                                   ConversionTest,
+                                   ExecEvalTest)
 
 if __name__ == "__main__":
     test_main()

Modified: trunk/jython/Lib/test/test_str_jy.py
===================================================================
--- trunk/jython/Lib/test/test_str_jy.py	2008-11-06 22:37:32 UTC (rev 5550)
+++ trunk/jython/Lib/test/test_str_jy.py	2008-11-07 01:04:00 UTC (rev 5551)
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from test import test_support
 import unittest
 
@@ -130,12 +131,47 @@
         self.assertEqual(repr(test2), '"\'bar"')
         self.assertEqual(repr(unicode(test2)), 'u"\'bar"')
 
+
+class ParserTest(unittest.TestCase):
+
+    def test_parse_str(self):
+        foo = 'ą\n'
+        self.assertEqual(len(foo), 3, repr(foo))
+        self.assertEqual(repr(foo), "'\\xc4\\x85\\n'")
+        self.assertEqual(ord(foo[0]), 196)
+        self.assertEqual(ord(foo[1]), 133)
+        self.assertEqual(ord(foo[2]), 10)
+
+        bar = foo.decode('utf-8')
+        self.assertEqual(len(bar), 2)
+        self.assertEqual(repr(bar), "u'\\u0105\\n'")
+        self.assertEqual(ord(bar[0]), 261)
+        self.assertEqual(ord(bar[1]), 10)
+
+    def test_parse_raw_str(self):
+        foo = r'ą\n'
+        self.assertEqual(len(foo), 4, repr(foo))
+        self.assertEqual(repr(foo), "'\\xc4\\x85\\\\n'")
+        self.assertEqual(ord(foo[0]), 196)
+        self.assertEqual(ord(foo[1]), 133)
+        self.assertEqual(ord(foo[2]), 92)
+        self.assertEqual(ord(foo[3]), 110)
+
+        bar = foo.decode('utf-8')
+        self.assertEqual(len(bar), 3)
+        self.assertEqual(repr(bar), "u'\\u0105\\\\n'")
+        self.assertEqual(ord(bar[0]), 261)
+        self.assertEqual(ord(bar[1]), 92)
+        self.assertEqual(ord(bar[2]), 110)
+
 def test_main():
-    test_support.run_unittest(WrappedStrCmpTest,
+    test_support.run_unittest(
+        WrappedStrCmpTest,
         IntToStrTest,
         StringSlicingTest,
         FormatTest,
-        DisplayTest)
+        DisplayTest,
+        ParserTest)
 
 if __name__ == '__main__':
     test_main()

Modified: trunk/jython/Lib/test/test_unicode_jy.py
===================================================================
--- trunk/jython/Lib/test/test_unicode_jy.py	2008-11-06 22:37:32 UTC (rev 5550)
+++ trunk/jython/Lib/test/test_unicode_jy.py	2008-11-07 01:04:00 UTC (rev 5551)
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """Misc unicode tests
 
 Made for Jython.
@@ -19,7 +20,37 @@
         self.assertEqual(chunker.match(quoted1, 1).groups(), (orig, u'"'))
         self.assertEqual(chunker.match(quoted2, 1).groups(), (orig, u'"'))
 
+    def test_parse_unicode(self):
+        foo = u'ą\n'
+        self.assertEqual(len(foo), 2, repr(foo))
+        self.assertEqual(repr(foo), "u'\\u0105\\n'")
+        self.assertEqual(ord(foo[0]), 261)
+        self.assertEqual(ord(foo[1]), 10)
 
+        bar = foo.encode('utf-8')
+        self.assertEqual(len(bar), 3)
+        self.assertEqual(repr(bar), "'\\xc4\\x85\\n'")
+        self.assertEqual(ord(bar[0]), 196)
+        self.assertEqual(ord(bar[1]), 133)
+        self.assertEqual(ord(bar[2]), 10)
+
+    def test_parse_raw_unicode(self):
+        foo = ur'ą\n'
+        self.assertEqual(len(foo), 3, repr(foo))
+        self.assertEqual(repr(foo), "u'\\u0105\\\\n'")
+        self.assertEqual(ord(foo[0]), 261)
+        self.assertEqual(ord(foo[1]), 92)
+        self.assertEqual(ord(foo[2]), 110)
+
+        bar = foo.encode('utf-8')
+        self.assertEqual(len(bar), 4)
+        self.assertEqual(repr(bar), "'\\xc4\\x85\\\\n'")
+        self.assertEqual(ord(bar[0]), 196)
+        self.assertEqual(ord(bar[1]), 133)
+        self.assertEqual(ord(bar[2]), 92)
+        self.assertEqual(ord(bar[3]), 110)
+
+
 def test_main():
     test_support.run_unittest(UnicodeTestCase)
 

Modified: trunk/jython/grammar/Python.g
===================================================================
--- trunk/jython/grammar/Python.g	2008-11-06 22:37:32 UTC (rev 5550)
+++ trunk/jython/grammar/Python.g	2008-11-07 01:04:00 UTC (rev 5551)
@@ -159,6 +159,8 @@
 
     private GrammarActions actions = new GrammarActions();
 
+    private String encoding;
+
     public void setErrorHandler(ErrorHandler eh) {
         this.errorHandler = eh;
         actions.setErrorHandler(eh);
@@ -180,6 +182,11 @@
         return super.recoverFromMismatchedToken(input, ttype, follow);
     }
 
+    public PythonParser(TokenStream input, String encoding) {
+        this(input);
+        this.encoding = encoding;
+    }
+
 }
 
 @rulecatch {
@@ -1235,7 +1242,7 @@
      | COMPLEX
     -> ^(COMPLEX<Num>[$COMPLEX, actions.makeComplex($COMPLEX)])
      | (S+=STRING)+ 
-    -> ^(STRING<Str>[actions.extractStringToken($S), actions.extractStrings($S)])
+    -> ^(STRING<Str>[actions.extractStringToken($S), actions.extractStrings($S, encoding)])
      ;
 
 //listmaker: test ( list_for | (',' test)* [','] )

Modified: trunk/jython/src/org/python/antlr/BaseParser.java
===================================================================
--- trunk/jython/src/org/python/antlr/BaseParser.java	2008-11-06 22:37:32 UTC (rev 5550)
+++ trunk/jython/src/org/python/antlr/BaseParser.java	2008-11-07 01:04:00 UTC (rev 5551)
@@ -29,6 +29,7 @@
     protected CharStream charStream;
     protected boolean partial;
     protected String filename;
+    protected String encoding;
     protected ErrorHandler errorHandler = new FailFastHandler();
 
     public void setAntlrErrorHandler(ErrorHandler eh) {

Modified: trunk/jython/src/org/python/antlr/ExpressionParser.java
===================================================================
--- trunk/jython/src/org/python/antlr/ExpressionParser.java	2008-11-06 22:37:32 UTC (rev 5550)
+++ trunk/jython/src/org/python/antlr/ExpressionParser.java	2008-11-07 01:04:00 UTC (rev 5551)
@@ -16,9 +16,10 @@
 
 public class ExpressionParser extends BaseParser {
 
-    public ExpressionParser(CharStream cs, String filename) {
+    public ExpressionParser(CharStream cs, String filename, String encoding) {
         this.charStream = cs;
         this.filename = filename;
+        this.encoding = encoding;
     }
 
     public modType parse() {
@@ -28,7 +29,7 @@
         CommonTokenStream tokens = new CommonTokenStream(lexer);
         PythonTokenSource indentedSource = new PythonTokenSource(tokens, filename);
         tokens = new CommonTokenStream(indentedSource);
-        PythonParser parser = new PythonParser(tokens);
+        PythonParser parser = new PythonParser(tokens, encoding);
         parser.setErrorHandler(errorHandler);
         parser.setTreeAdaptor(new PythonTreeAdaptor());
 

Modified: trunk/jython/src/org/python/antlr/GrammarActions.java
===================================================================
--- trunk/jython/src/org/python/antlr/GrammarActions.java	2008-11-06 22:37:32 UTC (rev 5550)
+++ trunk/jython/src/org/python/antlr/GrammarActions.java	2008-11-07 01:04:00 UTC (rev 5551)
@@ -414,14 +414,14 @@
         }
     }
 
-    PyString extractStrings(List s) {
+    PyString extractStrings(List s, String encoding) {
         boolean ustring = false;
         Token last = null;
         StringBuffer sb = new StringBuffer();
         Iterator iter = s.iterator();
         while (iter.hasNext()) {
             last = (Token)iter.next();
-            StringPair sp = extractString(last);
+            StringPair sp = extractString(last, encoding);
             if (sp.isUnicode()) {
                 ustring = true;
             }
@@ -433,40 +433,51 @@
         return new PyString(sb.toString());
     }
 
-    StringPair extractString(Token t) {
-        String s = t.getText();
-        char quoteChar = s.charAt(0);
-        int start=0;
+    StringPair extractString(Token t, String encoding) {
+        String string = t.getText();
+        char quoteChar = string.charAt(0);
+        int start = 0;
         boolean ustring = false;
         if (quoteChar == 'u' || quoteChar == 'U') {
             ustring = true;
             start++;
         }
-        quoteChar = s.charAt(start);
+        quoteChar = string.charAt(start);
         boolean raw = false;
         if (quoteChar == 'r' || quoteChar == 'R') {
             raw = true;
             start++;
         }
         int quotes = 3;
-        if (s.length() - start == 2) {
+        if (string.length() - start == 2) {
             quotes = 1;
         }
-        if (s.charAt(start) != s.charAt(start+1)) {
+        if (string.charAt(start) != string.charAt(start+1)) {
             quotes = 1;
         }
 
-        if (raw) {
-            return new StringPair(s.substring(quotes+start, s.length()-quotes), ustring);
+        // string is properly decoded according to the source encoding
+        String result;
+        int end = string.length() - quotes;
+        start = quotes + start;
+        // XXX: No need to re-encode when the encoding is iso-8859-1, but ParserFacade
+        // needs to normalize the encoding name
+        if (!ustring && encoding != null) {
+            // Plain strs with a specified encoding: First re-encode them back out
+            result = new PyUnicode(string.substring(start, end)).encode(encoding);
+            if (!raw) {
+                // Handle escapes in non-raw strs
+                result = PyString.decode_UnicodeEscape(result, 0, result.length(), "strict",
+                                                       ustring);
+            }
+        } else if (raw) {
+            // Raw str/unicode without an encoding (ascii): simply passthru
+            result = string.substring(start, end);
         } else {
-            StringBuffer sb = new StringBuffer(s.length());
-            char[] ca = s.toCharArray();
-            int n = ca.length-quotes;
-            int i=quotes+start;
-            int last_i=i;
-            return new StringPair(PyString.decode_UnicodeEscape(s, i, n, "strict", ustring), ustring);
-            //return decode_UnicodeEscape(s, i, n, "strict", ustring);
+            // Plain unicode: already decoded, just handle escapes
+            result = PyString.decode_UnicodeEscape(string, start, end, "strict", ustring);
         }
+        return new StringPair(result, ustring);
     }
 
     Token extractStringToken(List s) {

Modified: trunk/jython/src/org/python/antlr/InteractiveParser.java
===================================================================
--- trunk/jython/src/org/python/antlr/InteractiveParser.java	2008-11-06 22:37:32 UTC (rev 5550)
+++ trunk/jython/src/org/python/antlr/InteractiveParser.java	2008-11-07 01:04:00 UTC (rev 5551)
@@ -21,9 +21,10 @@
 
     private BufferedReader bufreader;
 
-    public InteractiveParser(BufferedReader br, String filename) {
+    public InteractiveParser(BufferedReader br, String filename, String encoding) {
         this.bufreader = br;
         this.filename = filename;
+        this.encoding = encoding;
     }
 
     public modType parse() throws IOException {
@@ -33,7 +34,7 @@
         CommonTokenStream tokens = new CommonTokenStream(lexer);
         PythonTokenSource indentedSource = new PythonTokenSource(tokens, filename, true);
         tokens = new CommonTokenStream(indentedSource);
-        PythonParser parser = new PythonParser(tokens);
+        PythonParser parser = new PythonParser(tokens, encoding);
         parser.setErrorHandler(errorHandler);
         parser.setTreeAdaptor(new PythonTreeAdaptor());
 

Modified: trunk/jython/src/org/python/antlr/ModuleParser.java
===================================================================
--- trunk/jython/src/org/python/antlr/ModuleParser.java	2008-11-06 22:37:32 UTC (rev 5550)
+++ trunk/jython/src/org/python/antlr/ModuleParser.java	2008-11-07 01:04:00 UTC (rev 5551)
@@ -15,14 +15,15 @@
 import org.python.antlr.ast.stmtType;
 
 public class ModuleParser extends BaseParser {
-    public ModuleParser(CharStream cs, String filename) {
-        this(cs, filename, false);
+    public ModuleParser(CharStream cs, String filename, String encoding) {
+        this(cs, filename, encoding, false);
     }
 
-    public ModuleParser(CharStream cs, String filename, boolean partial) {
-        this.partial = partial;
+    public ModuleParser(CharStream cs, String filename, String encoding, boolean partial) {
         this.charStream = cs;
         this.filename = filename;
+        this.encoding = encoding;
+        this.partial = partial;
     }
 
     public modType file_input() {
@@ -32,7 +33,7 @@
         CommonTokenStream tokens = new CommonTokenStream(lexer);
         PythonTokenSource indentedSource = new PythonTokenSource(tokens, filename);
         tokens = new CommonTokenStream(indentedSource);
-        PythonParser parser = new PythonParser(tokens);
+        PythonParser parser = new PythonParser(tokens, encoding);
         parser.setErrorHandler(errorHandler);
         parser.setTreeAdaptor(new PythonTreeAdaptor());
         try {

Modified: trunk/jython/src/org/python/core/ParserFacade.java
===================================================================
--- trunk/jython/src/org/python/core/ParserFacade.java	2008-11-06 22:37:32 UTC (rev 5550)
+++ trunk/jython/src/org/python/core/ParserFacade.java	2008-11-07 01:04:00 UTC (rev 5551)
@@ -7,9 +7,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.io.LineNumberReader;
 import java.io.Reader;
-import java.io.StringReader;
 import java.io.UnsupportedEncodingException;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -34,11 +32,11 @@
 import org.python.core.io.StreamIO;
 import org.python.core.io.TextIOInputStream;
 import org.python.core.io.UniversalIOWrapper;
+import org.python.core.util.StringUtil;
 
 /**
  * Facade for the classes in the org.python.antlr package.
  */
-
 public class ParserFacade {
 
     private static int MARK_LIMIT = 100000;
@@ -107,14 +105,14 @@
                                  // validPartialSentence
         if (kind.equals("eval")) {
             CharStream cs = new NoCloseReaderStream(reader);
-            ExpressionParser e = new ExpressionParser(cs, filename);
+            ExpressionParser e = new ExpressionParser(cs, filename, cflags.encoding);
             return e.parse();
         } else if (kind.equals("single")) {
-            InteractiveParser i = new InteractiveParser(reader, filename);
+            InteractiveParser i = new InteractiveParser(reader, filename, cflags.encoding);
             return i.parse();
         } else if (kind.equals("exec")) {
             CharStream cs = new NoCloseReaderStream(reader);
-            ModuleParser g = new ModuleParser(cs, filename);
+            ModuleParser g = new ModuleParser(cs, filename, cflags.encoding);
             return g.file_input();
         } else {
             throw Py.ValueError("parse kind must be eval, exec, or single");
@@ -129,7 +127,7 @@
         try {
             // prepBufReader takes care of encoding detection and universal
             // newlines:
-            bufReader = prepBufreader(stream, cflags, filename);
+            bufReader = prepBufReader(stream, cflags, filename);
             return parse(bufReader, kind, filename, cflags );
         } catch (Throwable t) {
             throw fixParseError(bufReader, t, filename);
@@ -144,7 +142,7 @@
                                 CompilerFlags cflags) {
         BufferedReader bufReader = null;
         try {
-            bufReader = prepBufReader(string);
+            bufReader = prepBufReader(string, cflags, filename);
             return parse(bufReader, kind, filename, cflags);
         } catch (Throwable t) {
             throw fixParseError(bufReader, t, filename);
@@ -161,7 +159,7 @@
         // XXX: What's the idea of the stdprompt argument?
         BufferedReader reader = null;
         try {
-            reader = prepBufReader(string);
+            reader = prepBufReader(string, cflags, filename);
             return parse(reader, kind, filename, cflags);
         } catch (Throwable t) {
             PyException p = fixParseError(reader, t, filename);
@@ -194,75 +192,62 @@
             }
 
         } catch (Exception e) {
-            System.out.println(e);
             return lexer.eofWhileNested;
         }
         return true;
     }
 
-    private static BufferedReader prepBufreader(InputStream istream,
-                                                CompilerFlags cflags,
+    private static BufferedReader prepBufReader(InputStream input, CompilerFlags cflags,
                                                 String filename) throws IOException {
-        boolean bom = false;
-        String encoding = null;
-        InputStream bstream = new BufferedInputStream(istream);
-        bom = adjustForBOM(bstream);
-        encoding = readEncoding(bstream);
+        input = new BufferedInputStream(input);
+        boolean bom = adjustForBOM(input);
+        String encoding = readEncoding(input);
 
         if (encoding == null) {
             if (bom) {
-                encoding = "UTF-8";
+                encoding = "utf-8";
             } else if (cflags != null && cflags.encoding != null) {
                 encoding = cflags.encoding;
             }
         }
+        if (cflags.source_is_utf8) {
+            if (encoding != null) {
+                throw new ParseException("encoding declaration in Unicode string");
+            }
+            encoding = "utf-8";
+        }
+        cflags.encoding = encoding;
 
         // Enable universal newlines mode on the input
-        StreamIO rawIO = new StreamIO(bstream, true);
+        StreamIO rawIO = new StreamIO(input, true);
         org.python.core.io.BufferedReader bufferedIO =
                 new org.python.core.io.BufferedReader(rawIO, 0);
         UniversalIOWrapper textIO = new UniversalIOWrapper(bufferedIO);
-        bstream = new TextIOInputStream(textIO);
+        input = new TextIOInputStream(textIO);
 
         Reader reader;
-        if(encoding != null) {
-            try {
-                reader = new InputStreamReader(bstream, encoding);
-            } catch(UnsupportedEncodingException exc) {
-                throw new PySyntaxError("Encoding '" + encoding + "' isn't supported by this JVM.", 0, 0, "", filename);
-            }
-        } else {
-            try {
-                // Default to ISO-8859-1 to get bytes off the input stream since it leaves their values alone.
-                reader = new InputStreamReader(bstream, "ISO-8859-1");
-            } catch(UnsupportedEncodingException e) {
-                // This JVM is whacked, it doesn't even have iso-8859-1
-                throw Py.SystemError("Java couldn't find the ISO-8859-1 encoding");
-            }
+        try {
+            // Using iso-8859-1 for the raw bytes when no encoding was specified
+            reader = new InputStreamReader(input, encoding == null ? "iso-8859-1" : encoding);
+        } catch (UnsupportedEncodingException exc) {
+            throw new PySyntaxError("Unknown encoding: " + encoding, 1, 0, "", filename);
         }
-
-        BufferedReader bufreader = new BufferedReader(reader);
-        return bufreader;
+        return new BufferedReader(reader);
     }
 
-    private static BufferedReader prepBufReader(String string) throws IOException {
-        BufferedReader bufReader;
-
-        // LineNumberReader takes care of universal newlines
-        bufReader = new LineNumberReader(new StringReader(string));
-
-        // If the input is a decoded string (implied from the String argument
-        // for prepBufReader), it can't have an encoding declaration.
-        bufReader.mark(MARK_LIMIT);
-        if (findEncoding(bufReader) != null) {
-            throw new ParseException("encoding declaration in Unicode string");
+    private static BufferedReader prepBufReader(String string, CompilerFlags cflags,
+                                                String filename) throws IOException {
+        if (cflags.source_is_utf8) {
+            // Passed unicode, re-encode the String to raw bytes
+            // NOTE: This could be more efficient if we duplicate
+            // prepBufReader/adjustForBOM/readEncoding to work on Readers, instead of
+            // encoding
+            string = new PyUnicode(string).encode("utf-8");
         }
-        bufReader.reset();
-
-        return bufReader;
+        InputStream input = new ByteArrayInputStream(StringUtil.toBytes(string));
+        return prepBufReader(input, cflags, filename);
     }
 
-
     /**
      * Check for a BOM mark at the begginning of stream.  If there is a BOM
      * mark, advance the stream passed it.  If not, reset() to start at the

Modified: trunk/jython/src/org/python/core/Py.java
===================================================================
--- trunk/jython/src/org/python/core/Py.java	2008-11-06 22:37:32 UTC (rev 5550)
+++ trunk/jython/src/org/python/core/Py.java	2008-11-07 01:04:00 UTC (rev 5551)
@@ -1190,6 +1190,7 @@
 
     public static void exec(PyObject o, PyObject globals, PyObject locals) {
         PyCode code;
+        int flags = 0;
         if (o instanceof PyCode) {
             code = (PyCode) o;
             if (locals == null && o instanceof PyTableCode && ((PyTableCode) o).hasFreevars()) {
@@ -1198,6 +1199,9 @@
         } else {
             String contents = null;
             if (o instanceof PyString) {
+                if (o instanceof PyUnicode) {
+                    flags |= PyTableCode.PyCF_SOURCE_IS_UTF8;
+                }
                 contents = o.toString();
             } else if (o instanceof PyFile) {
                 PyFile fp = (PyFile) o;
@@ -1210,7 +1214,7 @@
                         "exec: argument 1 must be string, code or file object");
             }
             code = (PyCode)Py.compile_flags(contents, "<string>", "exec",
-                    Py.getCompilerFlags());
+                                            getCompilerFlags(flags, false));
         }
         Py.runCode(code, locals, globals);
     }
@@ -1683,31 +1687,6 @@
         return Py.compile_flags(node, filename, kind, cflags);
     }
 
-    /**
-     * Compiles python source code coming from bytestrings
-     */
-    public static PyObject compile_flags(byte[] bytes, String filename,
-                                         String kind, CompilerFlags cflags) {
-        for(int i = 0; i < bytes.length; i++) {
-            if (bytes[i] == 0) {
-                throw Py.TypeError("compile() expected string without null bytes");
-            }
-        }
-        byte[] data;
-        if (cflags != null && cflags.dont_imply_dedent) {
-            data = new byte[bytes.length + 1];
-            System.arraycopy(bytes, 0, data, 0, bytes.length);
-            data[data.length - 1] = '\n';
-        } else {
-            data = new byte[bytes.length + 2];
-            System.arraycopy(bytes, 0, data, 0, bytes.length);
-            data[data.length - 1] = data[data.length - 2] = '\n';
-        }
-        modType node = ParserFacade.parse(new ByteArrayInputStream(data), kind,
-                                          filename, cflags);
-        return Py.compile_flags(node, filename, kind, cflags);
-    }
-
     public static PyObject compile_command_flags(String string, String filename,
             String kind, CompilerFlags cflags, boolean stdprompt) {
         modType node = ParserFacade.partialParse(string + "\n", kind, filename,

Modified: trunk/jython/src/org/python/core/__builtin__.java
===================================================================
--- trunk/jython/src/org/python/core/__builtin__.java	2008-11-06 22:37:32 UTC (rev 5550)
+++ trunk/jython/src/org/python/core/__builtin__.java	2008-11-07 01:04:00 UTC (rev 5551)
@@ -1,8 +1,6 @@
 // Copyright (c) Corporation for National Research Initiatives
 package org.python.core;
 
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
 import java.util.Iterator;
 import java.util.Map;
 
@@ -264,26 +262,6 @@
 
     public PyObject fancyCall(PyObject[] args) {
         switch (this.index) {
-            case 44:
-                if (args.length > 5) {
-                    throw info.unexpectedCall(args.length, false);
-                }
-                int flags = 0;
-                if (args.length > 3) {
-                    flags = Py.py2int(args[3]);
-                }
-                boolean dont_inherit = false;
-                if (args.length > 4) {
-                    dont_inherit = Py.py2boolean(args[4]);
-                }
-                modType ast = py2node(args[0]);
-                if (ast != null) {
-                    return __builtin__.compile(ast, args[1].toString(), args[2].toString(), flags, dont_inherit);
-                }
-                if (args[0] instanceof PyUnicode) {
-                    flags += PyTableCode.PyCF_SOURCE_IS_UTF8;
-                }
-                return __builtin__.compile((PyString)args[0], args[1].toString(), args[2].toString(), flags, dont_inherit);
             case 29:
                 return __builtin__.map(args);
             case 43:
@@ -296,22 +274,6 @@
     public PyObject getModule() {
         return module;
     }
-
-    /**
-     * @returns modType if obj is a wrapper around an AST modType else returns
-     *          null
-     *
-     * XXX: Reaches into implementation details -- needs to be reviewed if our
-     *      java integration changes.
-     */
-    private static modType py2node(PyObject obj) {
-        Object node = obj.__tojava__(modType.class);
-        if (node == Py.NoConversion) {
-            return null;
-        }
-        return (modType)node;
-    }
-
 }
 
 /**
@@ -374,7 +336,6 @@
         dict.__setitem__("range", new BuiltinFunctions("range", 2, 1, 3));
         dict.__setitem__("sum", new BuiltinFunctions("sum", 12, 1, 2));
         dict.__setitem__("unichr", new BuiltinFunctions("unichr", 6, 1));
-        dict.__setitem__("compile", new BuiltinFunctions("compile", 44, 3, -1));
         dict.__setitem__("delattr", new BuiltinFunctions("delattr", 15, 2));
         dict.__setitem__("dir", new BuiltinFunctions("dir", 16, 0, 1));
         dict.__setitem__("divmod", new BuiltinFunctions("divmod", 17, 2));
@@ -402,6 +363,7 @@
         dict.__setitem__("setattr", new BuiltinFunctions("setattr", 39, 3));
         dict.__setitem__("vars", new BuiltinFunctions("vars", 41, 0, 1));
         dict.__setitem__("zip", new BuiltinFunctions("zip", 43, 0, -1));
+        dict.__setitem__("compile", new CompileFunction());
         dict.__setitem__("reversed", new BuiltinFunctions("reversed", 45, 1));
         dict.__setitem__("__import__", new ImportFunction());
         dict.__setitem__("sorted", new SortedFunction());
@@ -475,38 +437,6 @@
         throw Py.TypeError("number coercion failed");
     }
 
-    public static PyObject compile(PyString data, String filename, String kind) {
-        if (data instanceof PyUnicode) {
-            return Py.compile_flags(data.toString(), filename, kind, Py.getCompilerFlags());
-        } else {
-            return Py.compile_flags(data.toBytes(), filename, kind,  Py.getCompilerFlags());
-        }
-    }
-
-    public static PyObject compile(modType node, String filename, String kind) {
-        return Py.compile_flags(node, filename, kind, Py.getCompilerFlags());
-    }
-
-    public static PyObject compile(PyString data, String filename, String kind, int flags, boolean dont_inherit) {
-        if ((flags & ~PyTableCode.CO_ALL_FEATURES) != 0) {
-            throw Py.ValueError("compile(): unrecognised flags");
-        }
-        if (data instanceof PyUnicode) {
-            return Py.compile_flags(data.toString(), filename, kind,
-                                    Py.getCompilerFlags(flags, dont_inherit));
-        } else {
-            return Py.compile_flags(data.toBytes(), filename, kind,
-                                    Py.getCompilerFlags(flags, dont_inherit));
-        }
-    }
-
-    public static PyObject compile(modType node, String filename, String kind, int flags, boolean dont_inherit) {
-        if ((flags & ~PyTableCode.CO_ALL_FEATURES) != 0) {
-            throw Py.ValueError("compile(): unrecognised flags");
-        }
-        return Py.compile_flags(node, filename, kind, Py.getCompilerFlags(flags, dont_inherit));
-    }
-
     public static void delattr(PyObject o, String n) {
         o.__delattr__(n);
     }
@@ -562,7 +492,7 @@
             code = (PyCode) o;
         } else {
             if (o instanceof PyString) {
-                code = (PyCode)compile((PyString)o, "<string>", "eval");
+                code = (PyCode)CompileFunction.compile((PyString)o, "<string>", "eval");
             } else {
                 throw Py.TypeError("eval: argument 1 must be string or code object");
             }
@@ -1562,3 +1492,77 @@
         return new PyFloat(tmp / multiple);
     }
 }
+
+class CompileFunction extends PyBuiltinFunction {
+    CompileFunction() {
+        super("compile",
+              "compile(source, filename, mode[, flags[, dont_inherit]]) -> code object\n\n"
+              + "Compile the source string (a Python module, statement or expression)\n"
+              + "into a code object that can be executed by the exec statement or eval().\n"
+              + "The filename will be used for run-time error messages.\n"
+              + "The mode must be 'exec' to compile a module, 'single' to compile a\n"
+              + "single (interactive) statement, or 'eval' to compile an expression.\n"
+              + "The flags argument, if present, controls which future statements influence\n"
+              + "the compilation of the code.\n"
+              + "The dont_inherit argument, if non-zero, stops the compilation inheriting\n"
+              + "the effects of any future statements in effect in the code calling\n"
+              + "compile; if absent or zero these statements do influence the compilation,\n"
+              + "in addition to any features explicitly specified.");
+    }
+
+    public PyObject __call__(PyObject args[], String kwds[]) {
+        ArgParser ap = new ArgParser("compile", args, kwds,
+                                     new String[] {"source", "filename", "mode", "flags",
+                                                   "dont_inherit"},
+                                     3);
+        PyObject source = ap.getPyObject(0);
+        String filename = ap.getString(1);
+        String mode = ap.getString(2);
+        int flags = ap.getInt(3, 0);
+        boolean dont_inherit = ap.getPyObject(4, Py.False).__nonzero__();
+        return compile(source, filename, mode, flags, dont_inherit);
+    }
+
+    public static PyObject compile(PyObject source, String filename, String mode) {
+        return compile(source, filename, mode, 0, false);
+    }
+
+    public static PyObject compile(PyObject source, String filename, String mode, int flags,
+                                   boolean dont_inherit) {
+        if ((flags & ~PyTableCode.CO_ALL_FEATURES) != 0) {
+            throw Py.ValueError("compile(): unrecognised flags");
+        }
+        if (!mode.equals("exec") && !mode.equals("eval") && !mode.equals("single")) {
+            throw Py.ValueError("compile() arg 3 must be 'exec' or 'eval' or 'single'");
+        }
+
+        modType ast = py2node(source);
+        if (ast != null) {
+            return Py.compile_flags(ast, filename, mode, Py.getCompilerFlags(flags, dont_inherit));
+        }
+
+        if (!(source instanceof PyString)) {
+            throw Py.TypeError("expected a readable buffer object");
+        }
+        if (source instanceof PyUnicode) {
+            flags |= PyTableCode.PyCF_SOURCE_IS_UTF8;
+        }
+        return Py.compile_flags(((PyString)source).toString(), filename, mode,
+                                Py.getCompilerFlags(flags, dont_inherit));
+    }
+
+    /**
+     * @returns modType if obj is a wrapper around an AST modType else returns
+     *          null
+     *
+     * XXX: Reaches into implementation details -- needs to be reviewed if our
+     *      java integration changes.
+     */
+    private static modType py2node(PyObject obj) {
+        Object node = obj.__tojava__(modType.class);
+        if (node == Py.NoConversion) {
+            return null;
+        }
+        return (modType)node;
+    }
+}


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.