From: <cg...@us...> - 2009-09-11 06:55:49
|
Revision: 6785 http://jython.svn.sourceforge.net/jython/?rev=6785&view=rev Author: cgroves Date: 2009-09-11 06:55:34 +0000 (Fri, 11 Sep 2009) Log Message: ----------- Use the builtin codecs when nothing is found in the registry. Fixes issue 1458. Modified Paths: -------------- trunk/jython/NEWS trunk/jython/src/org/python/core/codecs.java Added Paths: ----------- trunk/jython/Lib/test/print_sans_lib.py trunk/jython/Lib/test/test_codecs_jy.py Added: trunk/jython/Lib/test/print_sans_lib.py =================================================================== --- trunk/jython/Lib/test/print_sans_lib.py (rev 0) +++ trunk/jython/Lib/test/print_sans_lib.py 2009-09-11 06:55:34 UTC (rev 6785) @@ -0,0 +1,4 @@ +import sys +sys.path = [path for path in sys.path if not path.startswith('/')] +encoded = u'hi'.encode("utf-8") +encoded.decode('utf-8') Added: trunk/jython/Lib/test/test_codecs_jy.py =================================================================== --- trunk/jython/Lib/test/test_codecs_jy.py (rev 0) +++ trunk/jython/Lib/test/test_codecs_jy.py 2009-09-11 06:55:34 UTC (rev 6785) @@ -0,0 +1,19 @@ +import subprocess +import sys +import test_support +import unittest + +class AccessBuiltinCodecs(unittest.TestCase): + def test_print_sans_lib(self): + '''Encodes and decodes using utf-8 after in an environment without the standard library + + Checks that the builtin utf-8 codec is always available: http://bugs.jython.org/issue1458''' + subprocess.call([sys.executable, "-J-Dpython.cachedir.skip=true", + "-J-Dpython.security.respectJavaAccessibility=false", + test_support.findfile('print_sans_lib.py')]) + +def test_main(): + test_support.run_unittest(AccessBuiltinCodecs) + +if __name__ == "__main__": + test_main() Modified: trunk/jython/NEWS =================================================================== --- trunk/jython/NEWS 2009-09-11 06:44:19 UTC (rev 6784) +++ trunk/jython/NEWS 2009-09-11 06:55:34 UTC (rev 6785) @@ -9,6 +9,7 @@ - [ 1382 ] __cmp__ on certain types raises ArrayStoreException - [ 1443 ] Can't update() hashlib.sha1() with array.array('c') - [ 1444 ] Can't zlib.compress() with array.array('c') + - [ 1458 ] Builtin codecs aren't available without standard lib Jython 2.5.1rc1 New Features Modified: trunk/jython/src/org/python/core/codecs.java =================================================================== --- trunk/jython/src/org/python/core/codecs.java 2009-09-11 06:44:19 UTC (rev 6784) +++ trunk/jython/src/org/python/core/codecs.java 2009-09-11 06:55:34 UTC (rev 6785) @@ -9,6 +9,7 @@ import java.nio.ByteBuffer; import java.nio.charset.Charset; + import java.util.ArrayList; import java.util.Iterator; @@ -113,8 +114,7 @@ } } - public static PyObject decode(PyString v, String encoding, - String errors) { + public static PyObject decode(PyString v, String encoding, String errors) { if (encoding == null) { encoding = getDefaultEncoding(); } else { @@ -127,11 +127,27 @@ /* Shortcut for ascii encoding */ if (encoding.equals("ascii")) { - return new PyUnicode(PyUnicode_DecodeASCII(v.toString(), v.__len__(), errors), true); + return wrapDecodeResult(PyUnicode_DecodeASCII(v.toString(), v.__len__(), errors)); } /* Decode via the codec registry */ - PyObject decoder = lookup(encoding).__getitem__(1); + PyObject decoder; + try { + decoder = lookup(encoding).__getitem__(1); + } catch (PyException ex) { + if (ex.match(Py.LookupError)) { + // If we couldn't find an encoding, see if we have a builtin + if (encoding.equals("utf-8")) { + return wrapDecodeResult(PyUnicode_DecodeUTF8(v.toString(), errors)); + } else if(encoding.equals("utf-7")) { + return wrapDecodeResult(PyUnicode_DecodeUTF7(v.toString(), errors)); + } else if(encoding.equals("latin-1")) { + return wrapDecodeResult(PyUnicode_DecodeLatin1(v.toString(), v.__len__(), + errors)); + } + } + throw ex; + } PyObject result; if (errors != null) { result = decoder.__call__(v, new PyString(errors)); @@ -145,6 +161,10 @@ return result.__getitem__(0); } + private static PyUnicode wrapDecodeResult(String result) { + return new PyUnicode(result, true); + } + public static String encode(PyString v, String encoding, String errors) { if (encoding == null) { @@ -165,8 +185,21 @@ return PyUnicode_EncodeASCII(v.toString(), v.__len__(), errors); } - /* Decode via the codec registry */ - PyObject encoder = lookup(encoding).__getitem__(0); + /* Encode via the codec registry */ + PyObject encoder; + try { + encoder = lookup(encoding).__getitem__(0); + } catch (PyException ex) { + if (ex.match(Py.LookupError)) { + // If we couldn't find an encoding, see if we have a builtin + if (encoding.equals("utf-8")) { + return PyUnicode_EncodeUTF8(v.toString(), errors); + } else if(encoding.equals("utf-7")) { + return codecs.PyUnicode_EncodeUTF7(v.toString(), false, false, errors); + } + } + throw ex; + } PyObject result; if (errors != null) { result = encoder.__call__(v, new PyString(errors)); @@ -181,7 +214,7 @@ if (encoded instanceof PyString) { return encoded.toString(); } else { - throw Py.TypeError("decoder did not return a string/unicode object (type=" + throw Py.TypeError("encoder did not return a string/unicode object (type=" + encoded.getType().fastGetName() + ")"); } } @@ -415,8 +448,7 @@ // note that we follow CPython 2.5 exactly here - it does not support surrogates, // but has to process as-if they are there for replacement purposes // fortunately no one really cares about utf-7 - public static String PyUnicode_DecodeUTF7(String str, - String errors) { + public static String PyUnicode_DecodeUTF7(String str, String errors) { int s = 0; int e = str.length(); boolean inShift = false; @@ -551,9 +583,9 @@ } public static String PyUnicode_EncodeUTF7(String str, - boolean encodeSetO, - boolean encodeWhiteSpace, - String errors) { + boolean encodeSetO, + boolean encodeWhiteSpace, + String errors) { int size = str.length(); if (size == 0) { @@ -786,13 +818,11 @@ return v.toString(); } - public static String PyUnicode_DecodeASCII(String str, int size, - String errors) { + public static String PyUnicode_DecodeASCII(String str, int size, String errors) { return PyUnicode_DecodeIntLimited(str, size, errors, "ascii", 128); } - public static String PyUnicode_DecodeLatin1(String str, int size, - String errors) { + public static String PyUnicode_DecodeLatin1(String str, int size, String errors) { return PyUnicode_DecodeIntLimited(str, size, errors, "latin-1", 256); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |