[Jython-checkins] SF.net SVN: jython:[7017] trunk/jython

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 7017
          http://jython.svn.sourceforge.net/jython/?rev=7017&view=rev
Author:   pjenvey
Date:     2010-04-11 17:37:29 +0000 (Sun, 11 Apr 2010)

Log Message:
-----------
fix unicode.translate to operate on code points instead of UTF-16 chars
fixes #1483

Modified Paths:
--------------
    trunk/jython/Lib/test/test_unicode_jy.py
    trunk/jython/NEWS
    trunk/jython/src/org/python/core/PyUnicode.java
    trunk/jython/src/org/python/modules/_codecs.java

Modified: trunk/jython/Lib/test/test_unicode_jy.py
===================================================================

--- trunk/jython/Lib/test/test_unicode_jy.py	2010-04-11 03:18:37 UTC (rev 7016)
+++ trunk/jython/Lib/test/test_unicode_jy.py	2010-04-11 17:37:29 UTC (rev 7017)
@@ -138,7 +138,17 @@
         self.assertEquals('\xe2\x82\xac', encoded_euro)
         self.assertEquals(EURO_SIGN, encoded_euro.decode('utf-8'))
 
+    def test_translate(self):
+        # http://bugs.jython.org/issue1483
+        self.assertEqual(
+            u'\u0443\u043a\u0430\u0437\u0430\u0442\u044c'.translate({}),
+            u'\u0443\u043a\u0430\u0437\u0430\u0442\u044c')
+        self.assertEqual(u'\u0443oo'.translate({0x443: 102}), u'foo')
+        self.assertEqual(
+            unichr(sys.maxunicode).translate({sys.maxunicode: 102}),
+            u'f')
 
+
 class UnicodeFormatTestCase(unittest.TestCase):
 
     def test_unicode_mapping(self):

Modified: trunk/jython/NEWS
===================================================================
--- trunk/jython/NEWS	2010-04-11 03:18:37 UTC (rev 7016)
+++ trunk/jython/NEWS	2010-04-11 17:37:29 UTC (rev 7017)
@@ -28,6 +28,7 @@
     - [ 1479 ] xml parser file lock
     - [ 1582 ] com.ziclix.python.sql.PyConnection leaks memory
     - [ 1520 ] os.listdir doesn't return unicode when requested
+    - [ 1483 ] optparse std module dies on non-ASCII unicode data
     - Fix runtime issues during exitfuncs triggered via SystemRestart (such as
       during Django or Pylons development mode reloading)
     - Fix pickling of collections.defaultdict objects

Modified: trunk/jython/src/org/python/core/PyUnicode.java
===================================================================
--- trunk/jython/src/org/python/core/PyUnicode.java	2010-04-11 03:18:37 UTC (rev 7016)
+++ trunk/jython/src/org/python/core/PyUnicode.java	2010-04-11 17:37:29 UTC (rev 7017)
@@ -1147,8 +1147,7 @@
 
     @ExposedMethod(doc = BuiltinDocs.unicode_translate_doc)
     final PyObject unicode_translate(PyObject table) {
-        String trans = _codecs.translate_charmap(string, "ignore", table, true).__getitem__(0).toString();
-        return new PyUnicode(trans);
+        return _codecs.translateCharmap(this, "ignore", table);
     }
 
     // these tests need to be UTF-16 aware because they are character-by-character tests,

Modified: trunk/jython/src/org/python/modules/_codecs.java
===================================================================
--- trunk/jython/src/org/python/modules/_codecs.java	2010-04-11 03:18:37 UTC (rev 7016)
+++ trunk/jython/src/org/python/modules/_codecs.java	2010-04-11 17:37:29 UTC (rev 7017)
@@ -9,6 +9,7 @@
 
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
+import java.util.Iterator;
 
 import org.python.core.Py;
 import org.python.core.PyDictionary;
@@ -183,60 +184,38 @@
     }
 
     // parallel to CPython's PyUnicode_TranslateCharmap
-    public static PyTuple translate_charmap(String str,
-            String errors,
-            PyObject mapping, boolean ignoreUnmapped) {
+    public static PyObject translateCharmap(PyUnicode str, String errors, PyObject mapping) {
+        StringBuilder buf = new StringBuilder(str.toString().length());
 
-        int size = str.length();
-        StringBuilder v = new StringBuilder(size);
-        for (int i = 0; i < size; i++) {
-            char ch = str.charAt(i);
-            if (ch > 0xFF) {
-                i = codecs.insertReplacementAndGetResume(v,
-                        errors,
-                        "charmap",
-                        str,
-                        i,
-                        i + 1,
-                        "ordinal not in range(255)") - 1;
-                continue;
-            }
-            PyObject w = Py.newInteger(ch);
-            PyObject x = mapping.__finditem__(w);
-            if (x == null) {
-                if (ignoreUnmapped) {
-                    v.append(ch);
-                } else {
-                    i = codecs.insertReplacementAndGetResume(v, errors, "charmap", str, i, i + 1, "no mapping found") - 1;
-                }
-                continue;
-            }
-            /* Apply mapping */
-            if (x instanceof PyInteger) {
-                int value = ((PyInteger) x).getValue();
+        for (Iterator<Integer> iter = str.newSubsequenceIterator(); iter.hasNext();) {
+            int codePoint = iter.next();
+            PyObject result = mapping.__finditem__(Py.newInteger(codePoint));
+            if (result == null) {
+                // No mapping found means: use 1:1 mapping
+                buf.appendCodePoint(codePoint);
+            } else if (result == Py.None) {
+                // XXX: We don't support the fancier error handling CPython does here of
+                // capturing regions of chars removed by the None mapping to optionally
+                // pass to an error handler. Though we don't seem to even use this
+                // functionality anywhere either
+                ;
+            } else if (result instanceof PyInteger) {
+                int value = result.asInt();
                 if (value < 0 || value > PySystemState.maxunicode) {
-                    throw Py.TypeError("character mapping must return " + "integer greater than 0 and less than sys.maxunicode");
+                    throw Py.TypeError(String.format("character mapping must be in range(0x%x)",
+                                                     PySystemState.maxunicode + 1));
                 }
-                v.append((char) value);
-            } else if (x == Py.None) {
-                i = codecs.insertReplacementAndGetResume(v,
-                        errors,
-                        "charmap",
-                        str,
-                        i,
-                        i + 1,
-                        "character maps to <undefined>") - 1;
-            } else if (x instanceof PyUnicode) {
-                v.append(x.toString());
+                buf.appendCodePoint(value);
+            } else if (result instanceof PyUnicode) {
+                buf.append(result.toString());
             } else {
-                /* wrong return value */
-                throw Py.TypeError("character mapping must return " + "integer, None or unicode");
+                // wrong return value
+                throw Py.TypeError("character mapping must return integer, None or unicode");
             }
         }
-        return decode_tuple(v.toString(), size);
+        return new PyUnicode(buf.toString());
     }
-    
-    
+
     public static PyTuple charmap_encode(String str, String errors,
             PyObject mapping) {
         //Default to Latin-1


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.