From: <pj...@us...> - 2010-04-11 17:37:36
|
Revision: 7017 http://jython.svn.sourceforge.net/jython/?rev=7017&view=rev Author: pjenvey Date: 2010-04-11 17:37:29 +0000 (Sun, 11 Apr 2010) Log Message: ----------- fix unicode.translate not properly dealing in code points fixes #1483 Modified Paths: -------------- trunk/jython/Lib/test/test_unicode_jy.py trunk/jython/NEWS trunk/jython/src/org/python/core/PyUnicode.java trunk/jython/src/org/python/modules/_codecs.java Modified: trunk/jython/Lib/test/test_unicode_jy.py =================================================================== --- trunk/jython/Lib/test/test_unicode_jy.py 2010-04-11 03:18:37 UTC (rev 7016) +++ trunk/jython/Lib/test/test_unicode_jy.py 2010-04-11 17:37:29 UTC (rev 7017) @@ -138,7 +138,17 @@ self.assertEquals('\xe2\x82\xac', encoded_euro) self.assertEquals(EURO_SIGN, encoded_euro.decode('utf-8')) + def test_translate(self): + # http://bugs.jython.org/issue1483 + self.assertEqual( + u'\u0443\u043a\u0430\u0437\u0430\u0442\u044c'.translate({}), + u'\u0443\u043a\u0430\u0437\u0430\u0442\u044c') + self.assertEqual(u'\u0443oo'.translate({0x443: 102}), u'foo') + self.assertEqual( + unichr(sys.maxunicode).translate({sys.maxunicode: 102}), + u'f') + class UnicodeFormatTestCase(unittest.TestCase): def test_unicode_mapping(self): Modified: trunk/jython/NEWS =================================================================== --- trunk/jython/NEWS 2010-04-11 03:18:37 UTC (rev 7016) +++ trunk/jython/NEWS 2010-04-11 17:37:29 UTC (rev 7017) @@ -28,6 +28,7 @@ - [ 1479 ] xml parser file lock - [ 1582 ] com.ziclix.python.sql.PyConnection leaks memory - [ 1520 ] os.listdir doesn't return unicode when requested + - [ 1483 ] optparse std module dies on non-ASCII unicode data - Fix runtime issues during exitfuncs triggered via SystemRestart (such as during Django or Pylons development mode reloading) - Fix pickling of collections.defaultdict objects Modified: trunk/jython/src/org/python/core/PyUnicode.java =================================================================== --- trunk/jython/src/org/python/core/PyUnicode.java 2010-04-11 03:18:37 UTC (rev 7016) +++ trunk/jython/src/org/python/core/PyUnicode.java 2010-04-11 17:37:29 UTC (rev 7017) @@ -1147,8 +1147,7 @@ @ExposedMethod(doc = BuiltinDocs.unicode_translate_doc) final PyObject unicode_translate(PyObject table) { - String trans = _codecs.translate_charmap(string, "ignore", table, true).__getitem__(0).toString(); - return new PyUnicode(trans); + return _codecs.translateCharmap(this, "ignore", table); } // these tests need to be UTF-16 aware because they are character-by-character tests, Modified: trunk/jython/src/org/python/modules/_codecs.java =================================================================== --- trunk/jython/src/org/python/modules/_codecs.java 2010-04-11 03:18:37 UTC (rev 7016) +++ trunk/jython/src/org/python/modules/_codecs.java 2010-04-11 17:37:29 UTC (rev 7017) @@ -9,6 +9,7 @@ import java.nio.ByteBuffer; import java.nio.charset.Charset; +import java.util.Iterator; import org.python.core.Py; import org.python.core.PyDictionary; @@ -183,60 +184,38 @@ } // parallel to CPython's PyUnicode_TranslateCharmap - public static PyTuple translate_charmap(String str, - String errors, - PyObject mapping, boolean ignoreUnmapped) { + public static PyObject translateCharmap(PyUnicode str, String errors, PyObject mapping) { + StringBuilder buf = new StringBuilder(str.toString().length()); - int size = str.length(); - StringBuilder v = new StringBuilder(size); - for (int i = 0; i < size; i++) { - char ch = str.charAt(i); - if (ch > 0xFF) { - i = codecs.insertReplacementAndGetResume(v, - errors, - "charmap", - str, - i, - i + 1, - "ordinal not in range(255)") - 1; - continue; - } - PyObject w = Py.newInteger(ch); - PyObject x = mapping.__finditem__(w); - if (x == null) { - if (ignoreUnmapped) { - v.append(ch); - } else { - i = codecs.insertReplacementAndGetResume(v, errors, "charmap", str, i, i + 1, "no mapping found") - 1; - } - continue; - } - /* Apply mapping */ - if (x instanceof PyInteger) { - int value = ((PyInteger) x).getValue(); + for (Iterator<Integer> iter = str.newSubsequenceIterator(); iter.hasNext();) { + int codePoint = iter.next(); + PyObject result = mapping.__finditem__(Py.newInteger(codePoint)); + if (result == null) { + // No mapping found means: use 1:1 mapping + buf.appendCodePoint(codePoint); + } else if (result == Py.None) { + // XXX: We don't support the fancier error handling CPython does here of + // capturing regions of chars removed by the None mapping to optionally + // pass to an error handler. Though we don't seem to even use this + // functionality anywhere either + ; + } else if (result instanceof PyInteger) { + int value = result.asInt(); if (value < 0 || value > PySystemState.maxunicode) { - throw Py.TypeError("character mapping must return " + "integer greater than 0 and less than sys.maxunicode"); + throw Py.TypeError(String.format("character mapping must be in range(0x%x)", + PySystemState.maxunicode + 1)); } - v.append((char) value); - } else if (x == Py.None) { - i = codecs.insertReplacementAndGetResume(v, - errors, - "charmap", - str, - i, - i + 1, - "character maps to <undefined>") - 1; - } else if (x instanceof PyUnicode) { - v.append(x.toString()); + buf.appendCodePoint(value); + } else if (result instanceof PyUnicode) { + buf.append(result.toString()); } else { - /* wrong return value */ - throw Py.TypeError("character mapping must return " + "integer, None or unicode"); + // wrong return value + throw Py.TypeError("character mapping must return integer, None or unicode"); } } - return decode_tuple(v.toString(), size); + return new PyUnicode(buf.toString()); } - - + public static PyTuple charmap_encode(String str, String errors, PyObject mapping) { //Default to Latin-1 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |