From: <zy...@us...> - 2008-10-19 22:04:59
|
Revision: 5470
          http://jython.svn.sourceforge.net/jython/?rev=5470&view=rev
Author:   zyasoft
Date:     2008-10-19 22:04:51 +0000 (Sun, 19 Oct 2008)

Log Message:
-----------
Make error handling in codecs so that it is also surrogate-aware.
Fixes test_codeccallbacks
Skip tests from test_threading not applicable to Jython.

Modified Paths:
--------------
    trunk/jython/Lib/test/test_codeccallbacks.py
    trunk/jython/Lib/test/test_threading.py
    trunk/jython/Lib/unicodedata.py
    trunk/jython/src/org/python/core/codecs.java
    trunk/jython/src/org/python/core/exceptions.java

Modified: trunk/jython/Lib/test/test_codeccallbacks.py =================================================================== --- trunk/jython/Lib/test/test_codeccallbacks.py 2008-10-19 20:51:56 UTC (rev 5469) +++ trunk/jython/Lib/test/test_codeccallbacks.py 2008-10-19 22:04:51 UTC (rev 5470) @@ -589,7 +589,7 @@ ("ascii", "\xff"), ("utf-8", "\xff"), ("utf-7", "+x-"), - ("unicode-internal", "\x00"), + # ("unicode-internal", "\x00"), - not valid for Jython because PyUnicode/PyString share internal representation ): self.assertRaises( TypeError, @@ -794,6 +794,9 @@ text.translate(charmap) def test_main(): + if test.test_support.is_jython: + del CodecCallbackTest.test_decodeunicodeinternal # PyUnicode/PyString share the same internal rep, so n/a + test.test_support.run_unittest(CodecCallbackTest) if __name__ == "__main__": Modified: trunk/jython/Lib/test/test_threading.py =================================================================== --- trunk/jython/Lib/test/test_threading.py 2008-10-19 20:51:56 UTC (rev 5469) +++ trunk/jython/Lib/test/test_threading.py 2008-10-19 22:04:51 UTC (rev 5470) @@ -1,7 +1,7 @@ # Very rudimentary test of threading module import test.test_support -from test.test_support import verbose +from test.test_support import verbose, is_jython import random import sys import threading @@ -118,7 +118,7 @@ # 3. 
This behavior doesn't make sense for Jython since any foreign # Java threads can use the same underlying locks, etc - def na_for_jython_test_foreign_thread(self): + def test_foreign_thread(self): # Check that a "foreign" thread can use the threading module. def f(mutex): # Acquiring an RLock forces an entry for the foreign @@ -208,7 +208,7 @@ t.join() # else the thread is still running, and we have no way to kill it - def na_for_jython_test_enumerate_after_join(self): + def test_enumerate_after_join(self): # Try hard to trigger #1703448: a thread is still returned in # threading.enumerate() after it has been join()ed. enum = threading.enumerate @@ -225,6 +225,10 @@ finally: sys.setcheckinterval(old_interval) +if is_jython: + del ThreadTests.test_enumerate_after_join + del ThreadTests.test_foreign_thread + del ThreadTests.test_PyThreadState_SetAsyncExc def test_main(): test.test_support.run_unittest(ThreadTests) Modified: trunk/jython/Lib/unicodedata.py =================================================================== --- trunk/jython/Lib/unicodedata.py 2008-10-19 20:51:56 UTC (rev 5469) +++ trunk/jython/Lib/unicodedata.py 2008-10-19 22:04:51 UTC (rev 5470) @@ -41,6 +41,10 @@ cols = row.split(';') codepoint = int(cols[0], 16) name = cols[1] + if name == '<CJK Ideograph, Last>': + lookup_name = 'CJK UNIFIED IDEOGRAPH' + else: + lookup_name = name data = ( cols[2], get_int(cols[3]), @@ -49,7 +53,9 @@ get_int(cols[6]), get_int(cols[7]), get_numeric(cols[8]), - get_yn(cols[9])) + get_yn(cols[9]), + lookup_name, + ) if name.find('First') >= 0: start = codepoint @@ -86,15 +92,27 @@ init(my_path) init_east_asian_width(my_path) +# xxx - need to normalize the segments, so +# <CJK Ideograph, Last> ==> CJK UNIFIED IDEOGRAPH; +# may need to do some sort of analysis against CPython for the normalization! 
+ def name(unichr, default=None): - try: - return _codepoints[ord(unichr)].name - except KeyError: - if default is not None: + codepoint = get_codepoint(unichr, "name") + v = _codepoints.get(codepoint, None) + if v is None: + v = check_segments(codepoint, _segments) + if v is not None: + return "%s-%X" % (v[8], codepoint) + + if v is None: + if default is not Nonesuch: return default - else: - raise ValueError() + raise ValueError() + return v[8] +# xxx - also need to add logic here so that if it's CJK UNIFIED +# IDEOGRAPH-8000, we go against the segment to verify the prefix + def lookup(name): return _names[name] Modified: trunk/jython/src/org/python/core/codecs.java =================================================================== --- trunk/jython/src/org/python/core/codecs.java 2008-10-19 20:51:56 UTC (rev 5469) +++ trunk/jython/src/org/python/core/codecs.java 2008-10-19 22:04:51 UTC (rev 5470) @@ -243,9 +243,17 @@ ArgParser ap = new ArgParser("replace_errors", args, kws, "exc"); PyObject exc = ap.getPyObject(0); if (Py.isInstance(exc, Py.UnicodeDecodeError)) { + PyObject object = exc.__getattr__("object"); + if (!Py.isInstance(object, PyString.TYPE) || Py.isInstance(object, PyUnicode.TYPE)) { + throw Py.TypeError("object attribute must be str"); + } PyObject end = exc.__getattr__("end"); return new PyTuple(new PyUnicode(Py_UNICODE_REPLACEMENT_CHARACTER), end); } else if (Py.isInstance(exc, Py.UnicodeEncodeError)) { + PyObject object = exc.__getattr__("object"); + if (!Py.isInstance(object, PyUnicode.TYPE)) { + throw Py.TypeError("object attribute must be unicode"); + } PyObject end = exc.__getattr__("end"); return new PyTuple(Py.java2py("?"), end); } else if (Py.isInstance(exc, Py.UnicodeTranslateError)) { @@ -343,9 +351,9 @@ } private static void backslashreplace_internal(int start, int end, String object, StringBuilder replacement) { - for (int i = start; i < end; i++) { + for (Iterator<Integer> iter = new StringSubsequenceIterator(object, start, end, 1); 
iter.hasNext();) { + int c = iter.next(); replacement.append('\\'); - char c = object.charAt(i); if (c >= 0x00010000) { replacement.append('U'); replacement.append(hexdigits[(c >> 28) & 0xf]); @@ -1257,3 +1265,75 @@ } } } + + +class StringSubsequenceIterator implements Iterator { + + private final String s; + private int current, k, start, stop, step; + + StringSubsequenceIterator(String s, int start, int stop, int step) { +// System.out.println("s=" + s.length() + ",start=" + start + ",stop=" + stop); + this.s = s; + k = 0; + current = start; + this.start = start; + this.stop = stop; + this.step = step; + + // this bounds checking is necessary to convert between use of code units elsewhere, and codepoints here + // it would be nice if it were unnecessary! + int count = getCodePointCount(s); + if (start >= count) { + this.stop = -1; + } + else if (stop >= count) { + this.stop = count; + } + + for (int i = 0; i < start; i++) { + nextCodePoint(); + } + } + + StringSubsequenceIterator(String s) { + this(s, 0, getCodePointCount(s), 1); + } + + private static int getCodePointCount(String s) { + return s.codePointCount(0, s.length()); + } + + public boolean hasNext() { + return current < stop; + } + + public Object next() { + int codePoint = nextCodePoint(); + current += 1; + for (int j = 1; j < step && hasNext(); j++) { + nextCodePoint(); + current += 1; + } + return codePoint; + } + + private int nextCodePoint() { + int U; +// System.out.println("k=" + k); + int W1 = s.charAt(k); + if (W1 >= 0xD800 && W1 < 0xDC00) { + int W2 = s.charAt(k + 1); + U = (((W1 & 0x3FF) << 10) | (W2 & 0x3FF)) + 0x10000; + k += 2; + } else { + U = W1; + k += 1; + } + return U; + } + + public void remove() { + throw new UnsupportedOperationException("Not supported on String objects (immutable)"); + } +} Modified: trunk/jython/src/org/python/core/exceptions.java =================================================================== --- trunk/jython/src/org/python/core/exceptions.java 2008-10-19 
20:51:56 UTC (rev 5469) +++ trunk/jython/src/org/python/core/exceptions.java 2008-10-19 22:04:51 UTC (rev 5470) @@ -381,7 +381,7 @@ if (end == (start + 1)) { PyObject object = self.__getattr__("object"); int badByte = (object.toString().charAt(start)) & 0xff; - result = String.format("'%.400s' codec can't decode byte 0x%s in position %d: %.400s", + result = String.format("'%.400s' codec can't decode byte 0x%x in position %d: %.400s", encoding, badByte, start, reason); } else { result = String.format("'%.400s' codec can't decode bytes in position %d-%d: %.400s", @@ -413,7 +413,7 @@ String result; if (end == (start + 1)) { PyObject object = self.__getattr__("object"); - int badchar = object.toString().charAt(start); + int badchar = object.toString().codePointAt(start); String badcharStr; if (badchar <= 0xff) { badcharStr = String.format("x%02x", badchar); @@ -460,7 +460,7 @@ String result; if (end == (start + 1)) { - int badchar = (self.__getattr__("object").toString().charAt(start)); + int badchar = (self.__getattr__("object").toString().codePointAt(start)); String badCharStr; if (badchar <= 0xff) { badCharStr = String.format("x%02x", badchar); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |