[Jython-checkins] SF.net SVN: jython:[5470] trunk/jython

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 5470
          http://jython.svn.sourceforge.net/jython/?rev=5470&view=rev
Author:   zyasoft
Date:     2008-10-19 22:04:51 +0000 (Sun, 19 Oct 2008)

Log Message:
-----------
Make error handling in codecs surrogate-aware as well.
Fixes test_codeccallbacks.

Skip the test_threading tests that are not applicable to Jython.

Modified Paths:
--------------
    trunk/jython/Lib/test/test_codeccallbacks.py
    trunk/jython/Lib/test/test_threading.py
    trunk/jython/Lib/unicodedata.py
    trunk/jython/src/org/python/core/codecs.java
    trunk/jython/src/org/python/core/exceptions.java

Modified: trunk/jython/Lib/test/test_codeccallbacks.py
===================================================================

--- trunk/jython/Lib/test/test_codeccallbacks.py	2008-10-19 20:51:56 UTC (rev 5469)
+++ trunk/jython/Lib/test/test_codeccallbacks.py	2008-10-19 22:04:51 UTC (rev 5470)
@@ -589,7 +589,7 @@
                 ("ascii", "\xff"),
                 ("utf-8", "\xff"),
                 ("utf-7", "+x-"),
-                ("unicode-internal", "\x00"),
+                # ("unicode-internal", "\x00"), - not valid for Jython because PyUnicode/PyString share internal representation
             ):
                 self.assertRaises(
                     TypeError,
@@ -794,6 +794,9 @@
             text.translate(charmap)
 
 def test_main():
+    if test.test_support.is_jython:
+        del CodecCallbackTest.test_decodeunicodeinternal # PyUnicode/PyString share the same internal rep, so n/a
+
     test.test_support.run_unittest(CodecCallbackTest)
 
 if __name__ == "__main__":

Modified: trunk/jython/Lib/test/test_threading.py
===================================================================
--- trunk/jython/Lib/test/test_threading.py	2008-10-19 20:51:56 UTC (rev 5469)
+++ trunk/jython/Lib/test/test_threading.py	2008-10-19 22:04:51 UTC (rev 5470)
@@ -1,7 +1,7 @@
 # Very rudimentary test of threading module
 
 import test.test_support
-from test.test_support import verbose
+from test.test_support import verbose, is_jython
 import random
 import sys
 import threading
@@ -118,7 +118,7 @@
     # 3. This behavior doesn't make sense for Jython since any foreign
     #    Java threads can use the same underlying locks, etc
 
-    def na_for_jython_test_foreign_thread(self):
+    def test_foreign_thread(self):
         # Check that a "foreign" thread can use the threading module.
         def f(mutex):
             # Acquiring an RLock forces an entry for the foreign
@@ -208,7 +208,7 @@
             t.join()
         # else the thread is still running, and we have no way to kill it
 
-    def na_for_jython_test_enumerate_after_join(self):
+    def test_enumerate_after_join(self):
         # Try hard to trigger #1703448: a thread is still returned in
         # threading.enumerate() after it has been join()ed.
         enum = threading.enumerate
@@ -225,6 +225,10 @@
         finally:
             sys.setcheckinterval(old_interval)
 
+if is_jython:
+    del ThreadTests.test_enumerate_after_join
+    del ThreadTests.test_foreign_thread
+    del ThreadTests.test_PyThreadState_SetAsyncExc
 
 def test_main():
     test.test_support.run_unittest(ThreadTests)

Modified: trunk/jython/Lib/unicodedata.py
===================================================================
--- trunk/jython/Lib/unicodedata.py	2008-10-19 20:51:56 UTC (rev 5469)
+++ trunk/jython/Lib/unicodedata.py	2008-10-19 22:04:51 UTC (rev 5470)
@@ -41,6 +41,10 @@
             cols = row.split(';')
             codepoint = int(cols[0], 16)
             name = cols[1]
+            if name == '<CJK Ideograph, Last>':
+                lookup_name = 'CJK UNIFIED IDEOGRAPH'
+            else:
+                lookup_name = name
             data = (
                 cols[2],
                 get_int(cols[3]),
@@ -49,7 +53,9 @@
                 get_int(cols[6]),
                 get_int(cols[7]),
                 get_numeric(cols[8]),
-                get_yn(cols[9]))
+                get_yn(cols[9]),
+                lookup_name,
+                )
 
             if name.find('First') >= 0:
                 start = codepoint
@@ -86,15 +92,27 @@
 init(my_path)
 init_east_asian_width(my_path)
 
+# xxx - need to normalize the segments, so
+# <CJK Ideograph, Last> ==> CJK UNIFIED IDEOGRAPH;
+# may need to do some sort of analysis against CPython for the normalization!
+
 def name(unichr, default=None):
-    try:
-        return _codepoints[ord(unichr)].name
-    except KeyError:
-        if default is not None:
+    codepoint = get_codepoint(unichr, "name")
+    v = _codepoints.get(codepoint, None)
+    if v is None:
+        v = check_segments(codepoint, _segments)
+        if v is not None:
+            return "%s-%X" % (v[8], codepoint) 
+
+    if v is None:
+        if default is not Nonesuch:
             return default
-        else:
-            raise ValueError()
+        raise ValueError()
+    return v[8]
 
+# xxx - also need to add logic here so that if it's CJK UNIFIED
+# IDEOGRAPH-8000, we go against the segment to verify the prefix
+
 def lookup(name):
     return _names[name]
 

Modified: trunk/jython/src/org/python/core/codecs.java
===================================================================
--- trunk/jython/src/org/python/core/codecs.java	2008-10-19 20:51:56 UTC (rev 5469)
+++ trunk/jython/src/org/python/core/codecs.java	2008-10-19 22:04:51 UTC (rev 5470)
@@ -243,9 +243,17 @@
         ArgParser ap = new ArgParser("replace_errors", args, kws, "exc");
         PyObject exc = ap.getPyObject(0);
         if (Py.isInstance(exc, Py.UnicodeDecodeError)) {
+            PyObject object = exc.__getattr__("object");
+            if (!Py.isInstance(object, PyString.TYPE) || Py.isInstance(object, PyUnicode.TYPE)) {
+                throw Py.TypeError("object attribute must be str");        
+            }
             PyObject end = exc.__getattr__("end");
             return new PyTuple(new PyUnicode(Py_UNICODE_REPLACEMENT_CHARACTER), end);
         } else if (Py.isInstance(exc, Py.UnicodeEncodeError)) {
+            PyObject object = exc.__getattr__("object");
+            if (!Py.isInstance(object, PyUnicode.TYPE)) {
+                throw Py.TypeError("object attribute must be unicode");        
+            }
             PyObject end = exc.__getattr__("end");
             return new PyTuple(Py.java2py("?"), end);
         } else if (Py.isInstance(exc, Py.UnicodeTranslateError)) {
@@ -343,9 +351,9 @@
     }
 
     private static void backslashreplace_internal(int start, int end, String object, StringBuilder replacement) {
-        for (int i = start; i < end; i++) {
+        for (Iterator<Integer> iter = new StringSubsequenceIterator(object, start, end, 1); iter.hasNext();) {
+            int c = iter.next();
             replacement.append('\\');
-            char c = object.charAt(i);
             if (c >= 0x00010000) {
                 replacement.append('U');
                 replacement.append(hexdigits[(c >> 28) & 0xf]);
@@ -1257,3 +1265,75 @@
         }
     }
 }
+
+
+class StringSubsequenceIterator implements Iterator {
+
+    private final String s;
+    private int current,  k,  start,  stop,  step;
+
+    StringSubsequenceIterator(String s, int start, int stop, int step) {
+//        System.out.println("s=" + s.length() + ",start=" + start + ",stop=" + stop);
+        this.s = s;
+        k = 0;
+        current = start;
+        this.start = start;
+        this.stop = stop;
+        this.step = step;
+      
+        // this bounds checking is necessary to convert between use of code units elsewhere, and codepoints here
+        // it would be nice if it were unnecessary!
+        int count = getCodePointCount(s);
+        if (start >= count) {
+            this.stop = -1;
+        }
+        else if (stop >= count) {
+            this.stop = count;
+        }
+        
+        for (int i = 0; i < start; i++) {
+            nextCodePoint();
+        }
+    }
+
+    StringSubsequenceIterator(String s) {
+        this(s, 0, getCodePointCount(s), 1);
+    }
+
+    private static int getCodePointCount(String s) {
+        return s.codePointCount(0, s.length());
+    }
+    
+    public boolean hasNext() {
+        return current < stop;
+    }
+
+    public Object next() {
+        int codePoint = nextCodePoint();
+        current += 1;
+        for (int j = 1; j < step && hasNext(); j++) {
+            nextCodePoint();
+            current += 1;
+        }
+        return codePoint;
+    }
+
+    private int nextCodePoint() {
+        int U;
+//        System.out.println("k=" + k);
+        int W1 = s.charAt(k);
+        if (W1 >= 0xD800 && W1 < 0xDC00) {
+            int W2 = s.charAt(k + 1);
+            U = (((W1 & 0x3FF) << 10) | (W2 & 0x3FF)) + 0x10000;
+            k += 2;
+        } else {
+            U = W1;
+            k += 1;
+        }
+        return U;
+    }
+
+    public void remove() {
+        throw new UnsupportedOperationException("Not supported on String objects (immutable)");
+    }
+}

Modified: trunk/jython/src/org/python/core/exceptions.java
===================================================================
--- trunk/jython/src/org/python/core/exceptions.java	2008-10-19 20:51:56 UTC (rev 5469)
+++ trunk/jython/src/org/python/core/exceptions.java	2008-10-19 22:04:51 UTC (rev 5470)
@@ -381,7 +381,7 @@
         if (end == (start + 1)) {
             PyObject object = self.__getattr__("object");
             int badByte = (object.toString().charAt(start)) & 0xff;
-            result = String.format("'%.400s' codec can't decode byte 0x%s in position %d: %.400s",
+            result = String.format("'%.400s' codec can't decode byte 0x%x in position %d: %.400s",
                                    encoding, badByte, start, reason);
         } else {
             result = String.format("'%.400s' codec can't decode bytes in position %d-%d: %.400s",
@@ -413,7 +413,7 @@
         String result;
         if (end == (start + 1)) {
             PyObject object = self.__getattr__("object");
-            int badchar = object.toString().charAt(start);
+            int badchar = object.toString().codePointAt(start);
             String badcharStr;
             if (badchar <= 0xff) {
                 badcharStr = String.format("x%02x", badchar);
@@ -460,7 +460,7 @@
 
         String result;
         if (end == (start + 1)) {
-            int badchar = (self.__getattr__("object").toString().charAt(start));
+            int badchar = (self.__getattr__("object").toString().codePointAt(start));
             String badCharStr;
             if (badchar <= 0xff) {
                 badCharStr = String.format("x%02x", badchar);


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.