From: Finn B. <bc...@us...> - 2001-01-21 14:02:25
|
Update of /cvsroot/jython/jython/org/python/core In directory usw-pr-cvs1:/tmp/cvs-serv11257/core Modified Files: codecs.java Log Message: Moved the RawUnicodeEscape from Modules/_codecs to core/codecs. The encoding is used by cPickle, and keeping it in _codecs makes it very difficult to use cPickle in frozen applications. Index: codecs.java =================================================================== RCS file: /cvsroot/jython/jython/org/python/core/codecs.java,v retrieving revision 2.3 retrieving revision 2.4 diff -C2 -r2.3 -r2.4 *** codecs.java 2000/12/11 18:45:25 2.3 --- codecs.java 2001/01/21 14:02:35 2.4 *************** *** 195,198 **** --- 195,273 ---- + /* --- RawUnicodeEscape Codec -------------------------------------------- */ + + private static char[] hexdigit = "0123456789ABCDEF".toCharArray(); + + // The modified flag is used by cPickle. + public static String PyUnicode_EncodeRawUnicodeEscape(String str, String errors, + boolean modifed) { + + int size = str.length(); + StringBuffer v = new StringBuffer(str.length()); + + for (int i = 0; i < size; i++) { + char ch = str.charAt(i); + if (ch >= 256 || (modifed && (ch == '\n' || ch == '\\'))) { + v.append("\\u"); + v.append(hexdigit[(ch >>> 12) & 0xF]); + v.append(hexdigit[(ch >>> 8) & 0xF]); + v.append(hexdigit[(ch >>> 4) & 0xF]); + v.append(hexdigit[ch & 0xF]); + } else + v.append(ch); + } + + return v.toString(); + } + + + public static String PyUnicode_DecodeRawUnicodeEscape(String str, String errors) { + int size = str.length(); + StringBuffer v = new StringBuffer(size); + + for (int i = 0; i < size; ) { + char ch = str.charAt(i); + + /* Non-escape characters are interpreted as Unicode ordinals */ + if (ch != '\\') { + v.append(ch); + i++; + continue; + } + + /* \\u-escapes are only interpreted iff the number of leading + backslashes is odd */ + int bs = i; + while (i < size) { + ch = str.charAt(i); + if (ch != '\\') + break; + v.append(ch); + i++; + } + if (((i - bs) & 1) == 0 || i >= size || 
ch != 'u') { + continue; + } + v.setLength(v.length() - 1); + i++; + + /* \\uXXXX with 4 hex digits */ + int x = 0; + for (int j = 0; j < 4; j++) { + ch = str.charAt(i+j); + int d = Character.digit(ch, 16); + if (d == -1) { + codecs.decoding_error("unicode escape", v, errors, + "truncated \\uXXXX"); + break; + } + x = ((x<<4) & ~0xF) + d; + } + i += 4; + v.append((char) x); + } + return v.toString(); + } + /* --- Utility methods -------------------------------------------- */ |