[Jython-checkins] CVS: jython/org/python/core PyString.java,2.28,2.29

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/jython/jython/org/python/core
In directory slayer.i.sourceforge.net:/tmp/cvs-serv20935/core

Modified Files:
	PyString.java 
Log Message:
Moved the method _codecs.decodeUnicodeEscape() to 
PyString.decode_UnicodeEscape(). This is so the 
string parser in SimpleNode can use it too.


Index: PyString.java
===================================================================
RCS file: /cvsroot/jython/jython/org/python/core/PyString.java,v
retrieving revision 2.28
retrieving revision 2.29
diff -C2 -r2.28 -r2.29
*** PyString.java	2000/11/30 09:03:48	2.28
--- PyString.java	2000/12/04 21:23:06	2.29
***************
*** 279,283 ****
          dict.__setitem__("atoi", null);
          dict.__setitem__("atol", null);
!         dict.__setitem__("unicodeescape", null);
      }
  
--- 279,284 ----
          dict.__setitem__("atoi", null);
          dict.__setitem__("atol", null);
!         dict.__setitem__("encode_UnicodeEscape", null);
!         dict.__setitem__("decode_UnicodeEscape", null);
      }
  
***************
*** 309,318 ****
  
      public PyString __repr__() {
!         return new PyString(unicodeescape(string, true));
      }
  
      private static char[] hexdigit = "0123456789ABCDEF".toCharArray();
  
!     public static String unicodeescape(String str, boolean use_quotes) {
          int size = str.length();
          StringBuffer v = new StringBuffer(str.length());
--- 310,319 ----
  
      public PyString __repr__() {
!         return new PyString(encode_UnicodeEscape(string, true));
      }
  
      private static char[] hexdigit = "0123456789ABCDEF".toCharArray();
  
!     public static String encode_UnicodeEscape(String str, boolean use_quotes) {
          int size = str.length();
          StringBuffer v = new StringBuffer(str.length());
***************
*** 366,369 ****
--- 367,540 ----
              v.append(quote);
          return v.toString();
+     }
+ 
+     private static ucnhashAPI pucnHash = null;
+ 
+     public static String decode_UnicodeEscape(String str, int start, int end, 
+                                               String errors, boolean unicode)
+     {
+         StringBuffer v = new StringBuffer(end-start);
+         for (int s = start; s < end; ) {
+             char ch = str.charAt(s);
+ 
+             /* Non-escape characters are interpreted as Unicode ordinals */
+             if (ch != '\\') {
+                 v.append(ch);
+                 s++;
+ 	        continue;
+             }
+     
+             /* \ - Escapes */
+             s++;
+             ch = str.charAt(s++);
+             switch (ch) {
+ 
+             /* \x escapes */
+             case '\n': break;
+             case '\\': v.append('\\'); break;
+             case '\'': v.append('\''); break;
+             case '\"': v.append('\"'); break;
+             case 'b': v.append('\b'); break;
+             case 'f': v.append('\014'); break; /* FF */
+             case 't': v.append('\t'); break;
+             case 'n': v.append('\n'); break;
+             case 'r': v.append('\r'); break;
+             case 'v': v.append('\013'); break; /* VT */
+             case 'a': v.append('\007'); break; /* BEL, not classic C */
+ 
+             /* \OOO (octal) escapes */
+             case '0': case '1': case '2': case '3':
+             case '4': case '5': case '6': case '7':
+ 
+                 int x = Character.digit(ch, 8);
+                 for (int j = 0; j < 2 && s < end; j++) {
+                     ch = str.charAt(s++);
+                     if (ch < '0' || ch > '7')
+                         break;
+                     x = (x<<3) + Character.digit(ch, 8);
+                 }
+                 v.append((char) x);
+                 break;
+ 
+             case 'x':
+                 int i;
+                 for (x = 0, i = 0; i < 2 && s < end; i++) {
+                     ch = str.charAt(s + i);
+                     int d = Character.digit(ch, 16);
+                     if (d == -1) {
+                         codecs.decoding_error("unicode escape", v, errors,
+                                                      "truncated \\xXX");
+                         i++;
+                         break;
+                     }
+ 
+                     x = ((x<<4) & ~0xF) + d;
+                 }
+                 s += i;
+                 v.append((char) x);
+                 break;
+ 
+             /* \ uXXXX with 4 hex digits */
+             case 'u':
+                 if (!unicode) {
+                     v.append('\\');
+                     v.append('u');
+                     break;
+                 }
+                 if (s+4 > end) {
+                     codecs.decoding_error("unicode escape", v, errors,
+                                               "truncated \\uXXXX");
+                     break;
+                 }
+                 for (x = 0, i = 0; i < 4; i++) {
+                     ch = str.charAt(s + i);
+                     int d  = Character.digit(ch, 16);
+                     if (d == -1) {
+                         codecs.decoding_error("unicode escape", v, errors,
+                                               "truncated \\uXXXX");
+                         break;
+                     }
+                     x = ((x<<4) & ~0xF) + d;
+                 }
+                 s += i;
+                 v.append((char) x);
+                 break;
+ 
+             case 'N':
+                 if (!unicode) {
+                     v.append('\\');
+                     v.append('N');
+                     break;
+                 }
+                 /* Ok, we need to deal with Unicode Character Names now,
+                  * make sure we've imported the hash table data...
+                  */
+                 if (pucnHash == null) {
+                      PyObject mod = imp.importName("ucnhash", true);
+                      mod = mod.__call__();
+                      pucnHash = (ucnhashAPI) mod.__tojava__(Object.class);
+                      if (pucnHash.getCchMax() < 0)
+                          codecs.decoding_error("unicode escape", v, errors, 
+                                  "Unicode names not loaded");
+                 }
+ 
+                 if (str.charAt(s) == '{') {
+                     int startName = s + 1;
+                     int endBrace = startName;
+ 
+                     /* look for either the closing brace, or we
+                      * exceed the maximum length of the unicode character names
+                      */
+                     int maxLen = pucnHash.getCchMax();
+                     while (endBrace < end && str.charAt(endBrace) != '}' 
+                            && (endBrace - startName) <= maxLen) {
+                         endBrace++;
+                     }
+                     if (endBrace != end && str.charAt(endBrace) == '}') {
+                          int value = pucnHash.getValue(str, startName, endBrace);
+                          if (value < 0) {
+                              codecs.decoding_error("unicode escape", v, errors, 
+                                  "Invalid Unicode Character Name");
+                              v.append('\\');
+                              v.append(str.charAt(s-1));
+                              break;
+                          }
+ 
+                          if (value < 1<<16) {
+                              /* In UCS-2 range, easy solution.. */
+                              v.append((char) value);
+                          } else {
+                              /* Oops, its in UCS-4 space, */
+                              /*  compute and append the two surrogates: */
+                              /*  translate from 10000..10FFFF to 0..FFFFF */
+                              value -= 0x10000;
+ 
+                              /* high surrogate = top 10 bits added to D800 */
+                              v.append((char) (0xD800 + (value >> 10)));
+ 
+                              /* low surrogate  = bottom 10 bits added to DC00*/
+                              v.append((char) (0xDC00 + (value & ~0xFC00)));
+                         }
+                         s = endBrace + 1;
+                     } else {
+                          codecs.decoding_error("unicode escape", v, errors, 
+                               "Unicode name missing closing brace");
+                          v.append('\\');
+                          v.append(str.charAt(s-1));
+                          break;
+                     }
+                     break;
+                 }
+                 codecs.decoding_error("unicode escape", v, errors, 
+                      "Missing opening brace for Unicode Character Name escape");
+  
+                 /* fall through on purpose */
+            default:
+                v.append('\\');
+                v.append(str.charAt(s-1));
+                break;
+            }
+        }
+        return v.toString();
      }