koco-cvs Mailing List for Python Korean Codecs (Page 5)
Brought to you by:
perky
You can subscribe to this list here.
2002 |
Jan
|
Feb
|
Mar
|
Apr
(88) |
May
(5) |
Jun
|
Jul
(27) |
Aug
|
Sep
|
Oct
(5) |
Nov
|
Dec
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
2003 |
Jan
(77) |
Feb
(3) |
Mar
|
Apr
(22) |
May
(123) |
Jun
(80) |
Jul
(83) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: Hye-Shik C. <pe...@us...> - 2003-06-19 17:49:07
|
perky 03/06/19 10:49:01 Modified: tools genmap_tchinese.py Log: Fix some big5 mappings to give more roundtrip compatibility. Revision Changes Path 1.4 +18 -1 cjkcodecs/tools/genmap_tchinese.py Index: genmap_tchinese.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_tchinese.py,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- genmap_tchinese.py 10 Jun 2003 11:12:12 -0000 1.3 +++ genmap_tchinese.py 19 Jun 2003 17:49:01 -0000 1.4 @@ -26,7 +26,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: genmap_tchinese.py,v 1.3 2003/06/10 11:12:12 perky Exp $ +# $Id: genmap_tchinese.py,v 1.4 2003/06/19 17:49:01 perky Exp $ # from genmap_support import * @@ -54,6 +54,19 @@ print "Loading Mapping File..." cp950decmap = loadmap(cp950map) big5decmap = loadmap(big5map) + +# big5 mapping fix (see doc/NOTES.big5) +for m in """\ +0xA15A 0x2574 +0xA1C3 0xFFE3 +0xA1C5 0x02CD +0xA1FE 0xFF0F +0xA240 0xFF3C +0xA2CC 0x5341 +0xA2CE 0x5345""".splitlines(): + bcode, ucode = map(eval, m.split()) + big5decmap[bcode >> 8][bcode & 0xff] = ucode + big5encmap, cp950encmap = {}, {} for c1, m in cp950decmap.items(): for c2, code in m.items(): @@ -73,6 +86,10 @@ del cp950encmap[code >> 8][code & 0xff] if not cp950encmap[code >> 8]: del cp950encmap[code >>8] + +# fix unicode->big5 duplicated mapping priority +big5encmap[0x53][0x41] = 0xA451 +big5encmap[0x53][0x45] = 0xA4CA omap = open('map_big5.h', 'w') printcopyright(omap) |
From: Hye-Shik C. <pe...@us...> - 2003-06-19 17:49:04
|
perky 03/06/19 10:49:01 Modified: src/maps map_big5.h map_cp950ext.h Log: Fix some big5 mappings to give more roundtrip compatibility. Revision Changes Path 1.3 +10 -13 cjkcodecs/src/maps/map_big5.h Index: map_big5.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/maps/map_big5.h,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- map_big5.h 17 May 2003 20:58:58 -0000 1.2 +++ map_big5.h 19 Jun 2003 17:49:01 -0000 1.3 @@ -1,12 +1,12 @@ /* - * $Id: map_big5.h,v 1.2 2003/05/17 20:58:58 perky Exp $ + * $Id: map_big5.h,v 1.3 2003/06/19 17:49:01 perky Exp $ */ static const Py_UNICODE __big5_decmap[16702] = { 0x3000, 0xff0c, 0x3001, 0x3002, 0xff0e, 0x2022, 0xff1b, 0xff1a, 0xff1f, 0xff01, 0xfe30, 0x2026, 0x2025, 0xfe50, 0xff64, 0xfe52, 0x00b7, 0xfe54, 0xfe55, 0xfe56, 0xfe57, 0xff5c, 0x2013, 0xfe31, - 0x2014, 0xfe33, 0xfffd, 0xfe34, 0xfe4f, 0xff08, 0xff09, 0xfe35, + 0x2014, 0xfe33, 0x2574, 0xfe34, 0xfe4f, 0xff08, 0xff09, 0xfe35, 0xfe36, 0xff5b, 0xff5d, 0xfe37, 0xfe38, 0x3014, 0x3015, 0xfe39, 0xfe3a, 0x3010, 0x3011, 0xfe3b, 0xfe3c, 0x300a, 0x300b, 0xfe3d, 0xfe3e, 0x3008, 0x3009, 0xfe3f, 0xfe40, 0x300c, 0x300d, 0xfe41, @@ -19,14 +19,14 @@ 0x201d, 0x301d, 0x301e, 0x2035, 0x2032, 0xff03, 0xff06, 0xff0a, 0x203b, 0x00a7, 0x3003, 0x25cb, 0x25cf, 0x25b3, 0x25b2, 0x25ce, 0x2606, 0x2605, 0x25c7, 0x25c6, 0x25a1, 0x25a0, 0x25bd, 0x25bc, - 0x32a3, 0x2105, 0x203e, 0xfffd, 0xff3f, 0xfffd, 0xfe49, 0xfe4a, + 0x32a3, 0x2105, 0x203e, 0xffe3, 0xff3f, 0x02cd, 0xfe49, 0xfe4a, 0xfe4d, 0xfe4e, 0xfe4b, 0xfe4c, 0xfe5f, 0xfe60, 0xfe61, 0xff0b, 0xff0d, 0x00d7, 0x00f7, 0x00b1, 0x221a, 0xff1c, 0xff1e, 0xff1d, 0x2266, 0x2267, 0x2260, 0x221e, 0x2252, 0x2261, 0xfe62, 0xfe63, 0xfe64, 0xfe65, 0xfe66, 0x223c, 0x2229, 0x222a, 0x22a5, 0x2220, 0x221f, 0x22bf, 0x33d2, 0x33d1, 0x222b, 0x222e, 0x2235, 0x2234, 0x2640, 0x2642, 0x2641, 0x2609, 0x2191, 0x2193, 0x2190, 0x2192, - 0x2196, 0x2197, 0x2199, 0x2198, 0x2225, 0x2223, 0xfffd, 0xfffd, 
+ 0x2196, 0x2197, 0x2199, 0x2198, 0x2225, 0x2223, 0xff0f, 0xff3c, 0xff0f, 0xff3c, 0xff04, 0x00a5, 0x3012, 0x00a2, 0x00a3, 0xff05, 0xff20, 0x2103, 0x2109, 0xfe69, 0xfe6a, 0xfe6b, 0x33d5, 0x339c, 0x339d, 0x339e, 0x33ce, 0x33a1, 0x338e, 0x338f, 0x33c4, 0x00b0, @@ -44,7 +44,7 @@ 0xff12, 0xff13, 0xff14, 0xff15, 0xff16, 0xff17, 0xff18, 0xff19, 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x3021, 0x3022, 0x3023, 0x3024, 0x3025, 0x3026, - 0x3027, 0x3028, 0x3029, 0xfffd, 0x5344, 0xfffd, 0xff21, 0xff22, + 0x3027, 0x3028, 0x3029, 0x5341, 0x5344, 0x5345, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27, 0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e, 0xff2f, 0xff30, 0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37, 0xff38, 0xff39, 0xff3a, @@ -2352,7 +2352,7 @@ /* 0xFF */ {0, 0, 0}, }; -static const DBCHAR __big5_encmap[21790] = { +static const DBCHAR __big5_encmap[21764] = { 0xa246, 0xa247, NOCHAR, 0xa244, NOCHAR, 0xa1b1, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa258, 0xa1d3, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa150, NOCHAR, NOCHAR, @@ -2364,7 +2364,7 @@ NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa1d2, 0xa3be, NOCHAR, - 0xa3bc, 0xa3bd, 0xa3bf, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, + 0xa3bc, 0xa3bd, 0xa3bf, NOCHAR, 0xa1c5, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa3bb, 0xa344, 0xa345, 0xa346, 0xa347, 0xa348, 0xa349, 0xa34a, 0xa34b, 0xa34c, 0xa34d, 0xa34e, 0xa34f, 0xa350, 0xa351, 0xa352, @@ -2446,7 +2446,7 @@ NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa2a5, NOCHAR, NOCHAR, 0xa2a7, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa2a6, NOCHAR, NOCHAR, 0xa27e, - 0xa2a1, 0xa2a3, 0xa2a2, 0xa2ac, 0xa2ad, 0xa2ae, NOCHAR, NOCHAR, + 0xa2a1, 0xa2a3, 0xa2a2, 0xa2ac, 0xa2ad, 0xa2ae, 0xa15a, 
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa262, 0xa263, 0xa264, 0xa265, 0xa266, 0xa267, 0xa268, 0xa269, 0xa270, 0xa26f, 0xa26e, 0xa26d, 0xa26c, @@ -5073,10 +5073,7 @@ NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa2ce, + NOCHAR, NOCHAR, NOCHAR, 0xa1c3, }; static const struct unim_index big5_encmap[256] = { @@ -5335,6 +5332,6 @@ /* 0xFC */ {0, 0, 0}, /* 0xFD */ {0, 0, 0}, /* 0xFE */ {__big5_encmap+21477, 0x30, 0x6b}, -/* 0xFF */ {__big5_encmap+21537, 0x01, 0xfd}, +/* 0xFF */ {__big5_encmap+21537, 0x01, 0xe3}, }; 1.4 +41 -71 cjkcodecs/src/maps/map_cp950ext.h Index: map_cp950ext.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/maps/map_cp950ext.h,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- map_cp950ext.h 28 May 2003 06:51:34 -0000 1.3 +++ map_cp950ext.h 19 Jun 2003 17:49:01 -0000 1.4 @@ -1,11 +1,10 @@ /* - * $Id: map_cp950ext.h,v 1.3 2003/05/28 06:51:34 perky Exp $ + * $Id: map_cp950ext.h,v 1.4 2003/06/19 17:49:01 perky Exp $ */ -static const Py_UNICODE __cp950ext_decmap[371] = { +static const Py_UNICODE __cp950ext_decmap[224] = { 0x2027, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0xfe51, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, - UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0x2574, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, @@ -18,38 +17,20 @@ UNIINV, UNIINV, 
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, - UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0x00af, 0xffe3, UNIINV, - 0x02cd, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, - UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, - UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, - UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0xff5e, UNIINV, - UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, - UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0x2295, 0x2299, UNIINV, - UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, - UNIINV, 0xff0f, 0xff3c, 0x2215, 0xfe68, UNIINV, 0xffe5, UNIINV, - 0xffe0, 0xffex00af, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, + UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0xff5e, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, - UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0x5341, UNIINV, - 0x5345, 0x20ac, 0x7881, 0x92b9, 0x88cf, 0x58bb, 0x6052, 0x7ca7, - 0x5afa, 0x2554, 0x2566, 0x2557, 0x2560, 0x256c, 0x2563, 0x255a, - 0x2569, 0x255d, 0x2552, 0x2564, 0x2555, 0x255e, 0x256a, 0x2561, - 0x2558, 0x2567, 0x255b, 0x2553, 0x2565, 0x2556, 0x255f, 0x256b, - 0x2562, 0x2559, 0x2568, 0x255c, 0x2551, 0x2550, 0x256d, 0x256e, - 0x2570, 0x256f, 0x2593, + UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0x2295, 0x2299, 0x2215, + 0xfe68, UNIINV, 0xffe5, UNIINV, 0xffe0, 0xffe1, 0x20ac, 0x7881, + 0x92b9, 0x88cf, 0x58bb, 0x6052, 0x7ca7, 0x5afa, 0x2554, 0x2566, + 0x2557, 0x2560, 0x256c, 0x2563, 0x255a, 0x2569, 0x255d, 0x2552, + 0x2564, 0x2555, 0x255e, 0x256a, 0x2561, 0x2558, 0x2567, 0x255b, + 0x2553, 0x2565, 0x2556, 0x255f, 0x256b, 0x2562, 0x2559, 0x2568, + 0x255c, 0x2551, 0x2550, 0x256d, 0x256e, 0x2570, 
0x256f, 0x2593, }; static const struct dbcs_index cp950ext_decmap[256] = { @@ -214,9 +195,9 @@ /* 0x9E */ {0, 0, 0}, /* 0x9F */ {0, 0, 0}, /* 0xA0 */ {0, 0, 0}, -/* 0xA1 */ {__cp950ext_decmap+0, 0x45, 0xfe}, -/* 0xA2 */ {__cp950ext_decmap+186, 0x40, 0xce}, -/* 0xA3 */ {__cp950ext_decmap+329, 0xe1, 0xe1}, +/* 0xA1 */ {__cp950ext_decmap+0, 0x45, 0xf3}, +/* 0xA2 */ {__cp950ext_decmap+175, 0x41, 0x47}, +/* 0xA3 */ {__cp950ext_decmap+182, 0xe1, 0xe1}, /* 0xA4 */ {0, 0, 0}, /* 0xA5 */ {0, 0, 0}, /* 0xA6 */ {0, 0, 0}, @@ -302,7 +283,7 @@ /* 0xF6 */ {0, 0, 0}, /* 0xF7 */ {0, 0, 0}, /* 0xF8 */ {0, 0, 0}, -/* 0xF9 */ {__cp950ext_decmap+330, 0xd6, 0xfe}, +/* 0xF9 */ {__cp950ext_decmap+183, 0xd6, 0xfe}, /* 0xFA */ {0, 0, 0}, /* 0xFB */ {0, 0, 0}, /* 0xFC */ {0, 0, 0}, @@ -311,10 +292,8 @@ /* 0xFF */ {0, 0, 0}, }; -static const DBCHAR __cp950ext_encmap[587] = { - 0xa1c2, 0xa1c5, 0xa145, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, +static const DBCHAR __cp950ext_encmap[502] = { + 0xa1c2, 0xa145, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, @@ -328,10 +307,9 @@ NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa3e1, - 0xa241, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, + NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa3e1, 0xa241, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, @@ -345,30 +323,26 @@ NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - 0xa1f2, NOCHAR, NOCHAR, NOCHAR, 0xa1f3, 0xf9f8, 0xf9e6, 0xf9ef, - 0xf9dd, 0xf9e8, 0xf9f1, 0xf9df, 0xf9ec, 0xf9f5, 0xf9e3, 0xf9ee, - 0xf9f7, 0xf9e5, NOCHAR, 0xf9f2, 0xf9e0, NOCHAR, 0xf9f4, 0xf9e2, - 0xf9e7, 0xf9f0, 0xf9de, 0xf9ed, 0xf9f6, 0xf9e4, NOCHAR, 0xf9f3, - 0xf9e1, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - 0xa15a, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xf9fe, - 0xa2cc, NOCHAR, NOCHAR, NOCHAR, 0xa2ce, 0xf9d9, 0xf9dc, 0xf9da, - 0xf9d6, 0xf9db, 0xf9d8, 0xf9d7, 0xa14e, NOCHAR, NOCHAR, NOCHAR, + NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa1f2, + NOCHAR, NOCHAR, NOCHAR, 0xa1f3, 0xf9f8, 0xf9e6, 0xf9ef, 0xf9dd, + 0xf9e8, 0xf9f1, 0xf9df, 0xf9ec, 0xf9f5, 0xf9e3, 0xf9ee, 0xf9f7, + 0xf9e5, NOCHAR, 0xf9f2, 0xf9e0, NOCHAR, 0xf9f4, 0xf9e2, 0xf9e7, + 0xf9f0, 0xf9de, 0xf9ed, 0xf9f6, 0xf9e4, NOCHAR, 0xf9f3, 0xf9e1, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - NOCHAR, NOCHAR, NOCHAR, 0xa242, 0xa1fe, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, + NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xf9fe, 0xf9d9, + 0xf9dc, 0xf9da, 0xf9d6, 0xf9db, 0xf9d8, 0xf9d7, 0xa14e, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, + NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa242, 
0xa1e3, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - NOCHAR, 0xa240, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, - NOCHAR, NOCHAR, NOCHAR, 0xa1exa246, 0xa247, NOCHAR, - 0xa1c3, NOCHAR, 0xa244, + 0xa246, 0xa247, NOCHAR, NOCHAR, NOCHAR, 0xa244, }; static const struct unim_index cp950ext_encmap[256] = { /* 0x00 */ {__cp950ext_encmap+0, 0xaf, 0xaf}, /* 0x01 */ {0, 0, 0}, -/* 0x02 */ {__cp950ext_encmap+1, 0xcd, 0xcd}, +/* 0x02 */ {0, 0, 0}, /* 0x03 */ {0, 0, 0}, /* 0x04 */ {0, 0, 0}, /* 0x05 */ {0, 0, 0}, @@ -421,12 +391,12 @@ /* 0x1D */ {0, 0, 0}, /* 0x1E */ {0, 0, 0}, /* 0x1F */ {0, 0, 0}, -/* 0x20 */ {__cp950ext_encmap+2, 0x27, 0xac}, +/* 0x20 */ {__cp950ext_encmap+1, 0x27, 0xac}, /* 0x21 */ {0, 0, 0}, -/* 0x22 */ {__cp950ext_encmap+136, 0x15, 0x99}, +/* 0x22 */ {__cp950ext_encmap+135, 0x15, 0x99}, /* 0x23 */ {0, 0, 0}, /* 0x24 */ {0, 0, 0}, -/* 0x25 */ {__cp950ext_encmap+269, 0x51, 0x93}, +/* 0x25 */ {__cp950ext_encmap+268, 0x51, 0x93}, /* 0x26 */ {0, 0, 0}, /* 0x27 */ {0, 0, 0}, /* 0x28 */ {0, 0, 0}, @@ -472,20 +442,20 @@ /* 0x50 */ {0, 0, 0}, /* 0x51 */ {0, 0, 0}, /* 0x52 */ {0, 0, 0}, -/* 0x53 */ {__cp950ext_encmap+336, 0x41, 0x45}, +/* 0x53 */ {0, 0, 0}, /* 0x54 */ {0, 0, 0}, /* 0x55 */ {0, 0, 0}, /* 0x56 */ {0, 0, 0}, /* 0x57 */ {0, 0, 0}, -/* 0x58 */ {__cp950ext_encmap+341, 0xbb, 0xbb}, +/* 0x58 */ {__cp950ext_encmap+335, 0xbb, 0xbb}, /* 0x59 */ {0, 0, 0}, -/* 0x5A */ {__cp950ext_encmap+342, 0xfa, 0xfa}, +/* 0x5A */ {__cp950ext_encmap+336, 0xfa, 0xfa}, /* 0x5B */ {0, 0, 0}, /* 0x5C */ {0, 0, 0}, /* 0x5D */ {0, 0, 0}, /* 0x5E */ {0, 0, 0}, /* 0x5F */ {0, 0, 0}, -/* 0x60 */ {__cp950ext_encmap+343, 0x52, 0x52}, +/* 0x60 */ {__cp950ext_encmap+337, 0x52, 0x52}, /* 0x61 */ {0, 0, 0}, /* 0x62 */ {0, 0, 0}, /* 0x63 */ {0, 0, 0}, @@ -509,11 +479,11 
@@ /* 0x75 */ {0, 0, 0}, /* 0x76 */ {0, 0, 0}, /* 0x77 */ {0, 0, 0}, -/* 0x78 */ {__cp950ext_encmap+344, 0x81, 0x81}, +/* 0x78 */ {__cp950ext_encmap+338, 0x81, 0x81}, /* 0x79 */ {0, 0, 0}, /* 0x7A */ {0, 0, 0}, /* 0x7B */ {0, 0, 0}, -/* 0x7C */ {__cp950ext_encmap+345, 0xa7, 0xa7}, +/* 0x7C */ {__cp950ext_encmap+339, 0xa7, 0xa7}, /* 0x7D */ {0, 0, 0}, /* 0x7E */ {0, 0, 0}, /* 0x7F */ {0, 0, 0}, @@ -525,7 +495,7 @@ /* 0x85 */ {0, 0, 0}, /* 0x86 */ {0, 0, 0}, /* 0x87 */ {0, 0, 0}, -/* 0x88 */ {__cp950ext_encmap+346, 0xcf, 0xcf}, +/* 0x88 */ {__cp950ext_encmap+340, 0xcf, 0xcf}, /* 0x89 */ {0, 0, 0}, /* 0x8A */ {0, 0, 0}, /* 0x8B */ {0, 0, 0}, @@ -535,7 +505,7 @@ /* 0x8F */ {0, 0, 0}, /* 0x90 */ {0, 0, 0}, /* 0x91 */ {0, 0, 0}, -/* 0x92 */ {__cp950ext_encmap+347, 0xb9, 0xb9}, +/* 0x92 */ {__cp950ext_encmap+341, 0xb9, 0xb9}, /* 0x93 */ {0, 0, 0}, /* 0x94 */ {0, 0, 0}, /* 0x95 */ {0, 0, 0}, @@ -643,7 +613,7 @@ /* 0xFB */ {0, 0, 0}, /* 0xFC */ {0, 0, 0}, /* 0xFD */ {0, 0, 0}, -/* 0xFE */ {__cp950ext_encmap+348, 0x51, 0x68}, -/* 0xFF */ {__cp950ext_encmap+372, 0x0f, 0xe5}, +/* 0xFE */ {__cp950ext_encmap+342, 0x51, 0x68}, +/* 0xFF */ {__cp950ext_encmap+366, 0x5e, 0xe5}, }; |
From: Hye-Shik C. <pe...@us...> - 2003-06-19 17:49:03
|
perky 03/06/19 10:49:01 Added: . CHANGES NOTES.big5 Log: Fix some big5 mappings to give more roundtrip compatibility. Revision Changes Path 1.1 cjkcodecs/CHANGES Index: CHANGES =================================================================== Changes with CJKCodecs 1.0 *) Changes a few characters of a big5 codepoint mapping to cp950's rather than 0xfffd. (documented in NOTES.big5) 1.1 cjkcodecs/NOTES.big5 Index: NOTES.big5 =================================================================== big5 codec maps the following characters as cp950 does rather than following Unicode.org's mapping. BIG5 Unicode Description 0xA15A 0x2574 SPACING UNDERSCORE 0xA1C3 0xFFE3 SPACING HEAVY OVERSCORE 0xA1C5 0x02CD SPACING HEAVY UNDERSCORE 0xA1FE 0xFF0F LT DIAG UP RIGHT TO LOW LEFT 0xA240 0xFF3C LT DIAG UP LEFT TO LOW RIGHT 0xA2CC 0x5341 HANGZHOU NUMERAL TEN 0xA2CE 0x5345 HANGZHOU NUMERAL THIRTY Because Unicode 0x5341 and 0x5345 are already mapped to other big5 codes, roundtrip compatibility is not guaranteed for them. |
From: Hye-Shik C. <pe...@us...> - 2003-06-16 19:13:45
|
perky 03/06/16 12:13:44 Modified: . README setup.py Log: Mark this version as 1.1.1 Revision Changes Path 1.4 +3 -3 iconvcodec/README Index: README =================================================================== RCS file: /cvsroot/koco/iconvcodec/README,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- README 11 Jun 2003 12:08:40 -0000 1.3 +++ README 16 Jun 2003 19:13:44 -0000 1.4 @@ -1,8 +1,8 @@ -Python Iconv Codec version 1.1 -============================== +Python Iconv Codec version 1.1.1 +================================ Copyright(C) Hye-Shik Chang, 2003. -$Id: README,v 1.3 2003/06/11 12:08:40 perky Exp $ +$Id: README,v 1.4 2003/06/16 19:13:44 perky Exp $ 1.5 +2 -2 iconvcodec/setup.py Index: setup.py =================================================================== RCS file: /cvsroot/koco/iconvcodec/setup.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- setup.py 11 Jun 2003 12:08:40 -0000 1.4 +++ setup.py 16 Jun 2003 19:13:44 -0000 1.5 @@ -25,7 +25,7 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -# $Id: setup.py,v 1.4 2003/06/11 12:08:40 perky Exp $ +# $Id: setup.py,v 1.5 2003/06/16 19:13:44 perky Exp $ # import sys @@ -75,7 +75,7 @@ org_install_lib or self.install_purelib setup (name = "iconvcodec", - version = "1.1", + version = "1.1.1", author = "Hye-Shik Chang", author_email = "pe...@Fr...", cmdclass = {'install': Install}, |
From: Hye-Shik C. <pe...@us...> - 2003-06-16 19:12:46
|
perky 03/06/16 12:12:41 Modified: . _iconv_codec.c Log: Minor code clean ups Revision Changes Path 1.15 +43 -49 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.14 retrieving revision 1.15 diff -u -r1.14 -r1.15 --- _iconv_codec.c 12 Jun 2003 05:51:33 -0000 1.14 +++ _iconv_codec.c 16 Jun 2003 19:12:41 -0000 1.15 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.14 2003/06/12 05:51:33 perky Exp $ + * $Id: _iconv_codec.c,v 1.15 2003/06/16 19:12:41 perky Exp $ */ #include "Python.h" @@ -127,10 +127,10 @@ const char *unicode_encoding; int unitype; size_t (*iconvwrap)( - iconv_t cd, iconv_arg2_t inbuf, size_t * inbytesleft, - char* *outbuf, size_t * outbytesleft); + iconv_t cd, iconv_arg2_t inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft); size_t (*iconvwrap2)(iconv_t cd, IconvDecoderBuffer *buf, - int inleft, int outleft); + size_t inleft, size_t outleft); /* an alternative wrapper: for utf8 backend */ } IconvDecoderObject; @@ -187,9 +187,6 @@ { PyObject *v, *w; - if (unicode == NULL) - return NULL; - v = PyTuple_New(2); if (v == NULL) { Py_DECREF(unicode); @@ -225,7 +222,7 @@ } #define UTF8NEXTCHAR(p) \ - if (*(p) < 128) (p)++; \ + if (*(p) < 0x80) (p)++; \ else if (*(p) < 0xe0) (p) += 2; \ else if (*(p) < 0xf0) (p) += 3; \ else if (*(p) < 0xf8) (p) += 4; \ @@ -374,7 +371,7 @@ #if Py_UNICODE_SIZE == 2 "'%s' codec can't encode byte '\\u%04x' in position %d: %s", #else - "'%s' codec can't encode byte '\\u%08lx' in position %d: %s", + "'%s' codec can't encode byte '\\U%08lx' in position %d: %s", #endif self->encoding, *buf->inbuf, start, reason); else @@ -391,14 +388,11 @@ start, end, reason); if (buf->excobj == NULL) goto errorexit; - } else { - if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0) - goto errorexit; - if 
(PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0) - goto errorexit; - if (PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) - goto errorexit; - } + } else + if (PyUnicodeEncodeError_SetStart(buf->excobj, start) || + PyUnicodeEncodeError_SetEnd(buf->excobj, end) || + PyUnicodeEncodeError_SetReason(buf->excobj, reason)) + goto errorexit; if (errors == ERROR_STRICT) { PyCodec_StrictErrors(buf->excobj); @@ -739,9 +733,9 @@ iconvencoder_makestream(IconvEncoderObject *self, PyObject *args, PyObject *kwargs) { - static char *stream_kwarglist[] = {"stream", "errors", NULL}; - PyObject *stream; - char *errors = NULL; + static char *stream_kwarglist[] = {"stream", "errors", NULL}; + PyObject *stream; + char *errors = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:makestream", stream_kwarglist, &stream, &errors)) @@ -820,7 +814,7 @@ static int expand_decodebuffer(IconvDecoderBuffer *buf, int esize) { - int orgpos, orgsize; + int orgpos, orgsize; orgpos = (int)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); orgsize = PyUnicode_GET_SIZE(buf->outobj); @@ -906,14 +900,11 @@ (size_t)(buf->inbuf_end - buf->inbuf_top), start, end, reason); if (buf->excobj == NULL) goto errorexit; - } else { - if (PyUnicodeDecodeError_SetStart(buf->excobj, start) != 0) - goto errorexit; - if (PyUnicodeDecodeError_SetEnd(buf->excobj, end) != 0) - goto errorexit; - if (PyUnicodeDecodeError_SetReason(buf->excobj, reason) != 0) - goto errorexit; - } + } else + if (PyUnicodeDecodeError_SetStart(buf->excobj, start) || + PyUnicodeDecodeError_SetEnd(buf->excobj, end) || + PyUnicodeDecodeError_SetReason(buf->excobj, reason)) + goto errorexit; if (errors == ERROR_STRICT) { PyCodec_StrictErrors(buf->excobj); @@ -983,7 +974,8 @@ } static size_t -iconvwrap_utf8(iconv_t ic, IconvDecoderBuffer *buf, int inleft, int outleft) +iconvwrap_utf8(iconv_t ic, IconvDecoderBuffer *buf, + size_t inleft, size_t outleft) { unsigned char *ubuf, *ubuf_top, *ubuf_end; size_t r; @@ -994,7 +986,7 @@ return 
-1; ubuf_top = ubuf; - if (inleft != -1) + if (inleft > 0) r = iconv(ic, (iconv_arg2_t)&(buf->inbuf), &inleft, (char **)&ubuf, &outleft); else @@ -1100,7 +1092,7 @@ iconvdecoder_conv(IconvDecoderObject *self, iconv_t ic, IconvDecoderBuffer *buf, PyObject *errors) { - size_t r, inleft, outleft; + size_t r, inleft, outleft; for (;;) { inleft = (size_t)(buf->inbuf_end - buf->inbuf); @@ -1133,7 +1125,7 @@ iconv_t ic, IconvDecoderBuffer *buf, PyObject *errors) { - size_t r, outleft; + size_t r, outleft; if (buf->inbuf < buf->inbuf_end) /* assumes as left by EINVAL */ if (iconvdecoder_error(self, ic, buf, errors, EINVAL, @@ -1147,7 +1139,7 @@ r = self->iconvwrap(ic, NULL, NULL, (char **)&(buf->outbuf), &outleft); else - r = self->iconvwrap2(ic, buf, -1, outleft); + r = self->iconvwrap2(ic, buf, 0, outleft); if (r == (size_t)-1) { if (errno == E2BIG) { @@ -1242,9 +1234,9 @@ iconvdecoder_makestream(IconvDecoderObject *self, PyObject *args, PyObject *kwargs) { - static char *stream_kwarglist[] = {"stream", "errors", NULL}; - PyObject *stream; - char *errors = NULL; + static char *stream_kwarglist[] = {"stream", "errors", NULL}; + PyObject *stream; + char *errors = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:makestream", stream_kwarglist, &stream, &errors)) @@ -1325,8 +1317,8 @@ const char *method, int sizehint) { IconvDecoderBuffer buf; - PyObject *cres; - int rsize, r, finalsize = 0; + PyObject *cres; + int rsize, r, finalsize = 0; if (sizehint == 0) return PyUnicode_FromUnicode(NULL, 0); @@ -1848,9 +1840,9 @@ iconvcodec_makeencoder(PyObject *spam, PyObject *args) { IconvEncoderObject *self; - iconv_t ic; - char *encoding; - int i; + iconv_t ic; + char *encoding; + int i; if (!PyArg_ParseTuple(args, "s:makeencoder", &encoding)) return NULL; @@ -1898,9 +1890,9 @@ iconvcodec_makedecoder(PyObject *spam, PyObject *args) { IconvDecoderObject *self; - iconv_t ic; - char *encoding; - int i; + iconv_t ic; + char *encoding; + int i; if (!PyArg_ParseTuple(args, 
"s:makedecoder", &encoding)) return NULL; @@ -1930,9 +1922,11 @@ self->unitype = um[i].type; switch (self->unitype) { case UNIINTERNAL_UCS: - self->iconvwrap = iconv; break; + self->iconvwrap = iconv; + break; case UNIINTERNAL_UCS_SWAPPED: - self->iconvwrap = iconvwrap_ucsswapped; break; + self->iconvwrap = iconvwrap_ucsswapped; + break; case UNIINTERNAL_UTF_8: self->iconvwrap = NULL; self->iconvwrap2= iconvwrap_utf8; @@ -1968,8 +1962,8 @@ static void detect_iconv_endian(void) { - iconv_t ic; - int i; + iconv_t ic; + int i; #define um uniinternal_modes for (i = 0; um[i].encoding[0]; i++) { |
From: Hye-Shik C. <pe...@us...> - 2003-06-16 17:04:20
|
perky 03/06/16 10:04:18 Modified: . iconv_codec.py Log: Remove evil debug print Revision Changes Path 1.7 +1 -3 iconvcodec/iconv_codec.py Index: iconv_codec.py =================================================================== RCS file: /cvsroot/koco/iconvcodec/iconv_codec.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- iconv_codec.py 11 Jun 2003 12:03:59 -0000 1.6 +++ iconv_codec.py 16 Jun 2003 17:04:18 -0000 1.7 @@ -24,7 +24,7 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -# $Id: iconv_codec.py,v 1.6 2003/06/11 12:03:59 perky Exp $ +# $Id: iconv_codec.py,v 1.7 2003/06/16 17:04:18 perky Exp $ from __future__ import nested_scopes from _iconv_codec import makeencoder, makedecoder @@ -76,8 +76,6 @@ try: encoder, decoder = makeencoder(enc), makedecoder(enc) except (LookupError, RuntimeError): - import traceback - traceback.print_exc() enc = enc.replace('_', '-') try: encoder, decoder = makeencoder(enc), makedecoder(enc) |
From: Hye-Shik C. <pe...@us...> - 2003-06-12 05:51:35
|
perky 03/06/11 22:51:33 Modified: . _iconv_codec.c Log: Remove unreachable wastes. Revision Changes Path 1.14 +1 -3 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- _iconv_codec.c 12 Jun 2003 04:11:19 -0000 1.13 +++ _iconv_codec.c 12 Jun 2003 05:51:33 -0000 1.14 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.13 2003/06/12 04:11:19 perky Exp $ + * $Id: _iconv_codec.c,v 1.14 2003/06/12 05:51:33 perky Exp $ */ #include "Python.h" @@ -775,7 +775,6 @@ "<IconvEncoder from='%s' to='%s' mode='%s'>", self->unicode_encoding, self->encoding, uniinternal_type_names[self->unitype]); - return PyString_FromString("<IconvEncoder>"); } #endif @@ -1279,7 +1278,6 @@ "<IconvDecoder from='%s' to='%s' mode='%s'>", self->encoding, self->unicode_encoding, uniinternal_type_names[self->unitype]); - return PyString_FromString("<IconvDecoder>"); } #endif |
From: Hye-Shik C. <pe...@us...> - 2003-06-12 04:11:21
|
perky 03/06/11 21:11:20 Modified: . THANKS _iconv_codec.c _iconv_codec_compat.h Log: Add a workaround for mingw32 compilation. Submitted by: Young-Sik Won <mon...@dr...> Revision Changes Path 1.2 +1 -0 iconvcodec/THANKS Index: THANKS =================================================================== RCS file: /cvsroot/koco/iconvcodec/THANKS,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- THANKS 11 Jun 2003 05:52:50 -0000 1.1 +++ THANKS 12 Jun 2003 04:11:19 -0000 1.2 @@ -1 +1,2 @@ Changwoo Ryu <cw...@de...> reported a bugfix +Young-Sik Won <mon...@dr...> compilation fix on mingw32 1.13 +13 -69 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.12 retrieving revision 1.13 diff -u -r1.12 -r1.13 --- _iconv_codec.c 11 Jun 2003 12:40:13 -0000 1.12 +++ _iconv_codec.c 12 Jun 2003 04:11:19 -0000 1.13 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: _iconv_codec.c,v 1.12 2003/06/11 12:40:13 perky Exp $ + * $Id: _iconv_codec.c,v 1.13 2003/06/12 04:11:19 perky Exp $ */ #include "Python.h" @@ -757,13 +757,7 @@ {NULL, NULL}, }; -#ifdef OLD_STYLE_TYPE -static PyObject * -iconvencoder_getattr(PyObject *self, char *name) -{ - return Py_FindMethod(iconvencoder_methods, self, name); -} -#endif +OLD_GETATTR_DEF(iconvencoder) static void iconvencoder_dealloc(IconvEncoderObject *self) @@ -794,11 +788,7 @@ /* methods */ (destructor)iconvencoder_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ -#ifdef OLD_STYLE_TYPE - iconvencoder_getattr, /*tp_getattr*/ -#else - 0, /*tp_getattr*/ -#endif + GETATTR_FUNC(iconvencoder), /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ #ifndef LACKS_PYSTRING_FROMFORMAT @@ -812,11 +802,7 @@ 0, /*tp_hash*/ (ternaryfunc)iconvencoder_call, /*tp_call*/ 0, /*tp_str*/ -#ifdef OLD_STYLE_TYPE - 0, /*tp_getattro*/ -#else - PyObject_GenericGetAttr, /*tp_getattro*/ -#endif + GETATTRO_FUNC, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT, /*tp_flags*/ @@ -1275,13 +1261,7 @@ {NULL, NULL}, }; -#ifdef OLD_STYLE_TYPE -static PyObject * -iconvdecoder_getattr(PyObject *self, char *name) -{ - return Py_FindMethod(iconvdecoder_methods, self, name); -} -#endif +OLD_GETATTR_DEF(iconvdecoder) static void iconvdecoder_dealloc(IconvDecoderObject *self) @@ -1312,11 +1292,7 @@ /* methods */ (destructor)iconvdecoder_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ -#ifdef OLD_STYLE_TYPE - iconvdecoder_getattr, /*tp_getattr*/ -#else - 0, /*tp_getattr*/ -#endif + GETATTR_FUNC(iconvdecoder), /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ #ifndef LACKS_PYSTRING_FROMFORMAT @@ -1330,11 +1306,7 @@ 0, /*tp_hash*/ (ternaryfunc)iconvdecoder_call, /*tp_call*/ 0, /*tp_str*/ -#ifdef OLD_STYLE_TYPE - 0, /*tp_getattro*/ -#else - PyObject_GenericGetAttr, /*tp_getattro*/ -#endif + GETATTRO_FUNC, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT, /*tp_flags*/ @@ -1555,13 +1527,7 @@ {NULL, 
NULL}, }; -#ifdef OLD_STYLE_TYPE -static PyObject * -iconvstreamreader_getattr(PyObject *self, char *name) -{ - return Py_FindMethod(iconvstreamreader_methods, self, name); -} -#endif +OLD_GETATTR_DEF(iconvstreamreader) static void iconvstreamreader_dealloc(IconvStreamReaderObject *self) @@ -1597,11 +1563,7 @@ /* methods */ (destructor)iconvstreamreader_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ -#ifdef OLD_STYLE_TYPE - iconvstreamreader_getattr, /*tp_getattr*/ -#else - 0, /*tp_getattr*/ -#endif + GETATTR_FUNC(iconvstreamreader),/*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ #ifndef LACKS_PYSTRING_FROMFORMAT @@ -1615,11 +1577,7 @@ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ -#ifdef OLD_STYLE_TYPE - 0, /*tp_getattro*/ -#else - PyObject_GenericGetAttr, /*tp_getattro*/ -#endif + GETATTRO_FUNC, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT, /*tp_flags*/ @@ -1754,13 +1712,7 @@ {NULL, NULL}, }; -#ifdef OLD_STYLE_TYPE -static PyObject * -iconvstreamwriter_getattr(PyObject *self, char *name) -{ - return Py_FindMethod(iconvstreamwriter_methods, self, name); -} -#endif +OLD_GETATTR_DEF(iconvstreamwriter) static void iconvstreamwriter_dealloc(IconvStreamWriterObject *self) @@ -1796,11 +1748,7 @@ /* methods */ (destructor)iconvstreamwriter_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ -#ifdef OLD_STYLE_TYPE - iconvstreamwriter_getattr, /*tp_getattr*/ -#else - 0, /*tp_getattr*/ -#endif + GETATTR_FUNC(iconvstreamwriter),/*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ #ifndef LACKS_PYSTRING_FROMFORMAT @@ -1814,11 +1762,7 @@ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ -#ifdef OLD_STYLE_TYPE - 0, /*tp_getattro*/ -#else - PyObject_GenericGetAttr, /*tp_getattro*/ -#endif + GETATTRO_FUNC, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT, /*tp_flags*/ 1.3 +23 -1 iconvcodec/_iconv_codec_compat.h Index: _iconv_codec_compat.h =================================================================== RCS file: 
/cvsroot/koco/iconvcodec/_iconv_codec_compat.h,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- _iconv_codec_compat.h 20 Apr 2003 20:45:34 -0000 1.2 +++ _iconv_codec_compat.h 12 Jun 2003 04:11:19 -0000 1.3 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec_compat.h,v 1.2 2003/04/20 20:45:34 perky Exp $ + * $Id: _iconv_codec_compat.h,v 1.3 2003/06/12 04:11:19 perky Exp $ */ /* We don't support 2.0 and older */ @@ -51,7 +51,29 @@ # define LACKS_PYSTRING_FROMFORMAT 1 # define OLD_STYLE_TYPE 1 # define METH_NOARGS METH_VARARGS +# define OLD_GETATTR_DEF(prefix) \ + static PyObject * \ + prefix##_getattr(PyObject *self, char *name) \ + { \ + return Py_FindMethod(prefix##_methods, self, name); \ + } +# define GETATTR_FUNC(prefix) prefix##_getattr +# define GETATTRO_FUNC 0 +#else +# define OLD_GETATTR_DEF(prefix) +# define GETATTR_FUNC(prefix) 0 +# ifdef __MINGW32__ +__inline static PyObject* __dummy_getattro(PyObject* self, PyObject* args) +{ + return PyObject_GenericGetAttr(self, args); +} +# define GETATTRO_FUNC __dummy_getattro +# else +# define GETATTRO_FUNC PyObject_GenericGetAttr +# endif #endif + + /* * ex: ts=8 sts=4 et |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 12:40:19
|
perky 03/06/11 05:40:14 Modified: . _iconv_codec.c Log: Remove cruft (an unused variable) Revision Changes Path 1.12 +2 -4 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- _iconv_codec.c 11 Jun 2003 12:35:23 -0000 1.11 +++ _iconv_codec.c 11 Jun 2003 12:40:13 -0000 1.12 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.11 2003/06/11 12:35:23 perky Exp $ + * $Id: _iconv_codec.c,v 1.12 2003/06/11 12:40:13 perky Exp $ */ #include "Python.h" @@ -2073,11 +2073,9 @@ void init_iconv_codec(void) { - PyObject *m; - detect_iconv_endian(); - m = Py_InitModule("_iconv_codec", _iconv_codec_methods); + Py_InitModule("_iconv_codec", _iconv_codec_methods); if (PyErr_Occurred()) Py_FatalError("can't initialize the _iconv_codec module"); |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 12:35:31
|
perky 03/06/11 05:35:30 Modified: . _iconv_codec.c Log: Count 0xfe and 0xff as single-byte sequence. Revision Changes Path 1.11 +4 -3 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- _iconv_codec.c 11 Jun 2003 12:30:38 -0000 1.10 +++ _iconv_codec.c 11 Jun 2003 12:35:23 -0000 1.11 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.10 2003/06/11 12:30:38 perky Exp $ + * $Id: _iconv_codec.c,v 1.11 2003/06/11 12:35:23 perky Exp $ */ #include "Python.h" @@ -230,7 +230,8 @@ else if (*(p) < 0xf0) (p) += 3; \ else if (*(p) < 0xf8) (p) += 4; \ else if (*(p) < 0xfc) (p) += 5; \ - else (p) += 6; + else if (*(p) < 0xfe) (p) += 6; \ + else (p)++; static const unsigned char * skipchars_utf8(const unsigned char *st, int n) @@ -1066,7 +1067,7 @@ | ((ucs4_t)(ubuf[3] ^ 0x80) << 6) | (ucs4_t)(ubuf[4] ^ 0x80); ubuf += 5; - } else if (*ubuf < 0xff) { + } else if (*ubuf < 0xfe) { if (uleft < 6 || !((ubuf[1] ^ 0x80) < 0x40 && (ubuf[2] ^ 0x80) < 0x40 && (ubuf[3] ^ 0x80) < 0x40 && (ubuf[4] ^ 0x80) < 0x40 && (ubuf[5] ^ 0x80) < 0x40 && |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 12:30:43
|
perky 03/06/11 05:30:40 Modified: . _iconv_codec.c Log: Use a correct format string on python-ucs4 Revision Changes Path 1.10 +5 -1 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- _iconv_codec.c 11 Jun 2003 12:07:01 -0000 1.9 +++ _iconv_codec.c 11 Jun 2003 12:30:38 -0000 1.10 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.9 2003/06/11 12:07:01 perky Exp $ + * $Id: _iconv_codec.c,v 1.10 2003/06/11 12:30:38 perky Exp $ */ #include "Python.h" @@ -370,7 +370,11 @@ #ifdef LACKS_ERROR_CALLBACKS if (esize == 1) PyErr_Format(PyExc_UnicodeError, +#if Py_UNICODE_SIZE == 2 "'%s' codec can't encode byte '\\u%04x' in position %d: %s", +#else + "'%s' codec can't encode byte '\\u%08lx' in position %d: %s", +#endif self->encoding, *buf->inbuf, start, reason); else PyErr_Format(PyExc_UnicodeError, |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 12:08:43
|
perky 03/06/11 05:08:40 Modified: . README setup.py Log: It will be known as 1.1 Revision Changes Path 1.3 +2 -2 iconvcodec/README Index: README =================================================================== RCS file: /cvsroot/koco/iconvcodec/README,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- README 20 Apr 2003 22:18:02 -0000 1.2 +++ README 11 Jun 2003 12:08:40 -0000 1.3 @@ -1,8 +1,8 @@ -Python Iconv Codec version 1.0 +Python Iconv Codec version 1.1 ============================== Copyright(C) Hye-Shik Chang, 2003. -$Id: README,v 1.2 2003/04/20 22:18:02 perky Exp $ +$Id: README,v 1.3 2003/06/11 12:08:40 perky Exp $ 1.4 +2 -2 iconvcodec/setup.py Index: setup.py =================================================================== RCS file: /cvsroot/koco/iconvcodec/setup.py,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- setup.py 20 Apr 2003 21:09:00 -0000 1.3 +++ setup.py 11 Jun 2003 12:08:40 -0000 1.4 @@ -25,7 +25,7 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -# $Id: setup.py,v 1.3 2003/04/20 21:09:00 perky Exp $ +# $Id: setup.py,v 1.4 2003/06/11 12:08:40 perky Exp $ # import sys @@ -75,7 +75,7 @@ org_install_lib or self.install_purelib setup (name = "iconvcodec", - version = "1.0", + version = "1.1", author = "Hye-Shik Chang", author_email = "pe...@Fr...", cmdclass = {'install': Install}, |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 12:07:02
|
perky 03/06/11 05:07:01 Modified: . _iconv_codec.c Log: Use ucs4_t than Py_UNICODE in internal calculations Revision Changes Path 1.9 +21 -21 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- _iconv_codec.c 11 Jun 2003 12:01:57 -0000 1.8 +++ _iconv_codec.c 11 Jun 2003 12:07:01 -0000 1.9 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.8 2003/06/11 12:01:57 perky Exp $ + * $Id: _iconv_codec.c,v 1.9 2003/06/11 12:07:01 perky Exp $ */ #include "Python.h" @@ -1027,17 +1027,17 @@ } else if (*ubuf < 0xe0) { if (uleft < 2 || !((ubuf[1] ^ 0x80) < 0x40)) goto ilseq; - code = ((Py_UNICODE)(ubuf[0] & 0x1f) << 6) - | (Py_UNICODE)(ubuf[1] ^ 0x80); + code = ((ucs4_t)(ubuf[0] & 0x1f) << 6) + | (ucs4_t)(ubuf[1] ^ 0x80); ubuf += 2; } else if (*ubuf < 0xf0) { if (uleft < 3 || !((ubuf[1] ^ 0x80) < 0x40 && (ubuf[2] ^ 0x80) < 0x40 && (ubuf[0] >= 0xe1 || ubuf[1] >= 0xa0))) goto ilseq; - code = ((Py_UNICODE)(ubuf[0] & 0x0f) << 12) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[2] ^ 0x80); + code = ((ucs4_t)(ubuf[0] & 0x0f) << 12) + | ((ucs4_t)(ubuf[1] ^ 0x80) << 6) + | (ucs4_t)(ubuf[2] ^ 0x80); ubuf += 3; } else if (*ubuf < 0xf8) { @@ -1045,10 +1045,10 @@ (ubuf[2] ^ 0x80) < 0x40 && (ubuf[3] ^ 0x80) < 0x40 && (ubuf[0] >= 0xf1 || ubuf[1] >= 0x90))) goto ilseq; - code = ((Py_UNICODE)(ubuf[0] & 0x07) << 18) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 12) - | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[3] ^ 0x80); + code = ((ucs4_t)(ubuf[0] & 0x07) << 18) + | ((ucs4_t)(ubuf[1] ^ 0x80) << 12) + | ((ucs4_t)(ubuf[2] ^ 0x80) << 6) + | (ucs4_t)(ubuf[3] ^ 0x80); ubuf += 4; } else if (*ubuf < 0xfc) { if (uleft < 5 || !((ubuf[1] ^ 0x80) < 0x40 && @@ -1056,11 +1056,11 @@ (ubuf[4] ^ 0x80) < 0x40 && (ubuf[0] 
>= 0xf9 || ubuf[1] >= 0x88))) goto ilseq; - code = ((Py_UNICODE)(ubuf[0] & 0x03) << 24) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 18) - | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 12) - | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[4] ^ 0x80); + code = ((ucs4_t)(ubuf[0] & 0x03) << 24) + | ((ucs4_t)(ubuf[1] ^ 0x80) << 18) + | ((ucs4_t)(ubuf[2] ^ 0x80) << 12) + | ((ucs4_t)(ubuf[3] ^ 0x80) << 6) + | (ucs4_t)(ubuf[4] ^ 0x80); ubuf += 5; } else if (*ubuf < 0xff) { if (uleft < 6 || !((ubuf[1] ^ 0x80) < 0x40 && @@ -1068,12 +1068,12 @@ (ubuf[4] ^ 0x80) < 0x40 && (ubuf[5] ^ 0x80) < 0x40 && (ubuf[0] >= 0xfd || ubuf[1] >= 0x84))) goto ilseq; - code = ((Py_UNICODE)(ubuf[0] & 0x01) << 30) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 24) - | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 18) - | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 12) - | ((Py_UNICODE)(ubuf[4] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[5] ^ 0x80); + code = ((ucs4_t)(ubuf[0] & 0x01) << 30) + | ((ucs4_t)(ubuf[1] ^ 0x80) << 24) + | ((ucs4_t)(ubuf[2] ^ 0x80) << 18) + | ((ucs4_t)(ubuf[3] ^ 0x80) << 12) + | ((ucs4_t)(ubuf[4] ^ 0x80) << 6) + | (ucs4_t)(ubuf[5] ^ 0x80); ubuf += 6; } else goto ilseq; |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 12:04:00
|
perky 03/06/11 05:03:59 Modified: . iconv_codec.py Log: Remove another debug print Revision Changes Path 1.6 +1 -2 iconvcodec/iconv_codec.py Index: iconv_codec.py =================================================================== RCS file: /cvsroot/koco/iconvcodec/iconv_codec.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- iconv_codec.py 11 Jun 2003 11:12:52 -0000 1.5 +++ iconv_codec.py 11 Jun 2003 12:03:59 -0000 1.6 @@ -24,7 +24,7 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -# $Id: iconv_codec.py,v 1.5 2003/06/11 11:12:52 perky Exp $ +# $Id: iconv_codec.py,v 1.6 2003/06/11 12:03:59 perky Exp $ from __future__ import nested_scopes from _iconv_codec import makeencoder, makedecoder @@ -82,7 +82,6 @@ try: encoder, decoder = makeencoder(enc), makedecoder(enc) except (LookupError, RuntimeError): - print "Error" return None class IconvCodec(codecs.Codec): |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 12:01:59
|
perky 03/06/11 05:01:58 Modified: . _iconv_codec.c Log: Utilize UCS-2 Surrogate-Pair to support ISO-10646 extended planes Revision Changes Path 1.8 +67 -37 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- _iconv_codec.c 11 Jun 2003 11:06:50 -0000 1.7 +++ _iconv_codec.c 11 Jun 2003 12:01:57 -0000 1.8 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.7 2003/06/11 11:06:50 perky Exp $ + * $Id: _iconv_codec.c,v 1.8 2003/06/11 12:01:57 perky Exp $ */ #include "Python.h" @@ -57,6 +57,14 @@ typedef const char **iconv_arg2_t; #endif +#ifndef ucs4_t +# ifdef uint32_t +typedef uint32_t ucs4_t; +# else +typedef unsigned long ucs4_t; +# endif +#endif + #define ERROR_STRICT (PyObject *)(1) #define ERROR_IGNORE (PyObject *)(2) #define ERROR_REPLACE (PyObject *)(3) @@ -570,22 +578,30 @@ return -1; buf->rinbuf_top = buf->rinbuf = rinbuf; for (; buf->inbuf < buf->inbuf_end; buf->inbuf++) { - Py_UNICODE code = *buf->inbuf; + ucs4_t code = *buf->inbuf; int size; if (code < 0x80) size = 1; else if (code < 0x800) size = 2; + else { #if Py_UNICODE_SIZE == 2 - else size = 3; /* XXX put surrogate characters for EMP! 
*/ -#else - else if (code < 0x10000) size = 3; - else if (code < 0x200000) size = 4; - else if (code < 0x4000000) size = 5; - else size = 6; -#endif + /* Unfold a Surrogate-Pair */ + if (code >= 0xd800 && code < 0xdc00 && + buf->inbuf+1 < buf->inbuf_end && + buf->inbuf[1] >= 0xdc00 && + buf->inbuf[1] < 0xe000) { + code = 0x10000 + ((code - 0xd800) << 10) + + (buf->inbuf[1] - 0xdc00); + buf->inbuf++; + } +#endif + if (code < 0x10000) size = 3; + else if (code < 0x200000) size = 4; + else if (code < 0x4000000) size = 5; + else size = 6; + } switch (size) { -#if Py_UNICODE_SIZE == 4 case 6: rinbuf[5] = 0x80 | (code & 0x3f); code = code >> 6; @@ -601,7 +617,6 @@ code = code >> 6; code |= 0x10000; /* FALLTHROUGH */ -#endif case 3: rinbuf[2] = 0x80 | (code & 0x3f); code = code >> 6; @@ -1000,10 +1015,11 @@ if (nch > 0) RESERVE_DECODEBUFFER(buf, nch) for (ubuf = ubuf_top; ubuf < ubuf_end;) { - int uleft = (int)(ubuf_end - ubuf); + int uleft = (int)(ubuf_end - ubuf); + ucs4_t code; if (*ubuf < 0x80) { - *buf->outbuf++ = (unsigned char)*ubuf++; + code = (unsigned char)*ubuf++; } else if (*ubuf < 0xc2) { ilseq: PyErr_SetString(PyExc_RuntimeError, "iconv returned illegal utf-8 sequence"); @@ -1011,32 +1027,28 @@ } else if (*ubuf < 0xe0) { if (uleft < 2 || !((ubuf[1] ^ 0x80) < 0x40)) goto ilseq; - *buf->outbuf++ = ((Py_UNICODE)(ubuf[0] & 0x1f) << 6) - | (Py_UNICODE)(ubuf[1] ^ 0x80); + code = ((Py_UNICODE)(ubuf[0] & 0x1f) << 6) + | (Py_UNICODE)(ubuf[1] ^ 0x80); ubuf += 2; } else if (*ubuf < 0xf0) { if (uleft < 3 || !((ubuf[1] ^ 0x80) < 0x40 && (ubuf[2] ^ 0x80) < 0x40 && (ubuf[0] >= 0xe1 || ubuf[1] >= 0xa0))) goto ilseq; - *buf->outbuf++ = ((Py_UNICODE)(ubuf[0] & 0x0f) << 12) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[2] ^ 0x80); + code = ((Py_UNICODE)(ubuf[0] & 0x0f) << 12) + | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 6) + | (Py_UNICODE)(ubuf[2] ^ 0x80); ubuf += 3; } -#if Py_UNICODE_SIZE == 2 - else /* XXX: put surrogate characters here! 
*/ - goto ilseq; -#else else if (*ubuf < 0xf8) { if (uleft < 4 || !((ubuf[1] ^ 0x80) < 0x40 && (ubuf[2] ^ 0x80) < 0x40 && (ubuf[3] ^ 0x80) < 0x40 && (ubuf[0] >= 0xf1 || ubuf[1] >= 0x90))) goto ilseq; - *buf->outbuf++ = ((Py_UNICODE)(ubuf[0] & 0x07) << 18) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 12) - | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[3] ^ 0x80); + code = ((Py_UNICODE)(ubuf[0] & 0x07) << 18) + | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 12) + | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 6) + | (Py_UNICODE)(ubuf[3] ^ 0x80); ubuf += 4; } else if (*ubuf < 0xfc) { if (uleft < 5 || !((ubuf[1] ^ 0x80) < 0x40 && @@ -1044,11 +1056,11 @@ (ubuf[4] ^ 0x80) < 0x40 && (ubuf[0] >= 0xf9 || ubuf[1] >= 0x88))) goto ilseq; - *buf->outbuf++ = ((Py_UNICODE)(ubuf[0] & 0x03) << 24) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 18) - | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 12) - | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[4] ^ 0x80); + code = ((Py_UNICODE)(ubuf[0] & 0x03) << 24) + | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 18) + | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 12) + | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 6) + | (Py_UNICODE)(ubuf[4] ^ 0x80); ubuf += 5; } else if (*ubuf < 0xff) { if (uleft < 6 || !((ubuf[1] ^ 0x80) < 0x40 && @@ -1056,16 +1068,34 @@ (ubuf[4] ^ 0x80) < 0x40 && (ubuf[5] ^ 0x80) < 0x40 && (ubuf[0] >= 0xfd || ubuf[1] >= 0x84))) goto ilseq; - *buf->outbuf++ = ((Py_UNICODE)(ubuf[0] & 0x01) << 30) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 24) - | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 18) - | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 12) - | ((Py_UNICODE)(ubuf[4] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[5] ^ 0x80); + code = ((Py_UNICODE)(ubuf[0] & 0x01) << 30) + | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 24) + | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 18) + | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 12) + | ((Py_UNICODE)(ubuf[4] ^ 0x80) << 6) + | (Py_UNICODE)(ubuf[5] ^ 0x80); ubuf += 6; } else goto ilseq; + +#if Py_UNICODE_SIZE == 2 + if (code >= 0x10000) { + if (code >= 0x110000) + goto ilseq; + + if (buf->outbuf_end <= 
buf->outbuf + 1) { + RESERVE_DECODEBUFFER(buf, -1) + } + *buf->outbuf++ = 0xd800 + ((code - 0x10000) >> 10); + *buf->outbuf++ = 0xdc00 + ((code - 0x10000) & 0x3ff); + } else #endif + { + if (buf->outbuf_end <= buf->outbuf) { + RESERVE_DECODEBUFFER(buf, -1) + } + *buf->outbuf++ = (Py_UNICODE)code; + } } PyMem_Del(ubuf_top); |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 11:12:53
|
perky 03/06/11 04:12:52 Modified: . iconv_codec.py Log: Remove debug print Revision Changes Path 1.5 +1 -2 iconvcodec/iconv_codec.py Index: iconv_codec.py =================================================================== RCS file: /cvsroot/koco/iconvcodec/iconv_codec.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- iconv_codec.py 11 Jun 2003 11:05:09 -0000 1.4 +++ iconv_codec.py 11 Jun 2003 11:12:52 -0000 1.5 @@ -24,7 +24,7 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -# $Id: iconv_codec.py,v 1.4 2003/06/11 11:05:09 perky Exp $ +# $Id: iconv_codec.py,v 1.5 2003/06/11 11:12:52 perky Exp $ from __future__ import nested_scopes from _iconv_codec import makeencoder, makedecoder @@ -72,7 +72,6 @@ def lookup(enc): if enc.startswith('iconvcodec.'): enc = enc[11:] - print ">>", enc enc = aliases.get(enc.replace('-', '_'), enc) try: encoder, decoder = makeencoder(enc), makedecoder(enc) |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 11:06:53
|
perky 03/06/11 04:06:50 Modified: . _iconv_codec.c Log: Use 'UTF-16' internal encodings when Py_UNICODE_SIZE == 2. Revision Changes Path 1.7 +10 -6 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- _iconv_codec.c 11 Jun 2003 05:55:28 -0000 1.6 +++ _iconv_codec.c 11 Jun 2003 11:06:50 -0000 1.7 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.6 2003/06/11 05:55:28 perky Exp $ + * $Id: _iconv_codec.c,v 1.7 2003/06/11 11:06:50 perky Exp $ */ #include "Python.h" @@ -33,11 +33,9 @@ #ifdef Py_USING_UNICODE # if Py_UNICODE_SIZE == 2 -# define UCS_N "UCS-2" # define MBENCODED_LENGTH_MAX 4 # define _Py_UNICODE_SWAP(c) (Py_UNICODE)((c)>>8 | (c)<<8) # elif Py_UNICODE_SIZE == 4 -# define UCS_N "UCS-4" # define MBENCODED_LENGTH_MAX 6 # define _Py_UNICODE_SWAP(c) (Py_UNICODE)((c)>>24 | \ ((c)&0x00ff0000)>>8 | \ @@ -81,9 +79,15 @@ const char *encoding; uniinternal_type_t type; } uniinternal_modes[] = { - {UCS_N "-INTERNAL", UNIINTERNAL_UCS}, /* GNU libiconv, FreeBSD, APR */ - {UCS_N ENDIANSUFX, UNIINTERNAL_UCS}, /* SunOS */ - {UCS_N, UNIINTERNAL_UCS}, /* GLIBC */ +#if Py_UNICODE_SIZE == 2 +/* Py_UNICODE* may contain surrogate characters */ + {"UTF-16" ENDIANSUFX, UNIINTERNAL_UCS}, + {"UTF16" ENDIANSUFX, UNIINTERNAL_UCS}, +#else + {"UCS-4-INTERNAL", UNIINTERNAL_UCS}, /* GNU libiconv, FreeBSD, APR */ + {"UCS-4" ENDIANSUFX, UNIINTERNAL_UCS}, /* SunOS */ + {"UCS-4", UNIINTERNAL_UCS}, /* GLIBC */ +#endif {"UTF-8", UNIINTERNAL_UTF_8}, /* SunOS(CJK) */ {"\0", UNIINTERNAL_DONTUSE}, }; |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 11:05:12
|
perky 03/06/11 04:05:10 Modified: . iconv_codec.py Log: Correct len('iconvcodec.') Revision Changes Path 1.4 +6 -2 iconvcodec/iconv_codec.py Index: iconv_codec.py =================================================================== RCS file: /cvsroot/koco/iconvcodec/iconv_codec.py,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- iconv_codec.py 11 Jun 2003 10:42:12 -0000 1.3 +++ iconv_codec.py 11 Jun 2003 11:05:09 -0000 1.4 @@ -24,7 +24,7 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -# $Id: iconv_codec.py,v 1.3 2003/06/11 10:42:12 perky Exp $ +# $Id: iconv_codec.py,v 1.4 2003/06/11 11:05:09 perky Exp $ from __future__ import nested_scopes from _iconv_codec import makeencoder, makedecoder @@ -71,15 +71,19 @@ def lookup(enc): if enc.startswith('iconvcodec.'): - enc = enc[6:] + enc = enc[11:] + print ">>", enc enc = aliases.get(enc.replace('-', '_'), enc) try: encoder, decoder = makeencoder(enc), makedecoder(enc) except (LookupError, RuntimeError): + import traceback + traceback.print_exc() enc = enc.replace('_', '-') try: encoder, decoder = makeencoder(enc), makedecoder(enc) except (LookupError, RuntimeError): + print "Error" return None class IconvCodec(codecs.Codec): |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 10:42:15
|
perky 03/06/11 03:42:12 Modified: . iconv_codec.py Log: Enable specific module name starting with 'iconvcodec.' Revision Changes Path 1.3 +3 -1 iconvcodec/iconv_codec.py Index: iconv_codec.py =================================================================== RCS file: /cvsroot/koco/iconvcodec/iconv_codec.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- iconv_codec.py 20 Apr 2003 20:45:34 -0000 1.2 +++ iconv_codec.py 11 Jun 2003 10:42:12 -0000 1.3 @@ -24,7 +24,7 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -# $Id: iconv_codec.py,v 1.2 2003/04/20 20:45:34 perky Exp $ +# $Id: iconv_codec.py,v 1.3 2003/06/11 10:42:12 perky Exp $ from __future__ import nested_scopes from _iconv_codec import makeencoder, makedecoder @@ -70,6 +70,8 @@ }) def lookup(enc): + if enc.startswith('iconvcodec.'): + enc = enc[6:] enc = aliases.get(enc.replace('-', '_'), enc) try: encoder, decoder = makeencoder(enc), makedecoder(enc) |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 09:14:15
|
perky 03/06/11 02:14:12 Modified: . THANKS Log: Thanks to Young-Sik Won for his mingw32 compilation fixes. Revision Changes Path 1.2 +1 -0 cjkcodecs/THANKS Index: THANKS =================================================================== RCS file: /cvsroot/koco/cjkcodecs/THANKS,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- THANKS 10 Jun 2003 11:25:52 -0000 1.1 +++ THANKS 11 Jun 2003 09:14:10 -0000 1.2 @@ -1,2 +1,3 @@ Kazuhiro ABE <abe...@ni...> advice on JIS X 0213 Yoshiki Ohshima <Yos...@ac...> advice on JIS X 0213 +Young-Sik Won <mon...@dr...> mingw32 compilation fix |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 05:55:30
|
perky 03/06/10 22:55:29 Modified: . _iconv_codec.c Log: Sigh, it's not cjkcodecs here! :S Revision Changes Path 1.6 +2 -2 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- _iconv_codec.c 11 Jun 2003 05:51:11 -0000 1.5 +++ _iconv_codec.c 11 Jun 2003 05:55:28 -0000 1.6 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.5 2003/06/11 05:51:11 perky Exp $ + * $Id: _iconv_codec.c,v 1.6 2003/06/11 05:55:28 perky Exp $ */ #include "Python.h" @@ -1060,7 +1060,7 @@ | (Py_UNICODE)(ubuf[5] ^ 0x80); ubuf += 6; } else - return 1; + goto ilseq; #endif } |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 05:52:52
|
perky 03/06/10 22:52:51 Added: . THANKS Log: Fix unbalanced parenthesis when Py_UNICODE_SIZE == 4. Submitted by: Changwoo Ryu <cw...@de...> Revision Changes Path 1.1 iconvcodec/THANKS Index: THANKS =================================================================== Changwoo Ryu <cw...@de...> reported a bugfix |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 05:51:13
|
perky 03/06/10 22:51:11 Modified: . MANIFEST.in _iconv_codec.c Log: Fix unbalanced parenthesis when Py_UNICODE_SIZE == 4. Submitted by: Changwoo Ryu <cw...@de...> Revision Changes Path 1.2 +2 -2 iconvcodec/MANIFEST.in Index: MANIFEST.in =================================================================== RCS file: /cvsroot/koco/iconvcodec/MANIFEST.in,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- MANIFEST.in 20 Apr 2003 21:09:00 -0000 1.1 +++ MANIFEST.in 11 Jun 2003 05:51:11 -0000 1.2 @@ -1,4 +1,4 @@ -# $Id: MANIFEST.in,v 1.1 2003/04/20 21:09:00 perky Exp $ +# $Id: MANIFEST.in,v 1.2 2003/06/11 05:51:11 perky Exp $ -include COPYRIGHT MANIFEST.in AUTHORS README +include COPYRIGHT MANIFEST.in AUTHORS README THANKS include test_iconv_codec.py _iconv_codec_compat.h 1.5 +11 -8 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- _iconv_codec.c 20 Apr 2003 21:40:05 -0000 1.4 +++ _iconv_codec.c 11 Jun 2003 05:51:11 -0000 1.5 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.4 2003/04/20 21:40:05 perky Exp $ + * $Id: _iconv_codec.c,v 1.5 2003/06/11 05:51:11 perky Exp $ */ #include "Python.h" @@ -572,7 +572,7 @@ if (code < 0x80) size = 1; else if (code < 0x800) size = 2; #if Py_UNICODE_SIZE == 2 - else size = 3; + else size = 3; /* XXX put surrogate characters for EMP! 
*/ #else else if (code < 0x10000) size = 3; else if (code < 0x200000) size = 4; @@ -1000,7 +1000,7 @@ if (*ubuf < 0x80) { *buf->outbuf++ = (unsigned char)*ubuf++; - } else if (*ubuf < 0xc2 || *ubuf == 0xff) { + } else if (*ubuf < 0xc2) { ilseq: PyErr_SetString(PyExc_RuntimeError, "iconv returned illegal utf-8 sequence"); goto errorexit; @@ -1021,10 +1021,10 @@ ubuf += 3; } #if Py_UNICODE_SIZE == 2 - else + else /* XXX: put surrogate characters here! */ goto ilseq; #else - } else if (*ubuf < 0xf8) { + else if (*ubuf < 0xf8) { if (uleft < 4 || !((ubuf[1] ^ 0x80) < 0x40 && (ubuf[2] ^ 0x80) < 0x40 && (ubuf[3] ^ 0x80) < 0x40 && (ubuf[0] >= 0xf1 || ubuf[1] >= 0x90))) @@ -1046,7 +1046,7 @@ | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 6) | (Py_UNICODE)(ubuf[4] ^ 0x80); ubuf += 5; - } else { /* 0xff is excluded above */ + } else if (*ubuf < 0xff) { if (uleft < 6 || !((ubuf[1] ^ 0x80) < 0x40 && (ubuf[2] ^ 0x80) < 0x40 && (ubuf[3] ^ 0x80) < 0x40 && (ubuf[4] ^ 0x80) < 0x40 && (ubuf[5] ^ 0x80) < 0x40 && @@ -1059,7 +1059,8 @@ | ((Py_UNICODE)(ubuf[4] ^ 0x80) << 6) | (Py_UNICODE)(ubuf[5] ^ 0x80); ubuf += 6; - } + } else + return 1; #endif } @@ -2033,9 +2034,11 @@ void init_iconv_codec(void) { + PyObject *m; + detect_iconv_endian(); - Py_InitModule("_iconv_codec", _iconv_codec_methods); + m = Py_InitModule("_iconv_codec", _iconv_codec_methods); if (PyErr_Occurred()) Py_FatalError("can't initialize the _iconv_codec module"); |
From: Hye-Shik C. <pe...@us...> - 2003-06-10 11:31:36
|
perky 03/06/10 04:31:35 Modified: . ROADMAP Log: Include JIS X 0213 encodings in 1.0 and hkscs, Mac encodings on 1.1 Revision Changes Path 1.4 +13 -13 cjkcodecs/ROADMAP Index: ROADMAP =================================================================== RCS file: /cvsroot/koco/cjkcodecs/ROADMAP,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- ROADMAP 5 Jun 2003 09:58:51 -0000 1.3 +++ ROADMAP 10 Jun 2003 11:31:35 -0000 1.4 @@ -1,26 +1,26 @@ -Scheduled new encodings for each versions +Planned Updates -Authority 0.9 1.0 1.1 1.2 -============================================================================== +Authority 0.9 1.0 1.1 +================================================================== China (PRC) gb2312 iso-2022-cn gbk(cp936) iso-2022-cn-ext gb18030 hz -Hong Kong hkscs +Hong Kong hkscs -Japan shift-jis iso-2022-jp-2 euc-jisx0213 iso-2022-int-1 - euc-jp shift-jisx0213 mac_japanese - cp932 iso-2022-jp-3 - iso-2022-jp +Japan shift-jis iso-2022-jp-2 iso-2022-int-1 + euc-jp iso-2022-jp-3 mac_japanese + cp932 euc-jisx0213 + iso-2022-jp shift-jisx0213 iso-2022-jp-1 -Korea (ROK) euc-kr (ksx1001:2002) mac_korean - cp949(uhc) unijohab - johab +Korea (ROK) euc-kr (ksx1001:2002) + cp949(uhc) mac_korean + johab unijohab iso-2022-kr -Korea (DPRK) euc-kp +Korea (DPRK) euc-kp Taiwan big5 iso-2022-cn cp950 iso-2022-cn-ext @@ -30,5 +30,5 @@ utf-16 -# $Id: ROADMAP,v 1.3 2003/06/05 09:58:51 perky Exp $ +# $Id: ROADMAP,v 1.4 2003/06/10 11:31:35 perky Exp $ # ex: ts=8 sts=4 et |
From: Hye-Shik C. <pe...@us...> - 2003-06-10 11:25:54
|
perky 03/06/10 04:25:53 Modified: . MANIFEST.in Added: . THANKS Log: Thanks to Kazuhiro ABE and Yoshiki Ohshima for their advice on JIS X 0213 Revision Changes Path 1.3 +2 -2 cjkcodecs/MANIFEST.in Index: MANIFEST.in =================================================================== RCS file: /cvsroot/koco/cjkcodecs/MANIFEST.in,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- MANIFEST.in 6 Jun 2003 07:01:14 -0000 1.2 +++ MANIFEST.in 10 Jun 2003 11:25:52 -0000 1.3 @@ -1,6 +1,6 @@ -# $Id: MANIFEST.in,v 1.2 2003/06/06 07:01:14 perky Exp $ +# $Id: MANIFEST.in,v 1.3 2003/06/10 11:25:52 perky Exp $ -include README ROADMAP AUTHORS COPYRIGHT +include README ROADMAP AUTHORS COPYRIGHT THANKS include MANIFEST.in recursive-include src *.h *.c 1.1 cjkcodecs/THANKS Index: THANKS =================================================================== Kazuhiro ABE <abe...@ni...> advice on JIS X 0213 Yoshiki Ohshima <Yos...@ac...> advice on JIS X 0213 |