koco-cvs Mailing List for Python Korean Codecs (Page 10)
Brought to you by:
perky
You can subscribe to this list here.
| Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2002 | | | | (88) | (5) | | (27) | | | (5) | | |
| 2003 | (77) | (3) | | (22) | (123) | (80) | (83) | | | | | |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 11:20:57
|
perky 03/05/20 04:20:56 Modified: src _cp949.c _euc_kr.c _gb18030.c _gb2312.c _gbk.c Added: src _big5.c _cp950.c Log: Add big5 and cp950 codec and Fix programming error (UNIINV -> NOCHAR) Revision Changes Path 1.5 +2 -2 cjkcodecs/src/_cp949.c Index: _cp949.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_cp949.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- _cp949.c 20 May 2003 10:08:47 -0000 1.4 +++ _cp949.c 20 May 2003 11:20:56 -0000 1.5 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _cp949.c,v 1.4 2003/05/20 10:08:47 perky Exp $ + * $Id: _cp949.c,v 1.5 2003/05/20 11:20:56 perky Exp $ */ #include "codeccommon.h" @@ -57,7 +57,7 @@ map = &cp949encmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == UNIINV) + (code = map->map[clow - map->bottom]) == NOCHAR) return 1; (*outbuf)[0] = (code >> 8) | 0x80; 1.8 +2 -2 cjkcodecs/src/_euc_kr.c Index: _euc_kr.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_euc_kr.c,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- _euc_kr.c 20 May 2003 10:08:47 -0000 1.7 +++ _euc_kr.c 20 May 2003 11:20:56 -0000 1.8 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _euc_kr.c,v 1.7 2003/05/20 10:08:47 perky Exp $ + * $Id: _euc_kr.c,v 1.8 2003/05/20 11:20:56 perky Exp $ */ #include "codeccommon.h" @@ -56,7 +56,7 @@ map = &cp949encmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == UNIINV) + (code = map->map[clow - map->bottom]) == NOCHAR) return 1; if (code & 0x8000) /* MSB set: CP949 */ return 1; 1.2 +3 -3 cjkcodecs/src/_gb18030.c Index: _gb18030.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_gb18030.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- _gb18030.c 20 May 2003 10:59:08 -0000 1.1 +++ _gb18030.c 20 May 2003 11:20:56 -0000 1.2 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _gb18030.c,v 1.1 2003/05/20 10:59:08 perky Exp $ + * $Id: _gb18030.c,v 1.2 2003/05/20 11:20:56 perky Exp $ */ #include "codeccommon.h" @@ -85,11 +85,11 @@ map = &gbcommonencmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == UNIINV) { + (code = map->map[clow - map->bottom]) == NOCHAR) { map = &gb18030extencmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == UNIINV) { + (code = map->map[clow - map->bottom]) == NOCHAR) { const struct _gb18030_to_unibmp_ranges *utrrange; if (outleft < 4) 1.3 +2 -2 cjkcodecs/src/_gb2312.c Index: _gb2312.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_gb2312.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- _gb2312.c 20 May 2003 10:08:47 -0000 1.2 +++ _gb2312.c 20 May 2003 11:20:56 -0000 1.3 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _gb2312.c,v 1.2 2003/05/20 10:08:47 perky Exp $ + * $Id: _gb2312.c,v 1.3 2003/05/20 11:20:56 perky Exp $ */ #include "codeccommon.h" @@ -56,7 +56,7 @@ map = &gbcommonencmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == UNIINV) + (code = map->map[clow - map->bottom]) == NOCHAR) return 1; if (code & 0x8000) /* MSB set: GBK */ return 1; 1.2 +2 -2 cjkcodecs/src/_gbk.c Index: _gbk.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_gbk.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- _gbk.c 20 May 2003 10:59:08 -0000 1.1 +++ _gbk.c 20 May 2003 11:20:56 -0000 1.2 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _gbk.c,v 1.1 2003/05/20 10:59:08 perky Exp $ + * $Id: _gbk.c,v 1.2 2003/05/20 11:20:56 perky Exp $ */ #include "codeccommon.h" @@ -61,7 +61,7 @@ map = &gbcommonencmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == UNIINV) + (code = map->map[clow - map->bottom]) == NOCHAR) return 1; } 1.1 cjkcodecs/src/_big5.c Index: _big5.c =================================================================== /* * _big5.c: the Big5 codec * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $Id: _big5.c,v 1.1 2003/05/20 11:20:56 perky Exp $ */ #include "codeccommon.h" ENCMAP(big5) DECMAP(big5) ENCODER(big5) { while (inleft > 0) { const encode_map *map; Py_UNICODE c = **inbuf, clow; DBCHAR code; if (c < 0x80) { if (outleft < 1) return MBERR_TOOSMALL; **outbuf = c; (*inbuf)++; inleft--; (*outbuf)++; outleft--; continue; } UCS4INVALID(c) if (outleft < 2) return MBERR_TOOSMALL; map = &big5encmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || (code = map->map[clow - map->bottom]) == NOCHAR) return 1; (*outbuf)[0] = (code >> 8) | 0x80; (*outbuf)[1] = (code & 0xFF) | 0x80; (*outbuf) += 2; outleft -= 2; (*inbuf)++; inleft--; } return 0; } DECODER(big5) { while (inleft > 0) { const decode_map *map; unsigned char c = **inbuf, c2; Py_UNICODE code; if (outleft < 1) return MBERR_TOOSMALL; if (c < 0x80) { **outbuf = c; (*inbuf)++; inleft--; (*outbuf)++; outleft--; continue; } if (inleft < 2) return MBERR_TOOFEW; c2 = (*inbuf)[1]; map = &big5decmap[c]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) return 2; **outbuf = code; (*outbuf)++; outleft--; (*inbuf) += 2; inleft -= 2; } return 0; } CODECDEF(big5) NOMETHODS(__methods) void init_big5(void) 
{ PyObject *codec; PyObject *m = NULL, *mod = NULL, *o = NULL; m = Py_InitModule("_big5", __methods); /* Import mapdata */ MAPOPEN(mod, "zh_TW") if (IMPORTMAP(mod, big5, &big5encmap, &big5decmap)) goto errorexit; MAPCLOSE(mod) /* Create Codec Instances */ MULTIBYTECODEC_OPEN(mod, o) REGISTERCODEC(m, o, codec) MULTIBYTECODEC_CLOSE(mod, o) if (PyErr_Occurred()) Py_FatalError("can't initialize the _big5 module"); return; errorexit: Py_XDECREF(m); Py_XDECREF(mod); Py_XDECREF(o); } /* * ex: ts=8 sts=4 et */ 1.1 cjkcodecs/src/_cp950.c Index: _cp950.c =================================================================== /* * _cp950.c: the CP950 codec * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * $Id: _cp950.c,v 1.1 2003/05/20 11:20:56 perky Exp $ */ #include "codeccommon.h" ENCMAP(big5) ENCMAP(cp950ext) DECMAP(big5) DECMAP(cp950ext) ENCODER(cp950) { while (inleft > 0) { const encode_map *map; Py_UNICODE c = **inbuf, clow; DBCHAR code; if (c < 0x80) { if (outleft < 1) return MBERR_TOOSMALL; **outbuf = c; (*inbuf)++; inleft--; (*outbuf)++; outleft--; continue; } UCS4INVALID(c) if (outleft < 2) return MBERR_TOOSMALL; map = &cp950extencmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || (code = map->map[clow - map->bottom]) == NOCHAR) { map = &big5encmap[c >> 8]; if (map->map == NULL || clow < map->bottom || clow > map->top || (code = map->map[clow - map->bottom]) == NOCHAR) return 1; } (*outbuf)[0] = (code >> 8) | 0x80; (*outbuf)[1] = (code & 0xFF); (*outbuf) += 2; outleft -= 2; (*inbuf)++; inleft--; } return 0; } DECODER(cp950) { while (inleft > 0) { const decode_map *map; unsigned char c = **inbuf, c2; Py_UNICODE code; if (outleft < 1) return MBERR_TOOSMALL; if (c < 0x80) { **outbuf = c; (*inbuf)++; inleft--; (*outbuf)++; outleft--; continue; } if (inleft < 2) return MBERR_TOOFEW; c2 = (*inbuf)[1]; map = &cp950extdecmap[c & 0x7f]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) { map = &big5decmap[c]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) return 2; } **outbuf = code; (*outbuf)++; outleft--; (*inbuf) += 2; inleft -= 2; } return 0; } CODECDEF(cp950) NOMETHODS(__methods) void init_cp950(void) { PyObject *codec; PyObject *m = NULL, *mod = NULL, *o = NULL; m = Py_InitModule("_cp950", __methods); /* Import mapdata */ MAPOPEN(mod, "zh_TW") if (IMPORTMAP(mod, big5, &big5encmap, &big5decmap) || IMPORTMAP(mod, cp950ext, &cp950extencmap, &cp950extdecmap)) goto errorexit; MAPCLOSE(mod) /* Create Codec Instances */ MULTIBYTECODEC_OPEN(mod, o) REGISTERCODEC(m, o, codec) MULTIBYTECODEC_CLOSE(mod, 
o) if (PyErr_Occurred()) Py_FatalError("can't initialize the _cp950 module"); return; errorexit: Py_XDECREF(m); Py_XDECREF(mod); Py_XDECREF(o); } /* * ex: ts=8 sts=4 et */ |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 11:20:56
|
perky 03/05/20 04:20:56 Modified: . setup.py Log: Add big5 and cp950 codec and Fix programming error (UNIINV -> NOCHAR) Revision Changes Path 1.10 +2 -2 cjkcodecs/setup.py Index: setup.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/setup.py,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- setup.py 20 May 2003 10:59:08 -0000 1.9 +++ setup.py 20 May 2003 11:20:56 -0000 1.10 @@ -27,7 +27,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: setup.py,v 1.9 2003/05/20 10:59:08 perky Exp $ +# $Id: setup.py,v 1.10 2003/05/20 11:20:56 perky Exp $ # import sys @@ -39,7 +39,7 @@ 'ja_JP': [], # 'shift_jis', 'cp932', 'euc_jp' 'ko_KR': ['euc_kr', 'cp949'], 'zh_CN': ['gb2312', 'gbk', 'gb18030'], -'zh_TW': [], # 'big5', 'cp950' +'zh_TW': ['big5', 'cp950'] } locales = ['ja_JP', 'ko_KR', 'zh_CN', 'zh_TW'] |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 10:59:09
|
perky 03/05/20 03:59:08 Modified: . setup.py Log: Add gb18030 and gbk codec. Revision Changes Path 1.9 +2 -2 cjkcodecs/setup.py Index: setup.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/setup.py,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- setup.py 20 May 2003 08:32:15 -0000 1.8 +++ setup.py 20 May 2003 10:59:08 -0000 1.9 @@ -27,7 +27,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: setup.py,v 1.8 2003/05/20 08:32:15 perky Exp $ +# $Id: setup.py,v 1.9 2003/05/20 10:59:08 perky Exp $ # import sys @@ -38,7 +38,7 @@ encodings = { 'ja_JP': [], # 'shift_jis', 'cp932', 'euc_jp' 'ko_KR': ['euc_kr', 'cp949'], -'zh_CN': ['gb2312'], # 'gbk', 'gb18030' +'zh_CN': ['gb2312', 'gbk', 'gb18030'], 'zh_TW': [], # 'big5', 'cp950' } locales = ['ja_JP', 'ko_KR', 'zh_CN', 'zh_TW'] |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 10:08:48
|
perky 03/05/20 03:08:48 Modified: src _cp949.c _euc_kr.c _gb2312.c codeccommon.h Log: Add UCS4INVALID macro to avoid > 0xffff characters on ucs4 Revision Changes Path 1.4 +2 -1 cjkcodecs/src/_cp949.c Index: _cp949.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_cp949.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- _cp949.c 19 May 2003 23:12:28 -0000 1.3 +++ _cp949.c 20 May 2003 10:08:47 -0000 1.4 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _cp949.c,v 1.3 2003/05/19 23:12:28 perky Exp $ + * $Id: _cp949.c,v 1.4 2003/05/20 10:08:47 perky Exp $ */ #include "codeccommon.h" @@ -50,6 +50,7 @@ (*outbuf)++; outleft--; continue; } + UCS4INVALID(c) if (outleft < 2) return MBERR_TOOSMALL; 1.7 +2 -1 cjkcodecs/src/_euc_kr.c Index: _euc_kr.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_euc_kr.c,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- _euc_kr.c 19 May 2003 23:34:54 -0000 1.6 +++ _euc_kr.c 20 May 2003 10:08:47 -0000 1.7 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _euc_kr.c,v 1.6 2003/05/19 23:34:54 perky Exp $ + * $Id: _euc_kr.c,v 1.7 2003/05/20 10:08:47 perky Exp $ */ #include "codeccommon.h" @@ -49,6 +49,7 @@ (*outbuf)++; outleft--; continue; } + UCS4INVALID(c) if (outleft < 2) return MBERR_TOOSMALL; 1.2 +2 -1 cjkcodecs/src/_gb2312.c Index: _gb2312.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_gb2312.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- _gb2312.c 20 May 2003 07:31:37 -0000 1.1 +++ _gb2312.c 20 May 2003 10:08:47 -0000 1.2 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _gb2312.c,v 1.1 2003/05/20 07:31:37 perky Exp $ + * $Id: _gb2312.c,v 1.2 2003/05/20 10:08:47 perky Exp $ */ #include "codeccommon.h" @@ -49,6 +49,7 @@ (*outbuf)++; outleft--; continue; } + UCS4INVALID(c) if (outleft < 2) return MBERR_TOOSMALL; 1.5 +9 -1 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- codeccommon.h 20 May 2003 04:11:44 -0000 1.4 +++ codeccommon.h 20 May 2003 10:08:47 -0000 1.5 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: codeccommon.h,v 1.4 2003/05/20 04:11:44 perky Exp $ + * $Id: codeccommon.h,v 1.5 2003/05/20 10:08:47 perky Exp $ */ #include "Python.h" @@ -79,6 +79,14 @@ if (codec == NULL) \ goto errorexit; \ PyModule_AddObject(m, "codec", codec); + +#if Py_UNICODE_SIZE == 4 +#define UCS4INVALID(code) \ + if ((code) > 0xFFFF) \ + return 1; +#else +#define UCS4INVALID(code) +#endif static int importmap(PyObject *mod, const char *symbol, |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 08:32:17
|
perky 03/05/20 01:32:16 Added: cjkcodecs aliases.py Log: Add aliases Revision Changes Path 1.1 cjkcodecs/cjkcodecs/aliases.py Index: aliases.py =================================================================== # # aliases.py: Encoding Name Aliases for CJK Codecs # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# # $Id: aliases.py,v 1.1 2003/05/20 08:32:16 perky Exp $ # from encodings.aliases import aliases aliases.update({ # big5 codec 'big5' : 'cjkcodecs.big5', 'csbig5' : 'cjkcodecs.big5', # cp932 codec '932' : 'cjkcodecs.cp932', 'cp932' : 'cjkcodecs.cp932', 'ms932' : 'cjkcodecs.cp932', 'mskanji' : 'cjkcodecs.cp932', 'ms_kanji' : 'cjkcodecs.cp932', # cp949 codec '949' : 'cjkcodecs.cp949', 'cp949' : 'cjkcodecs.cp949', 'ms949' : 'cjkcodecs.cp949', 'uhc' : 'cjkcodecs.cp949', # cp950 codec '950' : 'cjkcodecs.cp950', 'cp950' : 'cjkcodecs.cp950', 'ms950' : 'cjkcodecs.cp950', # euc_jp codec 'eucjp' : 'cjkcodecs.euc_jp', 'euc_jp' : 'cjkcodecs.euc_jp', 'ujis' : 'cjkcodecs.euc_jp', 'u-jis' : 'cjkcodecs.euc_jp', # euc_kr codec 'euckr' : 'cjkcodecs.euc_kr', 'euc_kr' : 'cjkcodecs.euc_kr', 'korean' : 'cjkcodecs.euc_kr', 'ksc5601' : 'cjkcodecs.euc_kr', 'ks_c_5601' : 'cjkcodecs.euc_kr', 'ks_c_5601_1987' : 'cjkcodecs.euc_kr', 'ksx1001' : 'cjkcodecs.euc_kr', 'ks_x_1001' : 'cjkcodecs.euc_kr', # gb18030 codec 'gb18030' : 'cjkcodecs.gb18030', 'gb18030_2000' : 'cjkcodecs.gb18030', # gb2312 codec 'euccn' : 'cjkcodecs.gb2312', 'euc_cn' : 'cjkcodecs.gb2312', 'chinese' : 'cjkcodecs.gb2312', 'gb2312' : 'cjkcodecs.gb2312', 'gb2312_80' : 'cjkcodecs.gb2312', 'gb2312_1980' : 'cjkcodecs.gb2312', 'iso_ir_58' : 'cjkcodecs.gb2312', 'csISO58gb231280' : 'cjkcodecs.gb2312', # gbk codec '936' : 'cjkcodecs.gbk', 'cp936' : 'cjkcodecs.gbk', 'gbk' : 'cjkcodecs.gbk', 'ms936' : 'cjkcodecs.gbk', # shift_jis codec 'csshiftjis' : 'cjkcodecs.shift_jis', 'shiftjis' : 'cjkcodecs.shift_jis', 'shift_jis' : 'cjkcodecs.shift_jis', 'sjis' : 'cjkcodecs.shift_jis', 's_jis' : 'cjkcodecs.shift_jis', }) del aliases # ex: ts=8 sts=4 et |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 08:32:16
|
perky 03/05/20 01:32:15 Modified: . setup.py Log: Add aliases Revision Changes Path 1.8 +19 -1 cjkcodecs/setup.py Index: setup.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/setup.py,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- setup.py 20 May 2003 07:32:32 -0000 1.7 +++ setup.py 20 May 2003 08:32:15 -0000 1.8 @@ -27,11 +27,12 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: setup.py,v 1.7 2003/05/20 07:32:32 perky Exp $ +# $Id: setup.py,v 1.8 2003/05/20 08:32:15 perky Exp $ # import sys from distutils.core import setup, Extension +from distutils.command.install import install extensions = [] encodings = { @@ -72,10 +73,27 @@ for enc in encodings[loc]: extensions.append(Extension('cjkcodecs._'+enc, ['src/_%s.c'%enc])) +class Install(install): + def initialize_options (self): + install.initialize_options(self) + if sys.hexversion >= 0x2010000: + self.extra_path = ("cjkcodecs", "import cjkcodecs.aliases") + else: + self.extra_path = "cjkcodecs" + def finalize_options (self): + org_install_lib = self.install_lib + install.finalize_options(self) + self.install_libbase = self.install_lib = \ + org_install_lib or self.install_purelib + setup (name = "cjkcodecs", version = "1.0", + description = "Python Unicode Codecs Collection for CJK Encodings", author = "Hye-Shik Chang", author_email = "pe...@Fr...", + url = "http://sourceforge.net/projects/koco", + cmdclass = {'install': Install}, + packages = ['cjkcodecs'], ext_modules = [Extension("cjkcodecs.multibytecodec", ["src/multibytecodec.c"])] + extensions |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 08:14:19
|
perky 03/05/20 01:14:17 Removed: cjkcodecs cp936.py Log: Remove old codecs |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 08:12:38
|
perky 03/05/20 01:12:37 Modified: cjkcodecs big5.py cp932.py cp949.py cp950.py euc_jp.py euc_kr.py gb18030.py gb2312.py Added: cjkcodecs Makefile gbk.py shift_jis.py xxcodec.py.in Removed: cjkcodecs shiftjis.py Log: Move to new style codec files. Revision Changes Path 1.2 +13 -7 cjkcodecs/cjkcodecs/big5.py Index: big5.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/big5.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- big5.py 20 Apr 2003 17:23:45 -0000 1.1 +++ big5.py 20 May 2003 08:12:36 -0000 1.2 @@ -1,5 +1,6 @@ +# ACHTUNG: This is generated file automatically. Please do not edit. # -# big5.py: Python Unicode Codec for Big5 +# big5.py: Python Unicode Codec for BIG5 # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. @@ -26,27 +27,32 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: big5.py,v 1.1 2003/04/20 17:23:45 perky Exp $ +# $Id: big5.py,v 1.2 2003/05/20 08:12:36 perky Exp $ # -from _zh_TW_codecs import big5_encode, big5_decode +from cjkcodecs._big5 import codec import codecs class Codec(codecs.Codec): - encode = big5_encode - decode = big5_decode + encode = codec.encode + decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) - __codec = big5_decode.makestream(stream, errors) + __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): - pass + def __init__(self, stream, errors='strict'): + codecs.StreamWriter.__init__(self, stream, errors) + __codec = codec.StreamWriter(stream, errors) + self.write = __codec.write + self.writelines = __codec.writelines + self.reset = __codec.reset def getregentry(): return 
(Codec().encode,Codec().decode,StreamReader,StreamWriter) 1.2 +13 -7 cjkcodecs/cjkcodecs/cp932.py Index: cp932.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/cp932.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- cp932.py 20 Apr 2003 17:23:45 -0000 1.1 +++ cp932.py 20 May 2003 08:12:36 -0000 1.2 @@ -1,5 +1,6 @@ +# ACHTUNG: This is generated file automatically. Please do not edit. # -# cp932.py: Python Unicode Codec for cp932 +# cp932.py: Python Unicode Codec for CP932 # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. @@ -26,27 +27,32 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: cp932.py,v 1.1 2003/04/20 17:23:45 perky Exp $ +# $Id: cp932.py,v 1.2 2003/05/20 08:12:36 perky Exp $ # -from _ja_codecs import cp932_encode, cp932_decode +from cjkcodecs._cp932 import codec import codecs class Codec(codecs.Codec): - encode = cp932_encode - decode = cp932_decode + encode = codec.encode + decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) - __codec = cp932_decode.makestream(stream, errors) + __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): - pass + def __init__(self, stream, errors='strict'): + codecs.StreamWriter.__init__(self, stream, errors) + __codec = codec.StreamWriter(stream, errors) + self.write = __codec.write + self.writelines = __codec.writelines + self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) 1.2 +13 -7 cjkcodecs/cjkcodecs/cp949.py Index: cp949.py =================================================================== RCS file: 
/cvsroot/koco/cjkcodecs/cjkcodecs/cp949.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- cp949.py 20 Apr 2003 17:23:45 -0000 1.1 +++ cp949.py 20 May 2003 08:12:36 -0000 1.2 @@ -1,5 +1,6 @@ +# ACHTUNG: This is generated file automatically. Please do not edit. # -# cp949.py: Python Unicode Codec for cp949 +# cp949.py: Python Unicode Codec for CP949 # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. @@ -26,27 +27,32 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: cp949.py,v 1.1 2003/04/20 17:23:45 perky Exp $ +# $Id: cp949.py,v 1.2 2003/05/20 08:12:36 perky Exp $ # -from _ko_codecs import cp949_encode, cp949_decode +from cjkcodecs._cp949 import codec import codecs class Codec(codecs.Codec): - encode = cp949_encode - decode = cp949_decode + encode = codec.encode + decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) - __codec = cp949_decode.makestream(stream, errors) + __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): - pass + def __init__(self, stream, errors='strict'): + codecs.StreamWriter.__init__(self, stream, errors) + __codec = codec.StreamWriter(stream, errors) + self.write = __codec.write + self.writelines = __codec.writelines + self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) 1.2 +13 -7 cjkcodecs/cjkcodecs/cp950.py Index: cp950.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/cp950.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- cp950.py 20 Apr 2003 17:23:45 -0000 1.1 +++ cp950.py 20 May 2003 08:12:36 -0000 1.2 @@ -1,5 
+1,6 @@ +# ACHTUNG: This is generated file automatically. Please do not edit. # -# cp950.py: Python Unicode Codec for cp950 +# cp950.py: Python Unicode Codec for CP950 # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. @@ -26,27 +27,32 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: cp950.py,v 1.1 2003/04/20 17:23:45 perky Exp $ +# $Id: cp950.py,v 1.2 2003/05/20 08:12:36 perky Exp $ # -from _zh_TW_codecs import cp950_encode, cp950_decode +from cjkcodecs._cp950 import codec import codecs class Codec(codecs.Codec): - encode = cp950_encode - decode = cp950_decode + encode = codec.encode + decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) - __codec = cp950_decode.makestream(stream, errors) + __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): - pass + def __init__(self, stream, errors='strict'): + codecs.StreamWriter.__init__(self, stream, errors) + __codec = codec.StreamWriter(stream, errors) + self.write = __codec.write + self.writelines = __codec.writelines + self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) 1.2 +13 -7 cjkcodecs/cjkcodecs/euc_jp.py Index: euc_jp.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/euc_jp.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- euc_jp.py 20 Apr 2003 17:23:45 -0000 1.1 +++ euc_jp.py 20 May 2003 08:12:36 -0000 1.2 @@ -1,5 +1,6 @@ +# ACHTUNG: This is generated file automatically. Please do not edit. 
# -# euc_jp.py: Python Unicode Codec for EUC-JP +# euc_jp.py: Python Unicode Codec for EUC_JP # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. @@ -26,27 +27,32 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: euc_jp.py,v 1.1 2003/04/20 17:23:45 perky Exp $ +# $Id: euc_jp.py,v 1.2 2003/05/20 08:12:36 perky Exp $ # -from _ja_codecs import euc_jp_encode, euc_jp_decode +from cjkcodecs._euc_jp import codec import codecs class Codec(codecs.Codec): - encode = euc_jp_encode - decode = euc_jp_decode + encode = codec.encode + decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) - __codec = euc_jp_decode.makestream(stream, errors) + __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): - pass + def __init__(self, stream, errors='strict'): + codecs.StreamWriter.__init__(self, stream, errors) + __codec = codec.StreamWriter(stream, errors) + self.write = __codec.write + self.writelines = __codec.writelines + self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) 1.2 +13 -7 cjkcodecs/cjkcodecs/euc_kr.py Index: euc_kr.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/euc_kr.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- euc_kr.py 20 Apr 2003 17:23:46 -0000 1.1 +++ euc_kr.py 20 May 2003 08:12:36 -0000 1.2 @@ -1,5 +1,6 @@ +# ACHTUNG: This is generated file automatically. Please do not edit. # -# euc_kr.py: Python Unicode Codec for EUC-KR +# euc_kr.py: Python Unicode Codec for EUC_KR # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. 
@@ -26,27 +27,32 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: euc_kr.py,v 1.1 2003/04/20 17:23:46 perky Exp $ +# $Id: euc_kr.py,v 1.2 2003/05/20 08:12:36 perky Exp $ # -from _ko_codecs import euc_kr_encode, euc_kr_decode +from cjkcodecs._euc_kr import codec import codecs class Codec(codecs.Codec): - encode = euc_kr_encode - decode = euc_kr_decode + encode = codec.encode + decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) - __codec = euc_kr_decode.makestream(stream, errors) + __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): - pass + def __init__(self, stream, errors='strict'): + codecs.StreamWriter.__init__(self, stream, errors) + __codec = codec.StreamWriter(stream, errors) + self.write = __codec.write + self.writelines = __codec.writelines + self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) 1.2 +12 -6 cjkcodecs/cjkcodecs/gb18030.py Index: gb18030.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/gb18030.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- gb18030.py 20 Apr 2003 17:23:46 -0000 1.1 +++ gb18030.py 20 May 2003 08:12:36 -0000 1.2 @@ -1,3 +1,4 @@ +# ACHTUNG: This is generated file automatically. Please do not edit. # # gb18030.py: Python Unicode Codec for GB18030 # @@ -26,27 +27,32 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# -# $Id: gb18030.py,v 1.1 2003/04/20 17:23:46 perky Exp $ +# $Id: gb18030.py,v 1.2 2003/05/20 08:12:36 perky Exp $ # -from _zh_CN_codecs import gb18030_encode, gb18030_decode +from cjkcodecs._gb18030 import codec import codecs class Codec(codecs.Codec): - encode = gb18030_encode - decode = gb18030_decode + encode = codec.encode + decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) - __codec = gb18030_decode.makestream(stream, errors) + __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): - pass + def __init__(self, stream, errors='strict'): + codecs.StreamWriter.__init__(self, stream, errors) + __codec = codec.StreamWriter(stream, errors) + self.write = __codec.write + self.writelines = __codec.writelines + self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) 1.2 +12 -6 cjkcodecs/cjkcodecs/gb2312.py Index: gb2312.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/gb2312.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- gb2312.py 20 Apr 2003 17:23:46 -0000 1.1 +++ gb2312.py 20 May 2003 08:12:36 -0000 1.2 @@ -1,3 +1,4 @@ +# ACHTUNG: This is generated file automatically. Please do not edit. # # gb2312.py: Python Unicode Codec for GB2312 # @@ -26,27 +27,32 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# -# $Id: gb2312.py,v 1.1 2003/04/20 17:23:46 perky Exp $ +# $Id: gb2312.py,v 1.2 2003/05/20 08:12:36 perky Exp $ # -from _zh_CN_codecs import gb2312_encode, gb2312_decode +from cjkcodecs._gb2312 import codec import codecs class Codec(codecs.Codec): - encode = gb2312_encode - decode = gb2312_decode + encode = codec.encode + decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) - __codec = gb2312_decode.makestream(stream, errors) + __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): - pass + def __init__(self, stream, errors='strict'): + codecs.StreamWriter.__init__(self, stream, errors) + __codec = codec.StreamWriter(stream, errors) + self.write = __codec.write + self.writelines = __codec.writelines + self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) 1.1 cjkcodecs/cjkcodecs/Makefile Index: Makefile =================================================================== # Makefile: maintenance tool set # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # $Id: Makefile,v 1.1 2003/05/20 08:12:36 perky Exp $ # GENERIC_ENCODINGS= gb2312 gbk gb18030 \ big5 cp950 \ cp932 shift_jis euc_jp \ cp949 euc_kr all: for cset in ${GENERIC_ENCODINGS}; do \ CSET=`echo $$cset|tr "[:lower:]" "[:upper:]"`; \ sed -e "s/%%ENCODING%%/$$CSET/g" \ -e "s/%%encoding%%/$$cset/g" \ -e "s/%%__%%/ACHTUNG: This is generated file automatically.\ Please do not edit./g" xxcodec.py.in \ > $$cset.py; \ done 1.1 cjkcodecs/cjkcodecs/gbk.py Index: gbk.py =================================================================== # ACHTUNG: This is generated file automatically. Please do not edit. # # gbk.py: Python Unicode Codec for GBK # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # $Id: gbk.py,v 1.1 2003/05/20 08:12:36 perky Exp $ # from cjkcodecs._gbk import codec import codecs class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): def __init__(self, stream, errors='strict'): codecs.StreamWriter.__init__(self, stream, errors) __codec = codec.StreamWriter(stream, errors) self.write = __codec.write self.writelines = __codec.writelines self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) # ex: ts=8 sts=4 et 1.1 cjkcodecs/cjkcodecs/shift_jis.py Index: shift_jis.py =================================================================== # ACHTUNG: This is generated file automatically. Please do not edit. # # shift_jis.py: Python Unicode Codec for SHIFT_JIS # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# # $Id: shift_jis.py,v 1.1 2003/05/20 08:12:36 perky Exp $ # from cjkcodecs._shift_jis import codec import codecs class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): def __init__(self, stream, errors='strict'): codecs.StreamWriter.__init__(self, stream, errors) __codec = codec.StreamWriter(stream, errors) self.write = __codec.write self.writelines = __codec.writelines self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) # ex: ts=8 sts=4 et 1.1 cjkcodecs/cjkcodecs/xxcodec.py.in Index: xxcodec.py.in =================================================================== # %%__%% # # %%encoding%%.py: Python Unicode Codec for %%ENCODING%% # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # $Id: xxcodec.py.in,v 1.1 2003/05/20 08:12:36 perky Exp $ # from cjkcodecs._%%encoding%% import codec import codecs class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): def __init__(self, stream, errors='strict'): codecs.StreamWriter.__init__(self, stream, errors) __codec = codec.StreamWriter(stream, errors) self.write = __codec.write self.writelines = __codec.writelines self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) # ex: ts=8 sts=4 et |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 07:32:33
|
perky 03/05/20 00:32:32 Modified: . setup.py Log: Enable select codecs which do not want to install. Revision Changes Path 1.7 +42 -9 cjkcodecs/setup.py Index: setup.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/setup.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- setup.py 19 May 2003 10:38:07 -0000 1.6 +++ setup.py 20 May 2003 07:32:32 -0000 1.7 @@ -27,25 +27,58 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: setup.py,v 1.6 2003/05/19 10:38:07 perky Exp $ +# $Id: setup.py,v 1.7 2003/05/20 07:32:32 perky Exp $ # import sys from distutils.core import setup, Extension +extensions = [] +encodings = { +'ja_JP': [], # 'shift_jis', 'cp932', 'euc_jp' +'ko_KR': ['euc_kr', 'cp949'], +'zh_CN': ['gb2312'], # 'gbk', 'gb18030' +'zh_TW': [], # 'big5', 'cp950' +} +locales = ['ja_JP', 'ko_KR', 'zh_CN', 'zh_TW'] + +for arg in sys.argv[1:]: # don't use getopt to ignore arguments for distutils + args = arg.split('=', 1) + if args[0] == '--disable-japanese': + locales.remove('ja_JP') + elif args[0] == '--disable-korean': + locales.remove('ko_KR') + elif args[0] == '--disable-simplified-chinese': + locales.remove('zh_CN') + elif args[0] == '--disable-traditional-chinese': + locales.remove('zh_TW') + elif args[0] == '--help': + print """\ +Language options: + --disable-japanese don't install Japanese codecs + --disable-korean don't install Korean codecs + --disable-simplified-chinese don't install Simplified Chinese codecs + --disable-traditional-chinese don't install Traditional Chinese codecs +""" + continue + else: + continue + + sys.argv.remove(arg) + +for loc in locales: + extensions.append(Extension('cjkcodecs.mapdata_'+loc, + ['src/maps/mapdata_%s.c'%loc])) + for enc in encodings[loc]: + extensions.append(Extension('cjkcodecs._'+enc, ['src/_%s.c'%enc])) + setup (name = "cjkcodecs", version = "1.0", author = "Hye-Shik Chang", 
author_email = "pe...@Fr...", ext_modules = - [Extension("cjkcodecs.mapdata_ja_JP", ["src/maps/mapdata_ja_JP.c"]), - Extension("cjkcodecs.mapdata_ko_KR", ["src/maps/mapdata_ko_KR.c"]), - Extension("cjkcodecs.mapdata_zh_CN", ["src/maps/mapdata_zh_CN.c"]), - Extension("cjkcodecs.mapdata_zh_TW", ["src/maps/mapdata_zh_TW.c"]), - Extension("cjkcodecs.multibytecodec", ["src/multibytecodec.c"]), - Extension("cjkcodecs._euc_kr", ["src/_euc_kr.c"]), - Extension("cjkcodecs._cp949", ["src/_cp949.c"]), - ], + [Extension("cjkcodecs.multibytecodec", ["src/multibytecodec.c"])] + + extensions ) # ex: ts=8 sts=4 et |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 07:31:40
|
perky 03/05/20 00:31:37 Added: src _gb2312.c Log: Add gb2312 codec implementation. Revision Changes Path 1.1 cjkcodecs/src/_gb2312.c Index: _gb2312.c =================================================================== /* * _gb2312.c: the GB2312 codec * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * $Id: _gb2312.c,v 1.1 2003/05/20 07:31:37 perky Exp $ */ #include "codeccommon.h" ENCMAP(gbcommon) DECMAP(gb2312) ENCODER(gb2312) { while (inleft > 0) { const encode_map *map; Py_UNICODE c = **inbuf, clow; DBCHAR code; if (c < 0x80) { if (outleft < 1) return MBERR_TOOSMALL; **outbuf = c; (*inbuf)++; inleft--; (*outbuf)++; outleft--; continue; } if (outleft < 2) return MBERR_TOOSMALL; map = &gbcommonencmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || (code = map->map[clow - map->bottom]) == UNIINV) return 1; if (code & 0x8000) /* MSB set: GBK */ return 1; (*outbuf)[0] = (code >> 8) | 0x80; (*outbuf)[1] = (code & 0xFF) | 0x80; (*outbuf) += 2; outleft -= 2; (*inbuf)++; inleft--; } return 0; } DECODER(gb2312) { while (inleft > 0) { const decode_map *map; unsigned char c = **inbuf, c2; Py_UNICODE code; if (outleft < 1) return MBERR_TOOSMALL; if (c < 0x80) { **outbuf = c; (*inbuf)++; inleft--; (*outbuf)++; outleft--; continue; } if (inleft < 2) return MBERR_TOOFEW; c2 = (*inbuf)[1] ^ 0x80; map = &gb2312decmap[c & 0x7f]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) return 2; **outbuf = code; (*outbuf)++; outleft--; (*inbuf) += 2; inleft -= 2; } return 0; } CODECDEF(gb2312) NOMETHODS(__methods) void init_gb2312(void) { PyObject *codec; PyObject *m = NULL, *mod = NULL, *o = NULL; m = Py_InitModule("_gb2312", __methods); /* Import mapdata */ MAPOPEN(mod, "zh_CN") if (IMPORTMAP(mod, gb2312, NULL, &gb2312decmap) || IMPORTMAP(mod, gbcommon, &gbcommonencmap, NULL)) goto errorexit; MAPCLOSE(mod) /* Create Codec Instances */ MULTIBYTECODEC_OPEN(mod, o) REGISTERCODEC(m, o, codec) MULTIBYTECODEC_CLOSE(mod, o) if (PyErr_Occurred()) Py_FatalError("can't initialize the _gb2312 module"); return; errorexit: Py_XDECREF(m); Py_XDECREF(mod); Py_XDECREF(o); } /* * ex: ts=8 sts=4 et */ |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 07:31:20
|
perky 03/05/20 00:31:19 Modified: src/maps mapdata_zh_CN.c Log: Fix typo. Revision Changes Path 1.3 +2 -2 cjkcodecs/src/maps/mapdata_zh_CN.c Index: mapdata_zh_CN.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/maps/mapdata_zh_CN.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mapdata_zh_CN.c 18 May 2003 15:58:09 -0000 1.2 +++ mapdata_zh_CN.c 20 May 2003 07:31:19 -0000 1.3 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: mapdata_zh_CN.c,v 1.2 2003/05/18 15:58:09 perky Exp $ + * $Id: mapdata_zh_CN.c,v 1.3 2003/05/20 07:31:19 perky Exp $ */ #include "Python.h" @@ -54,7 +54,7 @@ struct dbcs_map *h; PyObject *m; - m = Py_InitModule("mapdata_zn_CN", __methods); + m = Py_InitModule("mapdata_zh_CN", __methods); for (h = mapholders; h->charset[0] != '\0'; h++) { char mhname[256] = "__map_"; |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 06:46:56
|
perky 03/05/19 23:46:56 Modified: src/maps mapdata_ja_JP.c mapdata_ko_KR.c mapdata_zh_TW.c Log: Remove trailing white spaces Revision Changes Path 1.3 +2 -2 cjkcodecs/src/maps/mapdata_ja_JP.c Index: mapdata_ja_JP.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/maps/mapdata_ja_JP.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mapdata_ja_JP.c 18 May 2003 15:58:09 -0000 1.2 +++ mapdata_ja_JP.c 20 May 2003 06:46:55 -0000 1.3 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: mapdata_ja_JP.c,v 1.2 2003/05/18 15:58:09 perky Exp $ + * $Id: mapdata_ja_JP.c,v 1.3 2003/05/20 06:46:55 perky Exp $ */ #include "Python.h" @@ -60,7 +60,7 @@ char mhname[256] = "__map_"; strcpy(mhname + sizeof("__map_") - 1, h->charset); - PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL)); + PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL)); } if (PyErr_Occurred()) 1.4 +2 -2 cjkcodecs/src/maps/mapdata_ko_KR.c Index: mapdata_ko_KR.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/maps/mapdata_ko_KR.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- mapdata_ko_KR.c 19 May 2003 06:05:58 -0000 1.3 +++ mapdata_ko_KR.c 20 May 2003 06:46:55 -0000 1.4 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: mapdata_ko_KR.c,v 1.3 2003/05/19 06:05:58 perky Exp $ + * $Id: mapdata_ko_KR.c,v 1.4 2003/05/20 06:46:55 perky Exp $ */ #include "Python.h" @@ -58,7 +58,7 @@ char mhname[256] = "__map_"; strcpy(mhname + sizeof("__map_") - 1, h->charset); - PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL)); + PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL)); } if (PyErr_Occurred()) 1.3 +2 -2 cjkcodecs/src/maps/mapdata_zh_TW.c Index: mapdata_zh_TW.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/maps/mapdata_zh_TW.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- mapdata_zh_TW.c 18 May 2003 15:58:09 -0000 1.2 +++ mapdata_zh_TW.c 20 May 2003 06:46:55 -0000 1.3 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: mapdata_zh_TW.c,v 1.2 2003/05/18 15:58:09 perky Exp $ + * $Id: mapdata_zh_TW.c,v 1.3 2003/05/20 06:46:55 perky Exp $ */ #include "Python.h" @@ -56,7 +56,7 @@ char mhname[256] = "__map_"; strcpy(mhname + sizeof("__map_") - 1, h->charset); - PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL)); + PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL)); } if (PyErr_Occurred()) |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 06:42:41
|
perky 03/05/19 23:42:40 Modified: src multibytecodec.c Log: Implement StreamReader. Revision Changes Path 1.10 +315 -16 cjkcodecs/src/multibytecodec.c Index: multibytecodec.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.c,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- multibytecodec.c 20 May 2003 04:16:56 -0000 1.9 +++ multibytecodec.c 20 May 2003 06:42:40 -0000 1.10 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.c,v 1.9 2003/05/20 04:16:56 perky Exp $ + * $Id: multibytecodec.c,v 1.10 2003/05/20 06:42:40 perky Exp $ */ #include "Python.h" @@ -62,6 +62,9 @@ are 'ignore' and 'replace' as well as any other name registerd with\n\ codecs.register_error that is able to handle UnicodeDecodeErrors."); +PyDoc_STRVAR(MultibyteCodec_StreamReader__doc__, +"I.StreamReader(stream[, errors]) -> StreamReader instance"); + PyDoc_STRVAR(MultibyteCodec_StreamWriter__doc__, "I.StreamWriter(stream[, errors]) -> StreamWriter instance"); @@ -70,6 +73,8 @@ static PyObject *multibytecodec_encode(MultibyteCodec *, MultibyteCodec_State *, const Py_UNICODE *, int, PyObject *); +static PyObject *mbstreamreader_create(MultibyteCodec *, + PyObject *, const char *); static PyObject *mbstreamwriter_create(MultibyteCodec *, PyObject *, const char *); @@ -134,23 +139,23 @@ goto errorexit; \ } -static int +static int expand_decodebuffer(MultibyteDecodeBuffer *buf, int esize) -{ +{ int orgpos, orgsize; - + orgpos = (int)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); orgsize = PyUnicode_GET_SIZE(buf->outobj); if (PyUnicode_Resize(&buf->outobj, orgsize + ( esize < (orgsize >> 1) ? 
(orgsize >> 1) | 1 : esize)) == -1) return -1; - + buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos; buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj) + PyUnicode_GET_SIZE(buf->outobj); - - return 0; -} + + return 0; +} #define RESERVE_DECODEBUFFER(buf, s) { \ if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ if (expand_decodebuffer(buf, s) == -1) \ @@ -227,7 +232,7 @@ goto errorexit; } else if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 || - PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 || + PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 || PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) goto errorexit; @@ -359,8 +364,8 @@ if (retobj == NULL) goto errorexit; - if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || - !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || + if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || + !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { PyErr_SetString(PyExc_ValueError, "decoding error handler must return (unicode, int) tuple"); @@ -383,7 +388,7 @@ "position %d from error handler out of bounds", newpos); goto errorexit; } - buf->inbuf = buf->inbuf_top + newpos; + buf->inbuf = buf->inbuf_top + newpos; Py_DECREF(retobj); return 0; @@ -481,7 +486,7 @@ static PyObject * MultibyteCodec_Decode(MultibyteCodecObject *self, PyObject *args, PyObject *kwargs) -{ +{ MultibyteCodec_State state; MultibyteDecodeBuffer buf; PyObject *errorcb; @@ -548,9 +553,23 @@ } static PyObject * +MultibyteCodec_StreamReader(MultibyteCodecObject *self, + PyObject *args, PyObject *kwargs) +{ + PyObject *stream; + char *errors = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamReader", + streamkwarglist, &stream, &errors)) + return NULL; + + return mbstreamreader_create(self->codec, stream, errors); +} + +static PyObject * MultibyteCodec_StreamWriter(MultibyteCodecObject *self, PyObject *args, PyObject *kwargs) -{ 
+{ PyObject *stream; char *errors = NULL; @@ -568,6 +587,9 @@ {"decode", (PyCFunction)MultibyteCodec_Decode, METH_VARARGS | METH_KEYWORDS, MultibyteCodec_Decode__doc__}, + {"StreamReader",(PyCFunction)MultibyteCodec_StreamReader, + METH_VARARGS | METH_KEYWORDS, + MultibyteCodec_StreamReader__doc__}, {"StreamWriter",(PyCFunction)MultibyteCodec_StreamWriter, METH_VARARGS | METH_KEYWORDS, MultibyteCodec_StreamWriter__doc__}, @@ -613,6 +635,258 @@ multibytecodec_methods, /* tp_methods */ }; +static PyObject * +mbstreamreader_iread(MultibyteStreamReaderObject *self, + const char *method, int sizehint) +{ + MultibyteDecodeBuffer buf; + PyObject *cres; + int rsize, r, finalsize = 0; + + if (sizehint == 0) + return PyUnicode_FromUnicode(NULL, 0); + + buf.outobj = buf.excobj = NULL; + cres = NULL; + + for (;;) { + if (sizehint < 0) + cres = PyObject_CallMethod(self->stream, (char *)method, NULL); + else + cres = PyObject_CallMethod(self->stream, + (char *)method, "i", sizehint); + if (cres == NULL) + goto errorexit; + + if (!PyString_Check(cres)) { + PyErr_SetString(PyExc_TypeError, + "stream function returned a non-string object"); + goto errorexit; + } + + if (self->pendingsize > 0) { + PyObject *ctr; + char *ctrdata; + + rsize = PyString_GET_SIZE(cres) + self->pendingsize; + ctr = PyString_FromStringAndSize(NULL, rsize); + if (ctr == NULL) + goto errorexit; + ctrdata = PyString_AS_STRING(ctr); + memcpy(ctrdata, self->pending, self->pendingsize); + memcpy(ctrdata + self->pendingsize, + PyString_AS_STRING(cres), PyString_GET_SIZE(cres)); + Py_DECREF(cres); + cres = ctr; + self->pendingsize = 0; + } + + rsize = PyString_GET_SIZE(cres); + buf.inbuf = buf.inbuf_top = (unsigned char *)PyString_AS_STRING(cres); + buf.inbuf_end = buf.inbuf_top + rsize; + if (buf.outobj == NULL) { + buf.outobj = PyUnicode_FromUnicode(NULL, rsize); + if (buf.outobj == NULL) + goto errorexit; + buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); + buf.outbuf_end = buf.outbuf + 
PyUnicode_GET_SIZE(buf.outobj); + } + + r = 0; + if (rsize > 0) + while (buf.inbuf < buf.inbuf_end) { + size_t inleft, outleft; + + inleft = (size_t)(buf.inbuf_end - buf.inbuf); + outleft = (size_t)(buf.outbuf_end - buf.outbuf); + + r = self->codec->decode(&self->state, &buf.inbuf, inleft, + &buf.outbuf, outleft); + if (r == 0 || r == MBERR_TOOFEW) + break; + else if (multibytecodec_decerror(self->codec, + &self->state, &buf, self->errors, r)) + goto errorexit; + } + + if (rsize == 0 || sizehint < 0) { /* end of file */ + if (buf.inbuf < buf.inbuf_end && + multibytecodec_decerror(self->codec, &self->state, &buf, + self->errors, MBERR_TOOFEW)) + goto errorexit; + } + + if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */ + size_t npendings; + + /* we can't assume that pendingsize is still 0 here. because + * this function can be called recursively from error callback */ + npendings = (size_t)(buf.inbuf_end - buf.inbuf); + if (npendings + self->pendingsize > MAXPENDING) { + PyErr_SetString(PyExc_RuntimeError, + "pending buffer overflow"); + goto errorexit; + } + memcpy(self->pending + self->pendingsize, buf.inbuf, npendings); + self->pendingsize += npendings; + } + + finalsize = (int)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); + + Py_DECREF(cres); + cres = NULL; + + if (sizehint < 0 || finalsize != 0 || rsize == 0) + break; + + sizehint = 1; /* read 1 more byte and retry */ + } + + if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) + if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) + goto errorexit; + + Py_XDECREF(cres); + Py_XDECREF(buf.excobj); + return buf.outobj; + +errorexit: + Py_XDECREF(cres); + Py_XDECREF(buf.excobj); + Py_XDECREF(buf.outobj); + return NULL; +} + +static PyObject * +mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args) +{ + PyObject *sizeobj = NULL; + long size; + + if (!PyArg_ParseTuple(args, "|O:read", &sizeobj)) + return NULL; + + if (sizeobj == Py_None || sizeobj == NULL) + size = -1; + else if 
(PyInt_Check(sizeobj)) + size = PyInt_AsLong(sizeobj); + else { + PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); + return NULL; + } + + return mbstreamreader_iread(self, "read", size); +} + +static PyObject * +mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args) +{ + PyObject *sizeobj = NULL; + long size; + + if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj)) + return NULL; + + if (sizeobj == Py_None || sizeobj == NULL) + size = -1; + else if (PyInt_Check(sizeobj)) + size = PyInt_AsLong(sizeobj); + else { + PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); + return NULL; + } + + return mbstreamreader_iread(self, "readline", size); +} + +static PyObject * +mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args) +{ + PyObject *sizehintobj = NULL, *r, *sr; + long sizehint; + + if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj)) + return NULL; + + if (sizehintobj == Py_None || sizehintobj == NULL) + sizehint = -1; + else if (PyInt_Check(sizehintobj)) + sizehint = PyInt_AsLong(sizehintobj); + else { + PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); + return NULL; + } + + r = mbstreamreader_iread(self, "read", sizehint); + if (r == NULL) + return NULL; + + sr = PyUnicode_Splitlines(r, 1); + Py_DECREF(r); + return sr; +} + +static PyObject * +mbstreamreader_reset(MultibyteStreamReaderObject *self) +{ + self->state.p = NULL; + self->pendingsize = 0; + + Py_INCREF(Py_None); + return Py_None; +} + +static struct PyMethodDef mbstreamreader_methods[] = { + {"read", (PyCFunction)mbstreamreader_read, + METH_VARARGS, NULL}, + {"readline", (PyCFunction)mbstreamreader_readline, + METH_VARARGS, NULL}, + {"readlines", (PyCFunction)mbstreamreader_readlines, + METH_VARARGS, NULL}, + {"reset", (PyCFunction)mbstreamreader_reset, + METH_NOARGS, NULL}, + {NULL, NULL}, +}; + +static void +mbstreamreader_dealloc(MultibyteStreamWriterObject *self) +{ + PyObject_Del(self); +} + +static PyTypeObject 
MultibyteStreamReader_Type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "MultibyteStreamReader", /* tp_name */ + sizeof(MultibyteStreamReaderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)mbstreamreader_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iterext */ + mbstreamreader_methods, /* tp_methods */ +}; + static int mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, PyObject *unistr) @@ -755,7 +1029,7 @@ static PyObject * __create_codec(PyObject *ignore, PyObject *arg) { - MultibyteCodecObject *self; + MultibyteCodecObject *self; if (!PyCObject_Check(arg)) { PyErr_SetString(PyExc_ValueError, "argument type invalid"); @@ -768,7 +1042,32 @@ self->codec = PyCObject_AsVoidPtr(arg); - return (PyObject *)self; + return (PyObject *)self; +} + +static PyObject * +mbstreamreader_create(MultibyteCodec *codec, + PyObject *stream, const char *errors) +{ + MultibyteStreamReaderObject *self; + + self = PyObject_New(MultibyteStreamReaderObject, + &MultibyteStreamReader_Type); + if (self == NULL) + return NULL; + + self->errors = get_errorcallback(errors); + if (self->errors == NULL) { + Py_DECREF(self); + return NULL; + } + self->codec = codec; + self->stream = stream; + Py_INCREF(stream); + self->state.p = NULL; + self->pendingsize = 0; + + return (PyObject *)self; } static PyObject * |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 04:17:06
|
perky 03/05/19 21:16:57 Modified: src multibytecodec.c Log: We can hide __create_codec function symbol. Hmm. but I don't like it. hehe Revision Changes Path 1.9 +2 -2 cjkcodecs/src/multibytecodec.c Index: multibytecodec.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.c,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- multibytecodec.c 20 May 2003 04:11:44 -0000 1.8 +++ multibytecodec.c 20 May 2003 04:16:56 -0000 1.9 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.c,v 1.8 2003/05/20 04:11:44 perky Exp $ + * $Id: multibytecodec.c,v 1.9 2003/05/20 04:16:56 perky Exp $ */ #include "Python.h" @@ -752,7 +752,7 @@ mbstreamwriter_methods, /* tp_methods */ }; -PyObject * +static PyObject * __create_codec(PyObject *ignore, PyObject *arg) { MultibyteCodecObject *self; |
From: Hye-Shik C. <pe...@us...> - 2003-05-20 04:11:44
|
perky 03/05/19 21:11:44 Modified: src codeccommon.h multibytecodec.c multibytecodec.h Log: Implement StreamWriter. Revision Changes Path 1.4 +5 -5 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- codeccommon.h 19 May 2003 23:07:12 -0000 1.3 +++ codeccommon.h 20 May 2003 04:11:44 -0000 1.4 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: codeccommon.h,v 1.3 2003/05/19 23:07:12 perky Exp $ + * $Id: codeccommon.h,v 1.4 2003/05/20 04:11:44 perky Exp $ */ #include "Python.h" @@ -40,16 +40,16 @@ #define ENCODER(encoding) \ static int encoding##_encode( \ - PyMultibyteCodec_State *state, \ + MultibyteCodec_State *state, \ const Py_UNICODE **inbuf, size_t inleft, \ unsigned char **outbuf, size_t outleft) #define DECODER(encoding) \ static int encoding##_decode( \ - PyMultibyteCodec_State *state, \ + MultibyteCodec_State *state, \ const unsigned char **inbuf, size_t inleft, \ Py_UNICODE **outbuf, size_t outleft) #define CODECDEF(encoding) \ - static PyMultibyteCodec __codec = { \ + static MultibyteCodec __codec = { \ #encoding, encoding##_encode, encoding##_decode \ }; #define NOMETHODS(name) \ @@ -106,7 +106,7 @@ } static PyObject * -createcodec(PyObject *cofunc, PyMultibyteCodec *codec) +createcodec(PyObject *cofunc, MultibyteCodec *codec) { PyObject *args, *r; 1.8 +206 -20 cjkcodecs/src/multibytecodec.c Index: multibytecodec.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.c,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- multibytecodec.c 19 May 2003 22:56:37 -0000 1.7 +++ multibytecodec.c 20 May 2003 04:11:44 -0000 1.8 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
* POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.c,v 1.7 2003/05/19 22:56:37 perky Exp $ + * $Id: multibytecodec.c,v 1.8 2003/05/20 04:11:44 perky Exp $ */ #include "Python.h" @@ -45,7 +45,7 @@ } MultibyteDecodeBuffer; PyDoc_STRVAR(MultibyteCodec_Encode__doc__, -"I.encode(unicode, [,errors]) -> (string, length consumed)\n\ +"I.encode(unicode[, errors]) -> (string, length consumed)\n\ \n\ Return an encoded string version of `unicode'. errors may be given to\n\ set a different error handling scheme. Default is 'strict' meaning that\n\ @@ -54,7 +54,7 @@ registered with codecs.register_error that can handle UnicodeEncodeErrors."); PyDoc_STRVAR(MultibyteCodec_Decode__doc__, -"I.decode(string, [,errors]) -> (unicodeobject, length consumed)\n\ +"I.decode(string[, errors]) -> (unicodeobject, length consumed)\n\ \n\ Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\ to set a different error handling scheme. Default is 'strict' meaning\n\ @@ -62,10 +62,16 @@ are 'ignore' and 'replace' as well as any other name registerd with\n\ codecs.register_error that is able to handle UnicodeDecodeErrors."); +PyDoc_STRVAR(MultibyteCodec_StreamWriter__doc__, +"I.StreamWriter(stream[, errors]) -> StreamWriter instance"); + static char *codeckwarglist[] = {"input", "errors", NULL}; +static char *streamkwarglist[] = {"stream", "errors", NULL}; -static PyObject *multibytecodec_encode(PyMultibyteCodec *, - PyMultibyteCodec_State *, const Py_UNICODE *, int, PyObject *); +static PyObject *multibytecodec_encode(MultibyteCodec *, + MultibyteCodec_State *, const Py_UNICODE *, int, PyObject *); +static PyObject *mbstreamwriter_create(MultibyteCodec *, + PyObject *, const char *); static PyObject * make_tuple(PyObject *unicode, int len) @@ -152,8 +158,8 @@ } static int -multibytecodec_encerror(PyMultibyteCodec *codec, - PyMultibyteCodec_State *state, +multibytecodec_encerror(MultibyteCodec *codec, + MultibyteCodec_State *state, MultibyteEncodeBuffer *buf, PyObject 
*errors, int e) { @@ -281,8 +287,8 @@ } static int -multibytecodec_decerror(PyMultibyteCodec *codec, - PyMultibyteCodec_State *state, +multibytecodec_decerror(MultibyteCodec *codec, + MultibyteCodec_State *state, MultibyteDecodeBuffer *buf, PyObject *errors, int e) { @@ -387,8 +393,8 @@ } static PyObject * -multibytecodec_encode(PyMultibyteCodec *codec, - PyMultibyteCodec_State *state, +multibytecodec_encode(MultibyteCodec *codec, + MultibyteCodec_State *state, const Py_UNICODE *data, int datalen, PyObject *errors) { @@ -438,10 +444,10 @@ } static PyObject * -MultibyteCodec_Encode(PyMultibyteCodecObject *self, +MultibyteCodec_Encode(MultibyteCodecObject *self, PyObject *args, PyObject *kwargs) { - PyMultibyteCodec_State state; + MultibyteCodec_State state; Py_UNICODE *data; PyObject *errorcb, *r; const char *errors = NULL; @@ -473,10 +479,10 @@ } static PyObject * -MultibyteCodec_Decode(PyMultibyteCodecObject *self, +MultibyteCodec_Decode(MultibyteCodecObject *self, PyObject *args, PyObject *kwargs) { - PyMultibyteCodec_State state; + MultibyteCodec_State state; MultibyteDecodeBuffer buf; PyObject *errorcb; const char *data, *errors = NULL; @@ -541,6 +547,20 @@ return NULL; } +static PyObject * +MultibyteCodec_StreamWriter(MultibyteCodecObject *self, + PyObject *args, PyObject *kwargs) +{ + PyObject *stream; + char *errors = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamWriter", + streamkwarglist, &stream, &errors)) + return NULL; + + return mbstreamwriter_create(self->codec, stream, errors); +} + static struct PyMethodDef multibytecodec_methods[] = { {"encode", (PyCFunction)MultibyteCodec_Encode, METH_VARARGS | METH_KEYWORDS, @@ -548,20 +568,23 @@ {"decode", (PyCFunction)MultibyteCodec_Decode, METH_VARARGS | METH_KEYWORDS, MultibyteCodec_Decode__doc__}, + {"StreamWriter",(PyCFunction)MultibyteCodec_StreamWriter, + METH_VARARGS | METH_KEYWORDS, + MultibyteCodec_StreamWriter__doc__}, {NULL, NULL}, }; static void 
-multibytecodec_dealloc(PyMultibyteCodecObject *self) +multibytecodec_dealloc(MultibyteCodecObject *self) { PyObject_Del(self); } -static PyTypeObject PyMultibyteCodec_Type = { +static PyTypeObject MultibyteCodec_Type = { PyObject_HEAD_INIT(NULL) 0, /* ob_size */ "MultibyteCodec", /* tp_name */ - sizeof(PyMultibyteCodecObject), /* tp_basicsize */ + sizeof(MultibyteCodecObject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ (destructor)multibytecodec_dealloc, /* tp_dealloc */ @@ -590,23 +613,186 @@ multibytecodec_methods, /* tp_methods */ }; +static int +mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, + PyObject *unistr) +{ + PyObject *wr, *r = NULL; + int rsize; + + if (!PyUnicode_Check(unistr)) { + PyErr_SetString(PyExc_TypeError, + "only unicode objects are encodable."); + return -1; + } + + rsize = PyUnicode_GET_SIZE(unistr); + if (rsize == 0) + return 0; + + r = multibytecodec_encode(self->codec, &self->state, + (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr), rsize, self->errors); + if (r == NULL) + goto errorexit; + + wr = PyObject_CallMethod(self->stream, "write", "O", r); + if (wr == NULL) + goto errorexit; + + Py_DECREF(r); + Py_DECREF(wr); + return 0; + +errorexit: + Py_XDECREF(r); + return -1; +} + +static PyObject * +mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *args) +{ + PyObject *strobj; + + if (!PyArg_ParseTuple(args, "O:write", &strobj)) + return NULL; + + if (mbstreamwriter_iwrite(self, strobj)) + return NULL; + else { + Py_INCREF(Py_None); + return Py_None; + } +} + +static PyObject * +mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *args) +{ + PyObject *lines, *strobj; + int i, r; + + if (!PyArg_ParseTuple(args, "O:writelines", &lines)) + return NULL; + + if (!PySequence_Check(lines)) { + PyErr_SetString(PyExc_TypeError, "arg must be a sequence object"); + return NULL; + } + + for (i = 0; i < PySequence_Length(lines); i++) { + /* length can be changed even within this loop */ + strobj = 
PySequence_GetItem(lines, i); + if (strobj == NULL) + return NULL; + + r = mbstreamwriter_iwrite(self, strobj); + Py_DECREF(strobj); + if (r == -1) + return NULL; + } + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +mbstreamwriter_reset(MultibyteStreamWriterObject *self) +{ + self->state.p = NULL; + + Py_INCREF(Py_None); + return Py_None; +} + +static void +mbstreamwriter_dealloc(MultibyteStreamWriterObject *self) +{ + PyObject_Del(self); +} + +static struct PyMethodDef mbstreamwriter_methods[] = { + {"write", (PyCFunction)mbstreamwriter_write, + METH_VARARGS, NULL}, + {"writelines", (PyCFunction)mbstreamwriter_writelines, + METH_VARARGS, NULL}, + {"reset", (PyCFunction)mbstreamwriter_reset, + METH_NOARGS, NULL}, + {NULL, NULL}, +}; + +static PyTypeObject MultibyteStreamWriter_Type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "MultibyteStreamWriter", /* tp_name */ + sizeof(MultibyteStreamWriterObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)mbstreamwriter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iterext */ + mbstreamwriter_methods, /* tp_methods */ +}; + PyObject * __create_codec(PyObject *ignore, PyObject *arg) { - PyMultibyteCodecObject *self; + MultibyteCodecObject *self; if (!PyCObject_Check(arg)) { PyErr_SetString(PyExc_ValueError, "argument type invalid"); return NULL; } - self = PyObject_New(PyMultibyteCodecObject, &PyMultibyteCodec_Type); + self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type); if (self 
== NULL) return NULL; self->codec = PyCObject_AsVoidPtr(arg); return (PyObject *)self; +} + +static PyObject * +mbstreamwriter_create(MultibyteCodec *codec, + PyObject *stream, const char *errors) +{ + MultibyteStreamWriterObject *self; + + self = PyObject_New(MultibyteStreamWriterObject, + &MultibyteStreamWriter_Type); + if (self == NULL) + return NULL; + + self->errors = get_errorcallback(errors); + if (self->errors == NULL) { + Py_DECREF(self); + return NULL; + } + self->codec = codec; + self->stream = stream; + Py_INCREF(stream); + self->state.p = NULL; + + return (PyObject *)self; } static struct PyMethodDef __methods[] = { 1.5 +15 -13 cjkcodecs/src/multibytecodec.h Index: multibytecodec.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.h,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- multibytecodec.h 19 May 2003 06:06:38 -0000 1.4 +++ multibytecodec.h 20 May 2003 04:11:44 -0000 1.5 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: multibytecodec.h,v 1.4 2003/05/19 06:06:38 perky Exp $ + * $Id: multibytecodec.h,v 1.5 2003/05/20 04:11:44 perky Exp $ */ #ifndef _PYTHON_MULTIBYTECODEC_H_ @@ -38,12 +38,12 @@ typedef union { unsigned long i; void *p; -} PyMultibyteCodec_State; +} MultibyteCodec_State; -typedef int (*mbencode_func)(PyMultibyteCodec_State *state, +typedef int (*mbencode_func)(MultibyteCodec_State *state, const Py_UNICODE **inbuf, size_t inleft, unsigned char **outbuf, size_t outleft); -typedef int (*mbdecode_func)(PyMultibyteCodec_State *state, +typedef int (*mbdecode_func)(MultibyteCodec_State *state, const unsigned char **inbuf, size_t inleft, Py_UNICODE **outbuf, size_t outleft); @@ -51,31 +51,33 @@ const char *encoding; mbencode_func encode; mbdecode_func decode; -} PyMultibyteCodec; +} MultibyteCodec; typedef struct { PyObject_HEAD - PyMultibyteCodec *codec; -} PyMultibyteCodecObject; + MultibyteCodec *codec; +} MultibyteCodecObject; #define MAXPENDING 8 typedef struct { PyObject_HEAD - PyMultibyteCodec *codec; - PyMultibyteCodec_State state; + MultibyteCodec *codec; + MultibyteCodec_State state; unsigned char pending[MAXPENDING]; int pendingsize; PyObject *stream, *errors; -} PyMultibyteStreamReaderObject; +} MultibyteStreamReaderObject; typedef struct { PyObject_HEAD - PyMultibyteCodec *codec; - PyMultibyteCodec_State state; + MultibyteCodec *codec; + MultibyteCodec_State state; +#if 0 /* StreamWriter doesn't buffer on the current implementation. */ Py_UNICODE pending[MAXPENDING]; int pendingsize; +#endif PyObject *stream, *errors; -} PyMultibyteStreamWriterObject; +} MultibyteStreamWriterObject; /* positive values for illegal sequences */ #define MBERR_TOOSMALL (-1) /* insufficient output buffer space */ |
From: Hye-Shik C. <pe...@us...> - 2003-05-19 23:34:55
|
perky 03/05/19 16:34:54 Modified: src _euc_kr.c Log: Optimize detecting c2 < 0x80 Revision Changes Path 1.6 +3 -6 cjkcodecs/src/_euc_kr.c Index: _euc_kr.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_euc_kr.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- _euc_kr.c 19 May 2003 23:07:12 -0000 1.5 +++ _euc_kr.c 19 May 2003 23:34:54 -0000 1.6 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _euc_kr.c,v 1.5 2003/05/19 23:07:12 perky Exp $ + * $Id: _euc_kr.c,v 1.6 2003/05/19 23:34:54 perky Exp $ */ #include "codeccommon.h" @@ -88,14 +88,11 @@ if (inleft < 2) return MBERR_TOOFEW; - if ((c2 = (*inbuf)[1]) < 0x80) - return 2; - else - c2 &= 0x7f; + c2 = (*inbuf)[1] ^ 0x80; map = &ksx1001decmap[c & 0x7f]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) - return 1; + return 2; **outbuf = code; (*outbuf)++; outleft--; |
From: Hye-Shik C. <pe...@us...> - 2003-05-19 23:12:29
|
perky 03/05/19 16:12:28 Modified: src _cp949.c Log: Reduce an evil goto. Revision Changes Path 1.3 +3 -5 cjkcodecs/src/_cp949.c Index: _cp949.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_cp949.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- _cp949.c 19 May 2003 23:07:12 -0000 1.2 +++ _cp949.c 19 May 2003 23:12:28 -0000 1.3 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _cp949.c,v 1.2 2003/05/19 23:07:12 perky Exp $ + * $Id: _cp949.c,v 1.3 2003/05/19 23:12:28 perky Exp $ */ #include "codeccommon.h" @@ -91,13 +91,11 @@ if (inleft < 2) return MBERR_TOOFEW; - if ((*inbuf)[1] < 0x80) - goto cp949dec; - c2 = (*inbuf)[1] & 0x7f; + c2 = (*inbuf)[1] ^ 0x80; map = &ksx1001decmap[c & 0x7f]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) { -cp949dec: c2 = (*inbuf)[1]; + c2 ^= 0x80; map = &cp949extdecmap[c]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) |
From: Hye-Shik C. <pe...@us...> - 2003-05-19 23:07:13
|
perky 03/05/19 16:07:13 Modified: src _cp949.c _euc_kr.c cjkcommon.h codeccommon.h Log: Typedef encode_map and decode_map from each of struct unim_index and struct dbcs_index to make it more readable. Revision Changes Path 1.2 +3 -3 cjkcodecs/src/_cp949.c Index: _cp949.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_cp949.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- _cp949.c 19 May 2003 10:38:08 -0000 1.1 +++ _cp949.c 19 May 2003 23:07:12 -0000 1.2 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _cp949.c,v 1.1 2003/05/19 10:38:08 perky Exp $ + * $Id: _cp949.c,v 1.2 2003/05/19 23:07:12 perky Exp $ */ #include "codeccommon.h" @@ -38,7 +38,7 @@ ENCODER(cp949) { while (inleft > 0) { - const struct unim_index *map; + const encode_map *map; Py_UNICODE c = **inbuf, clow; DBCHAR code; @@ -74,7 +74,7 @@ DECODER(cp949) { while (inleft > 0) { - const struct dbcs_index *map; + const decode_map *map; unsigned char c = **inbuf, c2; Py_UNICODE code; 1.5 +3 -3 cjkcodecs/src/_euc_kr.c Index: _euc_kr.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_euc_kr.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- _euc_kr.c 19 May 2003 10:38:08 -0000 1.4 +++ _euc_kr.c 19 May 2003 23:07:12 -0000 1.5 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _euc_kr.c,v 1.4 2003/05/19 10:38:08 perky Exp $ + * $Id: _euc_kr.c,v 1.5 2003/05/19 23:07:12 perky Exp $ */ #include "codeccommon.h" @@ -37,7 +37,7 @@ ENCODER(euc_kr) { while (inleft > 0) { - const struct unim_index *map; + const encode_map *map; Py_UNICODE c = **inbuf, clow; DBCHAR code; @@ -72,7 +72,7 @@ DECODER(euc_kr) { while (inleft > 0) { - const struct dbcs_index *map; + const decode_map *map; unsigned char c = **inbuf, c2; Py_UNICODE code; 1.8 +3 -1 cjkcodecs/src/cjkcommon.h Index: cjkcommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/cjkcommon.h,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- cjkcommon.h 19 May 2003 10:45:48 -0000 1.7 +++ cjkcommon.h 19 May 2003 23:07:12 -0000 1.8 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: cjkcommon.h,v 1.7 2003/05/19 10:45:48 perky Exp $ + * $Id: cjkcommon.h,v 1.8 2003/05/19 23:07:12 perky Exp $ */ #ifndef _CJKCOMMON_H_ @@ -41,11 +41,13 @@ const Py_UNICODE *map; unsigned char bottom, top; }; +typedef struct dbcs_index decode_map; struct unim_index { const DBCHAR *map; unsigned char bottom, top; }; +typedef struct unim_index encode_map; struct dbcs_map { const char *charset; 1.3 +3 -3 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- codeccommon.h 19 May 2003 08:12:24 -0000 1.2 +++ codeccommon.h 19 May 2003 23:07:12 -0000 1.3 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: codeccommon.h,v 1.2 2003/05/19 08:12:24 perky Exp $ + * $Id: codeccommon.h,v 1.3 2003/05/19 23:07:12 perky Exp $ */ #include "Python.h" @@ -34,9 +34,9 @@ #include "cjkcommon.h" #define ENCMAP(encoding) \ - const static struct unim_index *encoding##encmap; + const static encode_map *encoding##encmap; #define DECMAP(encoding) \ - const static struct dbcs_index *encoding##decmap; + const static decode_map *encoding##decmap; #define ENCODER(encoding) \ static int encoding##_encode( \ |
From: Hye-Shik C. <pe...@us...> - 2003-05-19 22:56:38
|
perky 03/05/19 15:56:37 Modified: src multibytecodec.c Log: Fix several copy&paste errors. Revision Changes Path 1.7 +17 -22 cjkcodecs/src/multibytecodec.c Index: multibytecodec.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.c,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- multibytecodec.c 19 May 2003 22:37:13 -0000 1.6 +++ multibytecodec.c 19 May 2003 22:56:37 -0000 1.7 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.c,v 1.6 2003/05/19 22:37:13 perky Exp $ + * $Id: multibytecodec.c,v 1.7 2003/05/19 22:56:37 perky Exp $ */ #include "Python.h" @@ -162,14 +162,14 @@ size_t esize; int retstrsize, newpos, start, end; - if (e == MBERR_TOOSMALL) { - RESERVE_ENCODEBUFFER(buf, -1); - return 0; /* retry it */ - } else if (e > 0) { + if (e > 0) { reason = "illegal multibyte sequence"; esize = e; } else { switch (e) { + case MBERR_TOOSMALL: + RESERVE_ENCODEBUFFER(buf, -1); + return 0; /* retry it */ case MBERR_TOOFEW: reason = "incomplete multibyte sequence"; esize = (size_t)(buf->inbuf_end - buf->inbuf); @@ -291,14 +291,14 @@ size_t esize; int start, end, retunisize, newpos; - if (e == MBERR_TOOSMALL) { - RESERVE_DECODEBUFFER(buf, -1); - return 0; /* retry it */ - } else if (e > 0) { + if (e > 0) { reason = "illegal multibyte sequence"; esize = e; } else { switch (e) { + case MBERR_TOOSMALL: + RESERVE_DECODEBUFFER(buf, -1); + return 0; /* retry it */ case MBERR_TOOFEW: reason = "incomplete multibyte sequence"; esize = (size_t)(buf->inbuf_end - buf->inbuf); @@ -407,22 +407,19 @@ buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj); buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj); - for (;;) { + while (buf.inbuf < buf.inbuf_end) { int r; size_t inleft, outleft; /* we don't reuse inleft and outleft here. 
* error callbacks can relocate the cursor anywhere on buffer */ inleft = (size_t)(buf.inbuf_end - buf.inbuf); - if (inleft == 0) break; outleft = (size_t)(buf.outbuf_end - buf.outbuf); r = codec->encode(state, &buf.inbuf, inleft, &buf.outbuf, outleft); if (r == 0) break; else if (multibytecodec_encerror(codec, state, &buf, errors, r)) goto errorexit; - else if (buf.inbuf >= buf.inbuf_end) - break; } finalsize = (int)((char*)buf.outbuf - PyString_AS_STRING(buf.outobj)); @@ -483,7 +480,7 @@ MultibyteDecodeBuffer buf; PyObject *errorcb; const char *data, *errors = NULL; - int datalen, finalsize, r; + int datalen, finalsize; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|z:decode", codeckwarglist, &data, &datalen, &errors)) @@ -509,21 +506,19 @@ buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); state.p = NULL; - for (;;) { + while (buf.inbuf < buf.inbuf_end) { size_t inleft, outleft; int r; - inleft = (size_t)(bufinbuf_end - bufinbuf); - if (inleft == 0) break; - outleft = (size_t)(bufoutbuf_end - bufoutbuf); + inleft = (size_t)(buf.inbuf_end - buf.inbuf); + outleft = (size_t)(buf.outbuf_end - buf.outbuf); - r = codec->decode(&state, &bufinbuf, inleft, &bufoutbuf, outleft); + r = self->codec->decode(&state, &buf.inbuf, inleft, + &buf.outbuf, outleft); if (r == 0) break; - else if (multibytecodec_decerror(codec, &state, &buf, errors, r)) + else if (multibytecodec_decerror(self->codec, &state, &buf, errorcb, r)) goto errorexit; - else if (bufinbuf >= bufinbuf_end) - break; } finalsize = (int)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); |
From: Hye-Shik C. <pe...@us...> - 2003-05-19 22:37:15
|
perky 03/05/19 15:37:14 Modified: src multibytecodec.c Log: Inline iencode, idecode function. They are just 2 strikes; decode & StreamReader, and the other couple. Revision Changes Path 1.6 +41 -76 cjkcodecs/src/multibytecodec.c Index: multibytecodec.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- multibytecodec.c 19 May 2003 10:38:08 -0000 1.5 +++ multibytecodec.c 19 May 2003 22:37:13 -0000 1.6 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.c,v 1.5 2003/05/19 10:38:08 perky Exp $ + * $Id: multibytecodec.c,v 1.6 2003/05/19 22:37:13 perky Exp $ */ #include "Python.h" @@ -386,47 +386,6 @@ return -1; } -static int -multibytecodec_iencode(PyMultibyteCodec *codec, - PyMultibyteCodec_State *state, - MultibyteEncodeBuffer *buf, - PyObject *errors) -{ - for (;;) { - int r; - size_t inleft, outleft; - - /* we don't reuse inleft and outleft here. 
- * error callbacks can relocate the cursor anywhere on buffer */ - inleft = (size_t)(buf->inbuf_end - buf->inbuf); - if (inleft == 0) return 0; - outleft = (size_t)(buf->outbuf_end - buf->outbuf); - r = codec->encode(state, &buf->inbuf, inleft, &buf->outbuf, outleft); - if (r == 0) - return 0; - else if (multibytecodec_encerror(codec, state, buf, errors, r)) - return -1; - else if (buf->inbuf >= buf->inbuf_end) - return 0; - } -} - -static int -multibytecodec_prepencoderbuf(MultibyteEncodeBuffer *buf, - const Py_UNICODE *data, int datalen) -{ - buf->excobj = NULL; - buf->inbuf = buf->inbuf_top = data; - buf->inbuf_end = buf->inbuf_top + datalen; - buf->outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16); - if (buf->outobj == NULL) - return -1; - buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj); - buf->outbuf_end = buf->outbuf + PyString_GET_SIZE(buf->outobj); - - return 0; -} - static PyObject * multibytecodec_encode(PyMultibyteCodec *codec, PyMultibyteCodec_State *state, @@ -439,12 +398,32 @@ if (datalen == 0) return PyString_FromString(""); - if (multibytecodec_prepencoderbuf(&buf, data, datalen) == -1) + buf.excobj = NULL; + buf.inbuf = buf.inbuf_top = data; + buf.inbuf_end = buf.inbuf_top + datalen; + buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16); + if (buf.outobj == NULL) goto errorexit; + buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj); + buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj); - if (multibytecodec_iencode(codec, state, &buf, errors) == -1) - goto errorexit; - /* XXX: FLUSH IT! */ + for (;;) { + int r; + size_t inleft, outleft; + + /* we don't reuse inleft and outleft here. 
+ * error callbacks can relocate the cursor anywhere on buffer */ + inleft = (size_t)(buf.inbuf_end - buf.inbuf); + if (inleft == 0) break; + outleft = (size_t)(buf.outbuf_end - buf.outbuf); + r = codec->encode(state, &buf.inbuf, inleft, &buf.outbuf, outleft); + if (r == 0) + break; + else if (multibytecodec_encerror(codec, state, &buf, errors, r)) + goto errorexit; + else if (buf.inbuf >= buf.inbuf_end) + break; + } finalsize = (int)((char*)buf.outbuf - PyString_AS_STRING(buf.outobj)); @@ -496,32 +475,6 @@ return NULL; } -static int -multibytecodec_idecode(PyMultibyteCodec *codec, - PyMultibyteCodec_State *state, - MultibyteDecodeBuffer *buf, - PyObject *errors) -{ - for (;;) { - size_t inleft, outleft; - int r; - - inleft = (size_t)(buf->inbuf_end - buf->inbuf); - if (inleft == 0) return 0; - outleft = (size_t)(buf->outbuf_end - buf->outbuf); - - r = codec->decode(state, &buf->inbuf, inleft, &buf->outbuf, outleft); - if (r == 0) - return 0; - else if (multibytecodec_decerror(codec, state, buf, errors, r)) - return -1; - else if (buf->inbuf >= buf->inbuf_end) - return 0; - } - - return 0; -} - static PyObject * MultibyteCodec_Decode(PyMultibyteCodecObject *self, PyObject *args, PyObject *kwargs) @@ -556,10 +509,22 @@ buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); state.p = NULL; - r = multibytecodec_idecode(self->codec, &state, &buf, errorcb); - /* XXX: FLUSH IT! */ - if (r != 0) - goto errorexit; + for (;;) { + size_t inleft, outleft; + int r; + + inleft = (size_t)(bufinbuf_end - bufinbuf); + if (inleft == 0) break; + outleft = (size_t)(bufoutbuf_end - bufoutbuf); + + r = codec->decode(&state, &bufinbuf, inleft, &bufoutbuf, outleft); + if (r == 0) + break; + else if (multibytecodec_decerror(codec, &state, &buf, errors, r)) + goto errorexit; + else if (bufinbuf >= bufinbuf_end) + break; + } finalsize = (int)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); |
From: Hye-Shik C. <pe...@us...> - 2003-05-19 10:45:49
|
perky 03/05/19 03:45:48 Modified: src cjkcommon.h Log: Remove unused range constants Revision Changes Path 1.7 +1 -42 cjkcodecs/src/cjkcommon.h Index: cjkcommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/cjkcommon.h,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- cjkcommon.h 17 May 2003 21:01:48 -0000 1.6 +++ cjkcommon.h 19 May 2003 10:45:48 -0000 1.7 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: cjkcommon.h,v 1.6 2003/05/17 21:01:48 perky Exp $ + * $Id: cjkcommon.h,v 1.7 2003/05/19 10:45:48 perky Exp $ */ #ifndef _CJKCOMMON_H_ @@ -52,47 +52,6 @@ const struct unim_index *encmap; const struct dbcs_index *decmap; }; - - -#define GB2312_C1_BOTTOM 0x21 -#define GB2312_C1_TOP 0x7e -#define GB2312_C2_BOTTOM 0x21 -#define GB2312_C2_TOP 0x7e - -#define GBK_C1_BOTTOM 0x81 -#define GBK_C1_TOP 0xfe -#define GBK_C2_BOTTOM 0x40 -#define GBK_C2_TOP 0xfe - -#define GB18030_C1_BOTTOM 0xa1 -#define GB18030_C1_TOP 0xfe -#define GB18030_C2_BOTTOM 0x40 -#define GB18030_C2_TOP 0xfe - -#define JISX0208_C1_BOTTOM 0x21 -#define JISX0208_C1_TOP 0x74 -#define JISX0208_C2_BOTTOM 0x21 -#define JISX0208_C2_TOP 0x7e - -#define JISX0212_C1_BOTTOM 0x22 -#define JISX0212_C1_TOP 0x6d -#define JISX0212_C2_BOTTOM 0x21 -#define JISX0212_C2_TOP 0x7e - -#define CP932_C1_BOTTOM 0x81 -#define CP932_C1_TOP 0xfc -#define CP932_C2_BOTTOM 0x40 -#define CP932_C2_TOP 0xfc - -#define KSX1001_C1_BOTTOM 0x21 -#define KSX1001_C1_TOP 0x7d -#define KSX1001_C2_BOTTOM 0x21 -#define KSX1001_C2_TOP 0x7e - -#define CP949_C1_BOTTOM 0x81 -#define CP949_C1_TOP 0xfe -#define CP949_C2_BOTTOM 0x41 -#define CP949_C2_TOP 0xfe #endif |
From: Hye-Shik C. <pe...@us...> - 2003-05-19 10:38:08
|
perky 03/05/19 03:38:08 Modified: src _euc_kr.c multibytecodec.c Added: src _cp949.c Log: Add decoder implementation and cp949 codec. Revision Changes Path 1.4 +32 -3 cjkcodecs/src/_euc_kr.c Index: _euc_kr.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_euc_kr.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- _euc_kr.c 19 May 2003 08:12:23 -0000 1.3 +++ _euc_kr.c 19 May 2003 10:38:08 -0000 1.4 @@ -26,14 +26,13 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _euc_kr.c,v 1.3 2003/05/19 08:12:23 perky Exp $ + * $Id: _euc_kr.c,v 1.4 2003/05/19 10:38:08 perky Exp $ */ #include "codeccommon.h" ENCMAP(cp949) DECMAP(ksx1001) -DECMAP(cp949ext) ENCODER(euc_kr) { @@ -72,6 +71,37 @@ DECODER(euc_kr) { + while (inleft > 0) { + const struct dbcs_index *map; + unsigned char c = **inbuf, c2; + Py_UNICODE code; + + if (outleft < 1) + return MBERR_TOOSMALL; + + if (c < 0x80) { + **outbuf = c; + (*inbuf)++; inleft--; + (*outbuf)++; outleft--; + continue; + } + + if (inleft < 2) + return MBERR_TOOFEW; + if ((c2 = (*inbuf)[1]) < 0x80) + return 2; + else + c2 &= 0x7f; + map = &ksx1001decmap[c & 0x7f]; + if (map->map == NULL || c2 < map->bottom || c2 > map->top || + (code = map->map[c2 - map->bottom]) == UNIINV) + return 1; + + **outbuf = code; + (*outbuf)++; outleft--; + (*inbuf) += 2; inleft -= 2; + } + return 0; } @@ -89,7 +119,6 @@ /* Import mapdata */ MAPOPEN(mod, "ko_KR") if (IMPORTMAP(mod, ksx1001, NULL, &ksx1001decmap) || - IMPORTMAP(mod, cp949ext, NULL, &cp949extdecmap) || IMPORTMAP(mod, cp949, &cp949encmap, NULL)) goto errorexit; MAPCLOSE(mod) 1.5 +250 -20 cjkcodecs/src/multibytecodec.c Index: multibytecodec.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- multibytecodec.c 19 May 
2003 06:32:17 -0000 1.4 +++ multibytecodec.c 19 May 2003 10:38:08 -0000 1.5 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.c,v 1.4 2003/05/19 06:32:17 perky Exp $ + * $Id: multibytecodec.c,v 1.5 2003/05/19 10:38:08 perky Exp $ */ #include "Python.h" @@ -38,6 +38,12 @@ PyObject *excobj, *outobj; } MultibyteEncodeBuffer; +typedef struct { + const unsigned char *inbuf, *inbuf_top, *inbuf_end; + Py_UNICODE *outbuf, *outbuf_end; + PyObject *excobj, *outobj; +} MultibyteDecodeBuffer; + PyDoc_STRVAR(MultibyteCodec_Encode__doc__, "I.encode(unicode, [,errors]) -> (string, length consumed)\n\ \n\ @@ -47,7 +53,16 @@ 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\ registered with codecs.register_error that can handle UnicodeEncodeErrors."); -static char *kwarglist[] = {"input", "errors", NULL}; +PyDoc_STRVAR(MultibyteCodec_Decode__doc__, +"I.decode(string, [,errors]) -> (unicodeobject, length consumed)\n\ +\n\ +Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\ +to set a different error handling scheme. Default is 'strict' meaning\n\ +that encoding errors raise a UnicodeDecodeError. Other possible values\n\ +are 'ignore' and 'replace' as well as any other name registerd with\n\ +codecs.register_error that is able to handle UnicodeDecodeErrors."); + +static char *codeckwarglist[] = {"input", "errors", NULL}; static PyObject *multibytecodec_encode(PyMultibyteCodec *, PyMultibyteCodec_State *, const Py_UNICODE *, int, PyObject *); @@ -113,15 +128,39 @@ goto errorexit; \ } +static int +expand_decodebuffer(MultibyteDecodeBuffer *buf, int esize) +{ + int orgpos, orgsize; + + orgpos = (int)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); + orgsize = PyUnicode_GET_SIZE(buf->outobj); + if (PyUnicode_Resize(&buf->outobj, orgsize + ( + esize < (orgsize >> 1) ? 
(orgsize >> 1) | 1 : esize)) == -1) + return -1; + + buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos; + buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj) + + PyUnicode_GET_SIZE(buf->outobj); + + return 0; +} +#define RESERVE_DECODEBUFFER(buf, s) { \ + if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ + if (expand_decodebuffer(buf, s) == -1) \ + goto errorexit; \ +} + static int -multibytecodec_error(PyMultibyteCodec *codec, - PyMultibyteCodec_State *state, - MultibyteEncodeBuffer *buf, - PyObject *errors, int e) +multibytecodec_encerror(PyMultibyteCodec *codec, + PyMultibyteCodec_State *state, + MultibyteEncodeBuffer *buf, + PyObject *errors, int e) { PyObject *retobj = NULL, *retstr = NULL, *argsobj, *tobj; const char *reason; - int retstrsize, newpos, start, end, esize; + size_t esize; + int retstrsize, newpos, start, end; if (e == MBERR_TOOSMALL) { RESERVE_ENCODEBUFFER(buf, -1); @@ -133,7 +172,7 @@ switch (e) { case MBERR_TOOFEW: reason = "incomplete multibyte sequence"; - esize = (int)(buf->inbuf_end - buf->inbuf); + esize = (size_t)(buf->inbuf_end - buf->inbuf); break; case MBERR_INTERNAL: PyErr_SetString(PyExc_RuntimeError, "internal codec error"); @@ -180,14 +219,11 @@ start, end, reason); if (buf->excobj == NULL) goto errorexit; - } else { - if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0) - goto errorexit; - if (PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0) - goto errorexit; - if (PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) + } else + if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 || + PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 || + PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) goto errorexit; - } if (errors == ERROR_STRICT) { PyCodec_StrictErrors(buf->excobj); @@ -245,6 +281,112 @@ } static int +multibytecodec_decerror(PyMultibyteCodec *codec, + PyMultibyteCodec_State *state, + MultibyteDecodeBuffer *buf, + PyObject *errors, int e) +{ + PyObject *argsobj, *retobj = NULL, 
*retuni = NULL; + const char *reason; + size_t esize; + int start, end, retunisize, newpos; + + if (e == MBERR_TOOSMALL) { + RESERVE_DECODEBUFFER(buf, -1); + return 0; /* retry it */ + } else if (e > 0) { + reason = "illegal multibyte sequence"; + esize = e; + } else { + switch (e) { + case MBERR_TOOFEW: + reason = "incomplete multibyte sequence"; + esize = (size_t)(buf->inbuf_end - buf->inbuf); + break; + case MBERR_INTERNAL: + PyErr_SetString(PyExc_RuntimeError, "internal codec error"); + return -1; + default: + PyErr_SetString(PyExc_RuntimeError, "unknown runtime error"); + return -1; + } + } + + if (errors == ERROR_REPLACE) { + RESERVE_DECODEBUFFER(buf, 1); + *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER; + } + if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { + buf->inbuf += esize; + return 0; + } + + start = (int)(buf->inbuf - buf->inbuf_top); + end = start + esize; + + /* use cached exception object if available */ + if (buf->excobj == NULL) { + buf->excobj = PyUnicodeDecodeError_Create(codec->encoding, + buf->inbuf_top, (int)(buf->inbuf_end - buf->inbuf_top), + start, end, reason); + if (buf->excobj == NULL) + goto errorexit; + } else + if (PyUnicodeDecodeError_SetStart(buf->excobj, start) || + PyUnicodeDecodeError_SetEnd(buf->excobj, end) || + PyUnicodeDecodeError_SetReason(buf->excobj, reason)) + goto errorexit; + + if (errors == ERROR_STRICT) { + PyCodec_StrictErrors(buf->excobj); + goto errorexit; + } + + argsobj = PyTuple_New(1); + if (argsobj == NULL) + goto errorexit; + + PyTuple_SET_ITEM(argsobj, 0, buf->excobj); + Py_INCREF(buf->excobj); + retobj = PyObject_CallObject(errors, argsobj); + Py_DECREF(argsobj); + if (retobj == NULL) + goto errorexit; + + if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || + !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || + !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { + PyErr_SetString(PyExc_ValueError, + "decoding error handler must return (unicode, int) tuple"); + goto errorexit; + } 
+ + retunisize = PyUnicode_GET_SIZE(retuni); + if (retunisize > 0) { + RESERVE_DECODEBUFFER(buf, retunisize); + memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni), + retunisize * Py_UNICODE_SIZE); + buf->outbuf += retunisize; + } + + newpos = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1)); + if (newpos < 0) + newpos += (int)(buf->inbuf_end - buf->inbuf_top); + if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { + PyErr_Format(PyExc_IndexError, + "position %d from error handler out of bounds", newpos); + goto errorexit; + } + buf->inbuf = buf->inbuf_top + newpos; + Py_DECREF(retobj); + return 0; + +errorexit: + Py_XDECREF(retobj); + return -1; +} + +static int multibytecodec_iencode(PyMultibyteCodec *codec, PyMultibyteCodec_State *state, MultibyteEncodeBuffer *buf, @@ -257,13 +399,12 @@ /* we don't reuse inleft and outleft here. * error callbacks can relocate the cursor anywhere on buffer */ inleft = (size_t)(buf->inbuf_end - buf->inbuf); + if (inleft == 0) return 0; outleft = (size_t)(buf->outbuf_end - buf->outbuf); - r = codec->encode(state, &buf->inbuf, inleft, - &buf->outbuf, outleft); - + r = codec->encode(state, &buf->inbuf, inleft, &buf->outbuf, outleft); if (r == 0) return 0; - else if (multibytecodec_error(codec, state, buf, errors, r)) + else if (multibytecodec_encerror(codec, state, buf, errors, r)) return -1; else if (buf->inbuf >= buf->inbuf_end) return 0; @@ -303,6 +444,7 @@ if (multibytecodec_iencode(codec, state, &buf, errors) == -1) goto errorexit; + /* XXX: FLUSH IT! 
*/ finalsize = (int)((char*)buf.outbuf - PyString_AS_STRING(buf.outobj)); @@ -330,7 +472,7 @@ int datalen; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "u#|z:encode", - kwarglist, &data, &datalen, &errors)) + codeckwarglist, &data, &datalen, &errors)) return NULL; errorcb = get_errorcallback(errors); @@ -354,10 +496,98 @@ return NULL; } +static int +multibytecodec_idecode(PyMultibyteCodec *codec, + PyMultibyteCodec_State *state, + MultibyteDecodeBuffer *buf, + PyObject *errors) +{ + for (;;) { + size_t inleft, outleft; + int r; + + inleft = (size_t)(buf->inbuf_end - buf->inbuf); + if (inleft == 0) return 0; + outleft = (size_t)(buf->outbuf_end - buf->outbuf); + + r = codec->decode(state, &buf->inbuf, inleft, &buf->outbuf, outleft); + if (r == 0) + return 0; + else if (multibytecodec_decerror(codec, state, buf, errors, r)) + return -1; + else if (buf->inbuf >= buf->inbuf_end) + return 0; + } + + return 0; +} + +static PyObject * +MultibyteCodec_Decode(PyMultibyteCodecObject *self, + PyObject *args, PyObject *kwargs) +{ + PyMultibyteCodec_State state; + MultibyteDecodeBuffer buf; + PyObject *errorcb; + const char *data, *errors = NULL; + int datalen, finalsize, r; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|z:decode", + codeckwarglist, &data, &datalen, &errors)) + return NULL; + + errorcb = get_errorcallback(errors); + if (errorcb == NULL) + return NULL; + + if (datalen == 0) { + if (errorcb > ERROR_MAX) + {Py_DECREF(errorcb);} + return PyUnicode_FromUnicode(NULL, 0); + } + + buf.outobj = buf.excobj = NULL; + buf.inbuf = buf.inbuf_top = (unsigned char *)data; + buf.inbuf_end = buf.inbuf_top + datalen; + buf.outobj = PyUnicode_FromUnicode(NULL, datalen); + if (buf.outobj == NULL) + goto errorexit; + buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); + buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); + + state.p = NULL; + r = multibytecodec_idecode(self->codec, &state, &buf, errorcb); + /* XXX: FLUSH IT! 
*/ + if (r != 0) + goto errorexit; + + finalsize = (int)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); + + if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) + if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) + goto errorexit; + + Py_XDECREF(buf.excobj); + if (errorcb > ERROR_MAX) + {Py_DECREF(errorcb);} + return make_tuple(buf.outobj, datalen); + +errorexit: + if (errorcb > ERROR_MAX) + {Py_DECREF(errorcb);} + Py_XDECREF(buf.excobj); + Py_XDECREF(buf.outobj); + + return NULL; +} + static struct PyMethodDef multibytecodec_methods[] = { {"encode", (PyCFunction)MultibyteCodec_Encode, METH_VARARGS | METH_KEYWORDS, MultibyteCodec_Encode__doc__}, + {"decode", (PyCFunction)MultibyteCodec_Decode, + METH_VARARGS | METH_KEYWORDS, + MultibyteCodec_Decode__doc__}, {NULL, NULL}, }; 1.1 cjkcodecs/src/_cp949.c Index: _cp949.c =================================================================== /* * _cp949.c: the CP949 codec * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $Id: _cp949.c,v 1.1 2003/05/19 10:38:08 perky Exp $ */ #include "codeccommon.h" ENCMAP(cp949) DECMAP(ksx1001) DECMAP(cp949ext) ENCODER(cp949) { while (inleft > 0) { const struct unim_index *map; Py_UNICODE c = **inbuf, clow; DBCHAR code; if (c < 0x80) { if (outleft < 1) return MBERR_TOOSMALL; **outbuf = c; (*inbuf)++; inleft--; (*outbuf)++; outleft--; continue; } if (outleft < 2) return MBERR_TOOSMALL; map = &cp949encmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || (code = map->map[clow - map->bottom]) == UNIINV) return 1; (*outbuf)[0] = (code >> 8) | 0x80; if (code & 0x8000) (*outbuf)[1] = (code & 0xFF); /* MSB set: CP949 */ else (*outbuf)[1] = (code & 0xFF) | 0x80; /* MSB unset: ks x 1001 */ (*outbuf) += 2; outleft -= 2; (*inbuf)++; inleft--; } return 0; } DECODER(cp949) { while (inleft > 0) { const struct dbcs_index *map; unsigned char c = **inbuf, c2; Py_UNICODE code; if (outleft < 1) return MBERR_TOOSMALL; if (c < 0x80) { **outbuf = c; (*inbuf)++; inleft--; (*outbuf)++; outleft--; continue; } if (inleft < 2) return MBERR_TOOFEW; if ((*inbuf)[1] < 0x80) goto cp949dec; c2 = (*inbuf)[1] & 0x7f; map = &ksx1001decmap[c & 0x7f]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) { cp949dec: c2 = (*inbuf)[1]; map = &cp949extdecmap[c]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) return 2; } **outbuf = code; 
(*outbuf)++; outleft--; (*inbuf) += 2; inleft -= 2; } return 0; } CODECDEF(cp949) NOMETHODS(__methods) void init_cp949(void) { PyObject *codec; PyObject *m = NULL, *mod = NULL, *o = NULL; m = Py_InitModule("_cp949", __methods); /* Import mapdata */ MAPOPEN(mod, "ko_KR") if (IMPORTMAP(mod, ksx1001, NULL, &ksx1001decmap) || IMPORTMAP(mod, cp949ext, NULL, &cp949extdecmap) || IMPORTMAP(mod, cp949, &cp949encmap, NULL)) goto errorexit; MAPCLOSE(mod) /* Create Codec Instances */ MULTIBYTECODEC_OPEN(mod, o) REGISTERCODEC(m, o, codec) MULTIBYTECODEC_CLOSE(mod, o) if (PyErr_Occurred()) Py_FatalError("can't initialize the _cp949 module"); return; errorexit: Py_XDECREF(m); Py_XDECREF(mod); Py_XDECREF(o); } /* * ex: ts=8 sts=4 et */ |
From: Hye-Shik C. <pe...@us...> - 2003-05-19 10:38:08
|
perky 03/05/19 03:38:07 Modified: . setup.py Log: Add decoder implementation and cp949 codec. Revision Changes Path 1.6 +2 -1 cjkcodecs/setup.py Index: setup.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/setup.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- setup.py 19 May 2003 06:14:19 -0000 1.5 +++ setup.py 19 May 2003 10:38:07 -0000 1.6 @@ -27,7 +27,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: setup.py,v 1.5 2003/05/19 06:14:19 perky Exp $ +# $Id: setup.py,v 1.6 2003/05/19 10:38:07 perky Exp $ # import sys @@ -44,6 +44,7 @@ Extension("cjkcodecs.mapdata_zh_TW", ["src/maps/mapdata_zh_TW.c"]), Extension("cjkcodecs.multibytecodec", ["src/multibytecodec.c"]), Extension("cjkcodecs._euc_kr", ["src/_euc_kr.c"]), + Extension("cjkcodecs._cp949", ["src/_cp949.c"]), ], ) |
From: Hye-Shik C. <pe...@us...> - 2003-05-19 08:12:25
|
perky 03/05/19 01:12:24 Modified: src _euc_kr.c codeccommon.h Log: Make more concise! Revision Changes Path 1.3 +13 -31 cjkcodecs/src/_euc_kr.c Index: _euc_kr.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_euc_kr.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- _euc_kr.c 19 May 2003 06:32:17 -0000 1.2 +++ _euc_kr.c 19 May 2003 08:12:23 -0000 1.3 @@ -26,21 +26,16 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _euc_kr.c,v 1.2 2003/05/19 06:32:17 perky Exp $ + * $Id: _euc_kr.c,v 1.3 2003/05/19 08:12:23 perky Exp $ */ -#include "Python.h" -#include "multibytecodec.h" -#include "cjkcommon.h" #include "codeccommon.h" -const static struct unim_index *cp949encmap; -const static struct dbcs_index *ksx1001decmap, *cp949decmap; +ENCMAP(cp949) +DECMAP(ksx1001) +DECMAP(cp949ext) -static int -euc_kr_encode(PyMultibyteCodec_State *state, - const Py_UNICODE **inbuf, size_t inleft, - unsigned char **outbuf, size_t outleft) +ENCODER(euc_kr) { while (inleft > 0) { const struct unim_index *map; @@ -75,23 +70,13 @@ return 0; } -static int -euc_kr_decode(PyMultibyteCodec_State *state, - const unsigned char **inbuf, size_t inleft, - Py_UNICODE **outbuf, size_t outleft) +DECODER(euc_kr) { return 0; } -static PyMultibyteCodec __codec = { - "euc_kr", - euc_kr_encode, - euc_kr_decode, -}; - -static struct PyMethodDef __methods[] = { - {NULL, NULL}, -}; +CODECDEF(euc_kr) +NOMETHODS(__methods) void init_euc_kr(void) @@ -103,18 +88,15 @@ /* Import mapdata */ MAPOPEN(mod, "ko_KR") - if (importmap(mod, "__map_ksx1001", NULL, &ksx1001decmap) || - importmap(mod, "__map_cp949ext", NULL, &cp949decmap) || - importmap(mod, "__map_cp949", &cp949encmap, NULL)) - goto errorexit; + if (IMPORTMAP(mod, ksx1001, NULL, &ksx1001decmap) || + IMPORTMAP(mod, cp949ext, NULL, &cp949extdecmap) || + IMPORTMAP(mod, cp949, &cp949encmap, NULL)) + goto errorexit; 
MAPCLOSE(mod) /* Create Codec Instances */ MULTIBYTECODEC_OPEN(mod, o) - codec = createcodec(o, &__codec); - if (codec == NULL) - goto errorexit; - PyModule_AddObject(m, "codec", codec); + REGISTERCODEC(m, o, codec) MULTIBYTECODEC_CLOSE(mod, o) if (PyErr_Occurred()) 1.2 +37 -1 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- codeccommon.h 19 May 2003 06:06:38 -0000 1.1 +++ codeccommon.h 19 May 2003 08:12:24 -0000 1.2 @@ -26,9 +26,37 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: codeccommon.h,v 1.1 2003/05/19 06:06:38 perky Exp $ + * $Id: codeccommon.h,v 1.2 2003/05/19 08:12:24 perky Exp $ */ +#include "Python.h" +#include "multibytecodec.h" +#include "cjkcommon.h" + +#define ENCMAP(encoding) \ + const static struct unim_index *encoding##encmap; +#define DECMAP(encoding) \ + const static struct dbcs_index *encoding##decmap; + +#define ENCODER(encoding) \ + static int encoding##_encode( \ + PyMultibyteCodec_State *state, \ + const Py_UNICODE **inbuf, size_t inleft, \ + unsigned char **outbuf, size_t outleft) +#define DECODER(encoding) \ + static int encoding##_decode( \ + PyMultibyteCodec_State *state, \ + const unsigned char **inbuf, size_t inleft, \ + Py_UNICODE **outbuf, size_t outleft) +#define CODECDEF(encoding) \ + static PyMultibyteCodec __codec = { \ + #encoding, encoding##_encode, encoding##_decode \ + }; +#define NOMETHODS(name) \ + static struct PyMethodDef name[] = { \ + {NULL, NULL}, \ + }; + #define MAPOPEN(mod, locale) \ mod = PyImport_ImportModule("mapdata_" locale); \ if (mod == NULL) goto errorexit; @@ -43,6 +71,14 @@ goto errorexit; #define MULTIBYTECODEC_CLOSE(mod, o) \ Py_DECREF(o); Py_DECREF(mod); + +#define IMPORTMAP(mod, encoding, em, dm) \ + importmap(mod, "__map_" #encoding, em, dm) 
+#define REGISTERCODEC(m, o, codec) \ + codec = createcodec(o, &__codec); \ + if (codec == NULL) \ + goto errorexit; \ + PyModule_AddObject(m, "codec", codec); static int importmap(PyObject *mod, const char *symbol, |
From: Hye-Shik C. <pe...@us...> - 2003-05-19 06:32:18
|
perky 03/05/18 23:32:17 Modified: src _euc_kr.c multibytecodec.c Log: Enable error callbacks and fix codec typos. Revision Changes Path 1.2 +3 -3 cjkcodecs/src/_euc_kr.c Index: _euc_kr.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_euc_kr.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- _euc_kr.c 19 May 2003 06:06:38 -0000 1.1 +++ _euc_kr.c 19 May 2003 06:32:17 -0000 1.2 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _euc_kr.c,v 1.1 2003/05/19 06:06:38 perky Exp $ + * $Id: _euc_kr.c,v 1.2 2003/05/19 06:32:17 perky Exp $ */ #include "Python.h" @@ -62,9 +62,9 @@ clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || (code = map->map[clow - map->bottom]) == UNIINV) - return 2; + return 1; if (code & 0x8000) /* MSB set: CP949 */ - return 2; + return 1; (*outbuf)[0] = (code >> 8) | 0x80; (*outbuf)[1] = (code & 0xFF) | 0x80; 1.4 +7 -19 cjkcodecs/src/multibytecodec.c Index: multibytecodec.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- multibytecodec.c 19 May 2003 06:06:38 -0000 1.3 +++ multibytecodec.c 19 May 2003 06:32:17 -0000 1.4 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: multibytecodec.c,v 1.3 2003/05/19 06:06:38 perky Exp $ + * $Id: multibytecodec.c,v 1.4 2003/05/19 06:32:17 perky Exp $ */ #include "Python.h" @@ -49,6 +49,9 @@ static char *kwarglist[] = {"input", "errors", NULL}; +static PyObject *multibytecodec_encode(PyMultibyteCodec *, + PyMultibyteCodec_State *, const Py_UNICODE *, int, PyObject *); + static PyObject * make_tuple(PyObject *unicode, int len) { @@ -162,7 +165,7 @@ *buf->outbuf++ = '?'; } } - if (errors == ERROR_IGNORE) { + if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { buf->inbuf += esize; return 0; } @@ -191,7 +194,6 @@ goto errorexit; } -#if 0 argsobj = PyTuple_New(1); if (argsobj == NULL) goto errorexit; @@ -211,8 +213,8 @@ goto errorexit; } - retstr = multibytecodec_encode(self, ic, PyUnicode_AS_UNICODE(tobj), - PyUnicode_GET_SIZE(tobj), ERROR_STRICT, 0); + retstr = multibytecodec_encode(codec, state, PyUnicode_AS_UNICODE(tobj), + PyUnicode_GET_SIZE(tobj), ERROR_STRICT); if (retstr == NULL) goto errorexit; @@ -230,25 +232,11 @@ "position %d from error handler out of bounds", newpos); goto errorexit; } - switch (self->unitype) { - case UNIINTERNAL_UTF_8: - if (newpos >= start) /* buf->rinbuf is at 'start' position now */ - buf->rinbuf = skipchars_utf8(buf->rinbuf, newpos - start); - else - buf->rinbuf = skipchars_utf8(buf->rinbuf_top, newpos); - break; - case UNIINTERNAL_UCS_SWAPPED: - buf->rinbuf = buf->rinbuf_top + newpos * Py_UNICODE_SIZE; - break; - default: - break; - } buf->inbuf = buf->inbuf_top + newpos; Py_DECREF(retobj); Py_DECREF(retstr); return 0; -#endif errorexit: Py_XDECREF(retobj); |