Thread: [KoCo-CVS] [Commit] cjkcodecs/src _big5.c _cp949.c _cp950.c _euc_kr.c _gb18030.c _gb2312.c _gbk.c codeccommon.h
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-05-22 00:07:11
|
perky 03/05/21 16:52:12 Modified: src _big5.c _cp949.c _cp950.c _euc_kr.c _gb18030.c _gb2312.c _gbk.c codeccommon.h Log: Look! Codec codes are more puzzled now! ;) Revision Changes Path 1.2 +21 -63 cjkcodecs/src/_big5.c Index: _big5.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_big5.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- _big5.c 20 May 2003 11:20:56 -0000 1.1 +++ _big5.c 21 May 2003 23:52:11 -0000 1.2 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _big5.c,v 1.1 2003/05/20 11:20:56 perky Exp $ + * $Id: _big5.c,v 1.2 2003/05/21 23:52:11 perky Exp $ */ #include "codeccommon.h" @@ -37,32 +37,25 @@ ENCODER(big5) { while (inleft > 0) { - const encode_map *map; - Py_UNICODE c = **inbuf, clow; + Py_UNICODE c = **inbuf; DBCHAR code; if (c < 0x80) { - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } UCS4INVALID(c) - if (outleft < 2) - return MBERR_TOOSMALL; - map = &big5encmap[c >> 8]; - clow = c & 0xff; - if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == NOCHAR) - return 1; + RESERVE_OUTBUF(2) + + TRYMAP_ENC(&big5encmap[c >> 8], code, c & 0xff) + else return 1; (*outbuf)[0] = (code >> 8) | 0x80; (*outbuf)[1] = (code & 0xFF) | 0x80; - (*outbuf) += 2; outleft -= 2; - (*inbuf)++; inleft--; + NEXT(1, 2) } return 0; @@ -71,68 +64,33 @@ DECODER(big5) { while (inleft > 0) { - const decode_map *map; - unsigned char c = **inbuf, c2; + unsigned char c = **inbuf; Py_UNICODE code; - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) if (c < 0x80) { **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } - if (inleft < 2) - return MBERR_TOOFEW; - c2 = (*inbuf)[1]; - map = &big5decmap[c]; - if (map->map == 
NULL || c2 < map->bottom || c2 > map->top || - (code = map->map[c2 - map->bottom]) == UNIINV) - return 2; + RESERVE_INBUF(2) + TRYMAP_DEC(&big5decmap[c], code, (*inbuf)[1]) + else return 2; **outbuf = code; - (*outbuf)++; outleft--; - (*inbuf) += 2; inleft -= 2; + NEXT(2, 1) } return 0; } -CODECDEF(big5) -NOMETHODS(__methods) - -void -init_big5(void) -{ - PyObject *codec; - PyObject *m = NULL, *mod = NULL, *o = NULL; - - m = Py_InitModule("_big5", __methods); - - /* Import mapdata */ - MAPOPEN(mod, "zh_TW") - if (IMPORTMAP(mod, big5, &big5encmap, &big5decmap)) - goto errorexit; - MAPCLOSE(mod) - - /* Create Codec Instances */ - MULTIBYTECODEC_OPEN(mod, o) - REGISTERCODEC(m, o, codec) - MULTIBYTECODEC_CLOSE(mod, o) - - if (PyErr_Occurred()) - Py_FatalError("can't initialize the _big5 module"); - - return; - -errorexit: - Py_XDECREF(m); - Py_XDECREF(mod); - Py_XDECREF(o); -} +BEGIN_CODEC_REGISTRY(big5) + MAPOPEN(zh_TW) + IMPORTMAP_ENCDEC(big5) + MAPCLOSE() +END_CODEC_REGISTRY(big5) /* * ex: ts=8 sts=4 et 1.6 +23 -71 cjkcodecs/src/_cp949.c Index: _cp949.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_cp949.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- _cp949.c 20 May 2003 11:20:56 -0000 1.5 +++ _cp949.c 21 May 2003 23:52:12 -0000 1.6 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _cp949.c,v 1.5 2003/05/20 11:20:56 perky Exp $ + * $Id: _cp949.c,v 1.6 2003/05/21 23:52:12 perky Exp $ */ #include "codeccommon.h" @@ -38,35 +38,27 @@ ENCODER(cp949) { while (inleft > 0) { - const encode_map *map; - Py_UNICODE c = **inbuf, clow; + Py_UNICODE c = **inbuf; DBCHAR code; if (c < 0x80) { - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } UCS4INVALID(c) - if (outleft < 2) - return MBERR_TOOSMALL; - map = &cp949encmap[c >> 8]; - clow = c & 0xff; - if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == NOCHAR) - return 1; + RESERVE_OUTBUF(2) + TRYMAP_ENC(&cp949encmap[c >> 8], code, c & 0xff) + else return 1; (*outbuf)[0] = (code >> 8) | 0x80; if (code & 0x8000) (*outbuf)[1] = (code & 0xFF); /* MSB set: CP949 */ else (*outbuf)[1] = (code & 0xFF) | 0x80; /* MSB unset: ks x 1001 */ - (*outbuf) += 2; outleft -= 2; - (*inbuf)++; inleft--; + NEXT(1, 2) } return 0; @@ -75,76 +67,36 @@ DECODER(cp949) { while (inleft > 0) { - const decode_map *map; - unsigned char c = **inbuf, c2; + unsigned char c = **inbuf; Py_UNICODE code; - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) if (c < 0x80) { **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } - if (inleft < 2) - return MBERR_TOOFEW; - - c2 = (*inbuf)[1] ^ 0x80; - map = &ksx1001decmap[c & 0x7f]; - if (map->map == NULL || c2 < map->bottom || c2 > map->top || - (code = map->map[c2 - map->bottom]) == UNIINV) { - c2 ^= 0x80; - map = &cp949extdecmap[c]; - if (map->map == NULL || c2 < map->bottom || c2 > map->top || - (code = map->map[c2 - map->bottom]) == UNIINV) - return 2; - } + RESERVE_INBUF(2) + TRYMAP_DEC(&ksx1001decmap[c & 0x7f], code, (*inbuf)[1] ^ 0x80) + else TRYMAP_DEC(&cp949extdecmap[c], code, (*inbuf)[1]) + else return 2; **outbuf = code; - (*outbuf)++; outleft--; - (*inbuf) += 2; inleft -= 2; + 
NEXT(2, 1) } return 0; } -CODECDEF(cp949) -NOMETHODS(__methods) - -void -init_cp949(void) -{ - PyObject *codec; - PyObject *m = NULL, *mod = NULL, *o = NULL; - - m = Py_InitModule("_cp949", __methods); - - /* Import mapdata */ - MAPOPEN(mod, "ko_KR") - if (IMPORTMAP(mod, ksx1001, NULL, &ksx1001decmap) || - IMPORTMAP(mod, cp949ext, NULL, &cp949extdecmap) || - IMPORTMAP(mod, cp949, &cp949encmap, NULL)) - goto errorexit; - MAPCLOSE(mod) - - /* Create Codec Instances */ - MULTIBYTECODEC_OPEN(mod, o) - REGISTERCODEC(m, o, codec) - MULTIBYTECODEC_CLOSE(mod, o) - - if (PyErr_Occurred()) - Py_FatalError("can't initialize the _cp949 module"); - - return; - -errorexit: - Py_XDECREF(m); - Py_XDECREF(mod); - Py_XDECREF(o); -} +BEGIN_CODEC_REGISTRY(cp949) + MAPOPEN(ko_KR) + IMPORTMAP_DEC(ksx1001) + IMPORTMAP_DEC(cp949ext) + IMPORTMAP_ENC(cp949) + MAPCLOSE() +END_CODEC_REGISTRY(cp949) /* * ex: ts=8 sts=4 et 1.2 +23 -72 cjkcodecs/src/_cp950.c Index: _cp950.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_cp950.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- _cp950.c 20 May 2003 11:20:56 -0000 1.1 +++ _cp950.c 21 May 2003 23:52:12 -0000 1.2 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _cp950.c,v 1.1 2003/05/20 11:20:56 perky Exp $ + * $Id: _cp950.c,v 1.2 2003/05/21 23:52:12 perky Exp $ */ #include "codeccommon.h" @@ -39,36 +39,25 @@ ENCODER(cp950) { while (inleft > 0) { - const encode_map *map; - Py_UNICODE c = **inbuf, clow; + Py_UNICODE c = **inbuf; DBCHAR code; if (c < 0x80) { - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } UCS4INVALID(c) - if (outleft < 2) - return MBERR_TOOSMALL; - map = &cp950extencmap[c >> 8]; - clow = c & 0xff; - if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == NOCHAR) { - map = &big5encmap[c >> 8]; - if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == NOCHAR) - return 1; - } + RESERVE_OUTBUF(2) + TRYMAP_ENC(&cp950extencmap[c >> 8], code, c & 0xff) + else TRYMAP_ENC(&big5encmap[c >> 8], code, c & 0xff) + else return 1; (*outbuf)[0] = (code >> 8) | 0x80; (*outbuf)[1] = (code & 0xFF); - (*outbuf) += 2; outleft -= 2; - (*inbuf)++; inleft--; + NEXT(1, 2) } return 0; @@ -77,74 +66,36 @@ DECODER(cp950) { while (inleft > 0) { - const decode_map *map; - unsigned char c = **inbuf, c2; + unsigned char c = **inbuf; Py_UNICODE code; - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) if (c < 0x80) { **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } - if (inleft < 2) - return MBERR_TOOFEW; + RESERVE_INBUF(2) - c2 = (*inbuf)[1]; - map = &cp950extdecmap[c & 0x7f]; - if (map->map == NULL || c2 < map->bottom || c2 > map->top || - (code = map->map[c2 - map->bottom]) == UNIINV) { - map = &big5decmap[c]; - if (map->map == NULL || c2 < map->bottom || c2 > map->top || - (code = map->map[c2 - map->bottom]) == UNIINV) - return 2; - } + TRYMAP_DEC(&cp950extdecmap[c & 0x7f], code, (*inbuf)[1]) + else TRYMAP_DEC(&cp950extdecmap[c & 0x7f], code, (*inbuf)[1]) + else return 
2; **outbuf = code; - (*outbuf)++; outleft--; - (*inbuf) += 2; inleft -= 2; + NEXT(2, 1) } return 0; } -CODECDEF(cp950) -NOMETHODS(__methods) - -void -init_cp950(void) -{ - PyObject *codec; - PyObject *m = NULL, *mod = NULL, *o = NULL; - - m = Py_InitModule("_cp950", __methods); - - /* Import mapdata */ - MAPOPEN(mod, "zh_TW") - if (IMPORTMAP(mod, big5, &big5encmap, &big5decmap) || - IMPORTMAP(mod, cp950ext, &cp950extencmap, &cp950extdecmap)) - goto errorexit; - MAPCLOSE(mod) - - /* Create Codec Instances */ - MULTIBYTECODEC_OPEN(mod, o) - REGISTERCODEC(m, o, codec) - MULTIBYTECODEC_CLOSE(mod, o) - - if (PyErr_Occurred()) - Py_FatalError("can't initialize the _cp950 module"); - - return; - -errorexit: - Py_XDECREF(m); - Py_XDECREF(mod); - Py_XDECREF(o); -} +BEGIN_CODEC_REGISTRY(cp950) + MAPOPEN(zh_TW) + IMPORTMAP_ENCDEC(big5) + IMPORTMAP_ENCDEC(cp950ext) + MAPCLOSE() +END_CODEC_REGISTRY(cp950) /* * ex: ts=8 sts=4 et 1.9 +23 -64 cjkcodecs/src/_euc_kr.c Index: _euc_kr.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_euc_kr.c,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- _euc_kr.c 20 May 2003 11:20:56 -0000 1.8 +++ _euc_kr.c 21 May 2003 23:52:12 -0000 1.9 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _euc_kr.c,v 1.8 2003/05/20 11:20:56 perky Exp $ + * $Id: _euc_kr.c,v 1.9 2003/05/21 23:52:12 perky Exp $ */ #include "codeccommon.h" @@ -37,34 +37,27 @@ ENCODER(euc_kr) { while (inleft > 0) { - const encode_map *map; - Py_UNICODE c = **inbuf, clow; + Py_UNICODE c = **inbuf; DBCHAR code; if (c < 0x80) { - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } UCS4INVALID(c) - if (outleft < 2) - return MBERR_TOOSMALL; - map = &cp949encmap[c >> 8]; - clow = c & 0xff; - if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == NOCHAR) - return 1; + RESERVE_OUTBUF(2) + TRYMAP_ENC(&cp949encmap[c >> 8], code, c & 0xff) + else return 1; + if (code & 0x8000) /* MSB set: CP949 */ return 1; (*outbuf)[0] = (code >> 8) | 0x80; (*outbuf)[1] = (code & 0xFF) | 0x80; - (*outbuf) += 2; outleft -= 2; - (*inbuf)++; inleft--; + NEXT(1, 2) } return 0; @@ -73,69 +66,35 @@ DECODER(euc_kr) { while (inleft > 0) { - const decode_map *map; - unsigned char c = **inbuf, c2; + unsigned char c = **inbuf; Py_UNICODE code; - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) if (c < 0x80) { **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } - if (inleft < 2) - return MBERR_TOOFEW; - c2 = (*inbuf)[1] ^ 0x80; - map = &ksx1001decmap[c & 0x7f]; - if (map->map == NULL || c2 < map->bottom || c2 > map->top || - (code = map->map[c2 - map->bottom]) == UNIINV) - return 2; + RESERVE_INBUF(2) + + TRYMAP_DEC(&ksx1001decmap[c & 0x7f], code, (*inbuf)[1] ^ 0x80) + else return 2; **outbuf = code; - (*outbuf)++; outleft--; - (*inbuf) += 2; inleft -= 2; + NEXT(2, 1) } return 0; } -CODECDEF(euc_kr) -NOMETHODS(__methods) - -void -init_euc_kr(void) -{ - PyObject *codec; - PyObject *m = NULL, *mod = NULL, *o = NULL; - - m = Py_InitModule("_euc_kr", __methods); - - /* Import mapdata */ - MAPOPEN(mod, "ko_KR") - if 
(IMPORTMAP(mod, ksx1001, NULL, &ksx1001decmap) || - IMPORTMAP(mod, cp949, &cp949encmap, NULL)) - goto errorexit; - MAPCLOSE(mod) - - /* Create Codec Instances */ - MULTIBYTECODEC_OPEN(mod, o) - REGISTERCODEC(m, o, codec) - MULTIBYTECODEC_CLOSE(mod, o) - - if (PyErr_Occurred()) - Py_FatalError("can't initialize the _euc_kr module"); - - return; - -errorexit: - Py_XDECREF(m); - Py_XDECREF(mod); - Py_XDECREF(o); -} +BEGIN_CODEC_REGISTRY(euc_kr) + MAPOPEN(ko_KR) + IMPORTMAP_DEC(ksx1001) + IMPORTMAP_ENC(cp949) + MAPCLOSE() +END_CODEC_REGISTRY(euc_kr) /* * ex: ts=8 sts=4 et 1.3 +58 -122 cjkcodecs/src/_gb18030.c Index: _gb18030.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_gb18030.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- _gb18030.c 20 May 2003 11:20:56 -0000 1.2 +++ _gb18030.c 21 May 2003 23:52:12 -0000 1.3 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _gb18030.c,v 1.2 2003/05/20 11:20:56 perky Exp $ + * $Id: _gb18030.c,v 1.3 2003/05/21 23:52:12 perky Exp $ */ #include "codeccommon.h" @@ -42,16 +42,13 @@ ENCODER(gb18030) { while (inleft > 0) { - const encode_map *map; - Py_UNICODE c = **inbuf, clow; + Py_UNICODE c = **inbuf; DBCHAR code; if (c < 0x80) { - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } #if Py_UNICODE_SIZE == 4 @@ -60,8 +57,7 @@ else if (nc >= 0x10000) { Py_UNICODE tc = c; - if (outleft < 4) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(4) (*outbuf)[3] = (unsigned char)(tc % 10) + 0x30; tc /= 10; @@ -71,59 +67,46 @@ tc /= 10; (*outbuf)[0] = (unsigned char)(tc + 0x90); - (*outbuf) += 4; outleft -= 4; - (*inbuf)++; inleft--; + NEXT(1, 4) continue; } #endif - if (outleft < 2) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(2) GBK_PREENCODE(c, code) - else { - map = &gbcommonencmap[c >> 8]; - clow = c & 0xff; - if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == NOCHAR) { - map = &gb18030extencmap[c >> 8]; - clow = c & 0xff; - if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == NOCHAR) { - const struct _gb18030_to_unibmp_ranges *utrrange; - - if (outleft < 4) - return MBERR_TOOSMALL; - - for (utrrange = gb18030_to_unibmp_ranges; - utrrange->first != 0; - utrrange++) - if (utrrange->first <= c && c <= utrrange->last) { - Py_UNICODE tc; - - tc = c - utrrange->first + utrrange->base; - - (*outbuf)[3] = (unsigned char)(tc % 10) + 0x30; - tc /= 10; - (*outbuf)[2] = (unsigned char)(tc % 126) + 0x81; - tc /= 126; - (*outbuf)[1] = (unsigned char)(tc % 10) + 0x30; - tc /= 10; - (*outbuf)[0] = (unsigned char)tc + 0x81; - - (*outbuf) += 4; outleft -= 4; - (*inbuf)++; inleft--; - break; - } - - if (utrrange->first == 0) { - PyErr_SetString(PyExc_RuntimeError, - "unicode mapping invalid"); - return 1; 
- } - continue; + else TRYMAP_ENC(&gbcommonencmap[c >> 8], code, c && 0xff) + else TRYMAP_ENC(&gb18030extencmap[c >> 8], code, c && 0xff) { + const struct _gb18030_to_unibmp_ranges *utrrange; + + RESERVE_OUTBUF(4) + + for (utrrange = gb18030_to_unibmp_ranges; + utrrange->first != 0; + utrrange++) + if (utrrange->first <= c && c <= utrrange->last) { + Py_UNICODE tc; + + tc = c - utrrange->first + utrrange->base; + + (*outbuf)[3] = (unsigned char)(tc % 10) + 0x30; + tc /= 10; + (*outbuf)[2] = (unsigned char)(tc % 126) + 0x81; + tc /= 126; + (*outbuf)[1] = (unsigned char)(tc % 10) + 0x30; + tc /= 10; + (*outbuf)[0] = (unsigned char)tc + 0x81; + + NEXT(1, 4) + break; } + + if (utrrange->first == 0) { + PyErr_SetString(PyExc_RuntimeError, + "unicode mapping invalid"); + return 1; } + continue; } (*outbuf)[0] = (code >> 8) | 0x80; @@ -131,8 +114,8 @@ (*outbuf)[1] = (code & 0xFF); /* MSB set: GBK or GB18030ext */ else (*outbuf)[1] = (code & 0xFF) | 0x80; /* MSB unset: GB2312 */ - (*outbuf) += 2; outleft -= 2; - (*inbuf)++; inleft--; + + NEXT(1, 2) } return 0; @@ -141,22 +124,18 @@ DECODER(gb18030) { while (inleft > 0) { - const decode_map *map; unsigned char c = **inbuf, c2; Py_UNICODE code; - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) if (c < 0x80) { **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } - if (inleft < 2) - return MBERR_TOOFEW; + RESERVE_INBUF(2) c2 = (*inbuf)[1]; if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */ @@ -164,8 +143,7 @@ unsigned char c3, c4; Py_UNICODE lseq; - if (inleft < 4) - return MBERR_TOOFEW; + RESERVE_INBUF(4) c3 = (*inbuf)[2]; c4 = (*inbuf)[3]; if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) @@ -181,8 +159,7 @@ lseq >= (utr + 1)->base; utr++) ; **outbuf = utr->first - utr->base + lseq; - (*outbuf)++; outleft--; - (*inbuf) += 4; inleft -= 4; + NEXT(4, 1) continue; } } @@ -192,8 +169,7 @@ (Py_UNICODE)c3 * 10 + c4; if (lseq <= 0x10FFFF) { **outbuf = lseq; - (*outbuf)++; 
outleft--; - (*inbuf) += 4; inleft -= 4; + NEXT(4, 1) continue; } } @@ -202,66 +178,26 @@ } GBK_PREDECODE(c, c2, code) - else { - c2 ^= 0x80; - map = &gb2312decmap[c & 0x7f]; - if (map->map == NULL || c2 < map->bottom || c2 > map->top || - (code = map->map[c2 - map->bottom]) == UNIINV) { - c2 ^= 0x80; - map = &gbkextdecmap[c]; - if (map->map == NULL || c2 < map->bottom || c2 > map->top || - (code = map->map[c2 - map->bottom]) == UNIINV) { - map = &gb18030extdecmap[c]; - if (map->map == NULL || c2 < map->bottom || c2 > map->top || - (code = map->map[c2 - map->bottom]) == UNIINV) - return 2; - } - } - } + else TRYMAP_DEC(&gb2312decmap[c & 0x7f], code, c2 ^ 0x80) + else TRYMAP_DEC(&gbkextdecmap[c], code, c2) + else TRYMAP_DEC(&gb18030extdecmap[c], code, c2) + else return 2; **outbuf = code; - (*outbuf)++; outleft--; - (*inbuf) += 2; inleft -= 2; + NEXT(2, 1) } return 0; } -CODECDEF(gb18030) -NOMETHODS(__methods) - -void -init_gb18030(void) -{ - PyObject *codec; - PyObject *m = NULL, *mod = NULL, *o = NULL; - - m = Py_InitModule("_gb18030", __methods); - - /* Import mapdata */ - MAPOPEN(mod, "zh_CN") - if (IMPORTMAP(mod, gb2312, NULL, &gb2312decmap) || - IMPORTMAP(mod, gbkext, NULL, &gbkextdecmap) || - IMPORTMAP(mod, gb18030ext, &gb18030extencmap, &gb18030extdecmap) || - IMPORTMAP(mod, gbcommon, &gbcommonencmap, NULL)) - goto errorexit; - MAPCLOSE(mod) - - /* Create Codec Instances */ - MULTIBYTECODEC_OPEN(mod, o) - REGISTERCODEC(m, o, codec) - MULTIBYTECODEC_CLOSE(mod, o) - - if (PyErr_Occurred()) - Py_FatalError("can't initialize the _gb18030 module"); - - return; - -errorexit: - Py_XDECREF(m); - Py_XDECREF(mod); - Py_XDECREF(o); -} +BEGIN_CODEC_REGISTRY(gb18030) + MAPOPEN(zh_CN) + IMPORTMAP_DEC(gb2312) + IMPORTMAP_DEC(gbkext) + IMPORTMAP_ENC(gbcommon) + IMPORTMAP_ENCDEC(gb18030ext) + MAPCLOSE() +END_CODEC_REGISTRY(gb18030) /* * ex: ts=8 sts=4 et 1.4 +22 -64 cjkcodecs/src/_gb2312.c Index: _gb2312.c =================================================================== 
RCS file: /cvsroot/koco/cjkcodecs/src/_gb2312.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- _gb2312.c 20 May 2003 11:20:56 -0000 1.3 +++ _gb2312.c 21 May 2003 23:52:12 -0000 1.4 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _gb2312.c,v 1.3 2003/05/20 11:20:56 perky Exp $ + * $Id: _gb2312.c,v 1.4 2003/05/21 23:52:12 perky Exp $ */ #include "codeccommon.h" @@ -37,34 +37,27 @@ ENCODER(gb2312) { while (inleft > 0) { - const encode_map *map; - Py_UNICODE c = **inbuf, clow; + Py_UNICODE c = **inbuf; DBCHAR code; if (c < 0x80) { - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } UCS4INVALID(c) - if (outleft < 2) - return MBERR_TOOSMALL; - map = &gbcommonencmap[c >> 8]; - clow = c & 0xff; - if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == NOCHAR) - return 1; + RESERVE_OUTBUF(2) + TRYMAP_ENC(&gbcommonencmap[c >> 8], code, c & 0xff) + else return 1; + if (code & 0x8000) /* MSB set: GBK */ return 1; (*outbuf)[0] = (code >> 8) | 0x80; (*outbuf)[1] = (code & 0xFF) | 0x80; - (*outbuf) += 2; outleft -= 2; - (*inbuf)++; inleft--; + NEXT(1, 2) } return 0; @@ -73,69 +66,34 @@ DECODER(gb2312) { while (inleft > 0) { - const decode_map *map; - unsigned char c = **inbuf, c2; + unsigned char c = **inbuf; Py_UNICODE code; - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) if (c < 0x80) { **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } - if (inleft < 2) - return MBERR_TOOFEW; - c2 = (*inbuf)[1] ^ 0x80; - map = &gb2312decmap[c & 0x7f]; - if (map->map == NULL || c2 < map->bottom || c2 > map->top || - (code = map->map[c2 - map->bottom]) == UNIINV) - return 2; + RESERVE_INBUF(2) + TRYMAP_DEC(&gb2312decmap[c & 0x7f], code, (*inbuf)[1] ^ 0x80) + else return 2; **outbuf = 
code; - (*outbuf)++; outleft--; - (*inbuf) += 2; inleft -= 2; + NEXT(2, 1) } return 0; } -CODECDEF(gb2312) -NOMETHODS(__methods) - -void -init_gb2312(void) -{ - PyObject *codec; - PyObject *m = NULL, *mod = NULL, *o = NULL; - - m = Py_InitModule("_gb2312", __methods); - - /* Import mapdata */ - MAPOPEN(mod, "zh_CN") - if (IMPORTMAP(mod, gb2312, NULL, &gb2312decmap) || - IMPORTMAP(mod, gbcommon, &gbcommonencmap, NULL)) - goto errorexit; - MAPCLOSE(mod) - - /* Create Codec Instances */ - MULTIBYTECODEC_OPEN(mod, o) - REGISTERCODEC(m, o, codec) - MULTIBYTECODEC_CLOSE(mod, o) - - if (PyErr_Occurred()) - Py_FatalError("can't initialize the _gb2312 module"); - - return; - -errorexit: - Py_XDECREF(m); - Py_XDECREF(mod); - Py_XDECREF(o); -} +BEGIN_CODEC_REGISTRY(gb2312) + MAPOPEN(zh_CN) + IMPORTMAP_DEC(gb2312) + IMPORTMAP_ENC(gbcommon) + MAPCLOSE() +END_CODEC_REGISTRY(gb2312) /* * ex: ts=8 sts=4 et 1.3 +23 -74 cjkcodecs/src/_gbk.c Index: _gbk.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_gbk.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- _gbk.c 20 May 2003 11:20:56 -0000 1.2 +++ _gbk.c 21 May 2003 23:52:12 -0000 1.3 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _gbk.c,v 1.2 2003/05/20 11:20:56 perky Exp $ + * $Id: _gbk.c,v 1.3 2003/05/21 23:52:12 perky Exp $ */ #include "codeccommon.h" @@ -39,39 +39,29 @@ ENCODER(gbk) { while (inleft > 0) { - const encode_map *map; - Py_UNICODE c = **inbuf, clow; + Py_UNICODE c = **inbuf; DBCHAR code; if (c < 0x80) { - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } UCS4INVALID(c) - if (outleft < 2) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(2) GBK_PREENCODE(c, code) - else { - map = &gbcommonencmap[c >> 8]; - clow = c & 0xff; - if (map->map == NULL || clow < map->bottom || clow > map->top || - (code = map->map[clow - map->bottom]) == NOCHAR) - return 1; - } + else TRYMAP_ENC(&gbcommonencmap[c >> 8], code, c & 0xff) + else return 1; (*outbuf)[0] = (code >> 8) | 0x80; if (code & 0x8000) (*outbuf)[1] = (code & 0xFF); /* MSB set: GBK */ else (*outbuf)[1] = (code & 0xFF) | 0x80; /* MSB unset: GB2312 */ - (*outbuf) += 2; outleft -= 2; - (*inbuf)++; inleft--; + NEXT(1, 2) } return 0; @@ -80,79 +70,38 @@ DECODER(gbk) { while (inleft > 0) { - const decode_map *map; - unsigned char c = **inbuf, c2; + unsigned char c = **inbuf; Py_UNICODE code; - if (outleft < 1) - return MBERR_TOOSMALL; + RESERVE_OUTBUF(1) if (c < 0x80) { **outbuf = c; - (*inbuf)++; inleft--; - (*outbuf)++; outleft--; + NEXT(1, 1) continue; } - if (inleft < 2) - return MBERR_TOOFEW; + RESERVE_INBUF(2) GBK_PREDECODE(c, (*inbuf)[1], code) - else { - c2 = (*inbuf)[1] ^ 0x80; - map = &gb2312decmap[c & 0x7f]; - if (map->map == NULL || c2 < map->bottom || c2 > map->top || - (code = map->map[c2 - map->bottom]) == UNIINV) { - c2 ^= 0x80; - map = &gbkextdecmap[c]; - if (map->map == NULL || c2 < map->bottom || c2 > map->top || - (code = map->map[c2 - map->bottom]) == UNIINV) - return 2; - } - } + else TRYMAP_DEC(&gb2312decmap[c & 0x7f], code, (*inbuf)[1] ^ 0x80) + else TRYMAP_DEC(&gbkextdecmap[c], code, (*inbuf)[1]) + else return 
2; **outbuf = code; - (*outbuf)++; outleft--; - (*inbuf) += 2; inleft -= 2; + NEXT(2, 1) } return 0; } -CODECDEF(gbk) -NOMETHODS(__methods) - -void -init_gbk(void) -{ - PyObject *codec; - PyObject *m = NULL, *mod = NULL, *o = NULL; - - m = Py_InitModule("_gbk", __methods); - - /* Import mapdata */ - MAPOPEN(mod, "zh_CN") - if (IMPORTMAP(mod, gb2312, NULL, &gb2312decmap) || - IMPORTMAP(mod, gbkext, NULL, &gbkextdecmap) || - IMPORTMAP(mod, gbcommon, &gbcommonencmap, NULL)) - goto errorexit; - MAPCLOSE(mod) - - /* Create Codec Instances */ - MULTIBYTECODEC_OPEN(mod, o) - REGISTERCODEC(m, o, codec) - MULTIBYTECODEC_CLOSE(mod, o) - - if (PyErr_Occurred()) - Py_FatalError("can't initialize the _gbk module"); - - return; - -errorexit: - Py_XDECREF(m); - Py_XDECREF(mod); - Py_XDECREF(o); -} +BEGIN_CODEC_REGISTRY(gbk) + MAPOPEN(zh_CN) + IMPORTMAP_DEC(gb2312) + IMPORTMAP_DEC(gbkext) + IMPORTMAP_ENC(gbcommon) + MAPCLOSE() +END_CODEC_REGISTRY(gbk) /* * ex: ts=8 sts=4 et 1.6 +77 -27 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- codeccommon.h 20 May 2003 10:08:47 -0000 1.5 +++ codeccommon.h 21 May 2003 23:52:12 -0000 1.6 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: codeccommon.h,v 1.5 2003/05/20 10:08:47 perky Exp $ + * $Id: codeccommon.h,v 1.6 2003/05/21 23:52:12 perky Exp $ */ #include "Python.h" @@ -48,45 +48,95 @@ MultibyteCodec_State *state, \ const unsigned char **inbuf, size_t inleft, \ Py_UNICODE **outbuf, size_t outleft) -#define CODECDEF(encoding) \ + +#if Py_UNICODE_SIZE == 4 +#define UCS4INVALID(code) \ + if ((code) > 0xFFFF) \ + return 1; +#else +#define UCS4INVALID(code) +#endif + +#define BEGIN_CODEC_REGISTRY(encoding) \ static MultibyteCodec __codec = { \ #encoding, encoding##_encode, encoding##_decode \ - }; -#define NOMETHODS(name) \ - static struct PyMethodDef name[] = { \ + }; \ + \ + static struct PyMethodDef __methods[] = { \ {NULL, NULL}, \ - }; + }; \ + \ + void \ + init_##encoding(void) \ + { \ + PyObject *codec; \ + PyObject *m = NULL, *mod = NULL, *o = NULL; \ + \ + m = Py_InitModule("_" #encoding, __methods); -#define MAPOPEN(mod, locale) \ - mod = PyImport_ImportModule("mapdata_" locale); \ - if (mod == NULL) goto errorexit; -#define MAPCLOSE(mod) \ +#define MAPOPEN(locale) \ + mod = PyImport_ImportModule("mapdata_" #locale); \ + if (mod == NULL) goto errorexit; \ + if ( +#define IMPORTMAP_ENCDEC(charset) \ + importmap(mod, "__map_" #charset, &charset##encmap, \ + &charset##decmap) || +#define IMPORTMAP_ENC(charset) \ + importmap(mod, "__map_" #charset, &charset##encmap, \ + NULL) || +#define IMPORTMAP_DEC(charset) \ + importmap(mod, "__map_" #charset, NULL, \ + &charset##decmap) || +#define MAPCLOSE() \ + 0) goto errorexit; \ Py_DECREF(mod); -#define MULTIBYTECODEC_OPEN(mod, o) \ +#define END_CODEC_REGISTRY(encoding) \ mod = PyImport_ImportModule("multibytecodec"); \ if (mod == NULL) goto errorexit; \ o = PyObject_GetAttrString(mod, "__create_codec"); \ if (o == NULL || !PyCallable_Check(o)) \ - goto errorexit; -#define MULTIBYTECODEC_CLOSE(mod, o) \ - Py_DECREF(o); Py_DECREF(mod); - -#define IMPORTMAP(mod, encoding, em, dm) \ - importmap(mod, "__map_" #encoding, em, dm) -#define 
REGISTERCODEC(m, o, codec) \ + goto errorexit; \ + \ codec = createcodec(o, &__codec); \ if (codec == NULL) \ goto errorexit; \ - PyModule_AddObject(m, "codec", codec); - -#if Py_UNICODE_SIZE == 4 -#define UCS4INVALID(code) \ - if ((code) > 0xFFFF) \ - return 1; -#else -#define UCS4INVALID(code) -#endif + PyModule_AddObject(m, "codec", codec); \ + Py_DECREF(o); Py_DECREF(mod); \ + \ + if (PyErr_Occurred()) \ + Py_FatalError("can't initialize the _" #encoding \ + " module"); \ + \ + return; \ + \ +errorexit: \ + Py_XDECREF(m); \ + Py_XDECREF(mod); \ + Py_XDECREF(o); \ +} + +#define NEXT(i, o) \ + (*inbuf) += (i); \ + (inleft) -= (i); \ + (*outbuf) += (o); \ + (outleft) -= (o); + +#define RESERVE_INBUF(n) \ + if (inleft < (n)) \ + return MBERR_TOOFEW; +#define RESERVE_OUTBUF(n) \ + if (outleft < (n)) \ + return MBERR_TOOSMALL; + +#define TRYMAP_ENC(m, assi, val) \ + if ((m)->map != NULL && (val) >= (m)->bottom && \ + (val)<= (m)->top && ((assi) = (m)->map[(val) - \ + (m)->bottom]) != NOCHAR) ; +#define TRYMAP_DEC(m, assi, val) \ + if ((m)->map != NULL && (val) >= (m)->bottom && \ + (val)<= (m)->top && ((assi) = (m)->map[(val) - \ + (m)->bottom]) != UNIINV) ; static int importmap(PyObject *mod, const char *symbol, |