[KoCo-CVS] [Commit] cjkcodecs/src _gb18030.c _gbk.c
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-05-20 11:52:42
|
perky 03/05/20 03:59:09 Added: src _gb18030.c _gbk.c Log: Add gb18030 and gbk codec. Revision Changes Path 1.1 cjkcodecs/src/_gb18030.c Index: _gb18030.c =================================================================== /* * _gb18030.c: the GB18030 codec * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * $Id: _gb18030.c,v 1.1 2003/05/20 10:59:08 perky Exp $ */ #include "codeccommon.h" #include "maps/tweak_gbk.h" #include "maps/map_gb18030uni.h" ENCMAP(gbcommon) ENCMAP(gb18030ext) DECMAP(gb2312) DECMAP(gbkext) DECMAP(gb18030ext) ENCODER(gb18030) { while (inleft > 0) { const encode_map *map; Py_UNICODE c = **inbuf, clow; DBCHAR code; if (c < 0x80) { if (outleft < 1) return MBERR_TOOSMALL; **outbuf = c; (*inbuf)++; inleft--; (*outbuf)++; outleft--; continue; } #if Py_UNICODE_SIZE == 4 else if (nc > 0x10FFFF) return 1; else if (nc >= 0x10000) { Py_UNICODE tc = c; if (outleft < 4) return MBERR_TOOSMALL; (*outbuf)[3] = (unsigned char)(tc % 10) + 0x30; tc /= 10; (*outbuf)[2] = (unsigned char)(tc % 126) + 0x81; tc /= 126; (*outbuf)[1] = (unsigned char)(tc % 10) + 0x30; tc /= 10; (*outbuf)[0] = (unsigned char)(tc + 0x90); (*outbuf) += 4; outleft -= 4; (*inbuf)++; inleft--; continue; } #endif if (outleft < 2) return MBERR_TOOSMALL; GBK_PREENCODE(c, code) else { map = &gbcommonencmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || (code = map->map[clow - map->bottom]) == UNIINV) { map = &gb18030extencmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || (code = map->map[clow - map->bottom]) == UNIINV) { const struct _gb18030_to_unibmp_ranges *utrrange; if (outleft < 4) return MBERR_TOOSMALL; for (utrrange = gb18030_to_unibmp_ranges; utrrange->first != 0; utrrange++) if (utrrange->first <= c && c <= utrrange->last) { Py_UNICODE tc; tc = c - utrrange->first + utrrange->base; (*outbuf)[3] = (unsigned char)(tc % 10) + 0x30; tc /= 10; (*outbuf)[2] = (unsigned char)(tc % 126) + 0x81; tc /= 126; (*outbuf)[1] = (unsigned char)(tc % 10) + 0x30; tc /= 10; (*outbuf)[0] = (unsigned char)tc + 0x81; (*outbuf) += 4; outleft -= 4; (*inbuf)++; inleft--; break; } if (utrrange->first == 0) { PyErr_SetString(PyExc_RuntimeError, "unicode mapping invalid"); return 1; } continue; } } } (*outbuf)[0] = (code >> 8) 
| 0x80; if (code & 0x8000) (*outbuf)[1] = (code & 0xFF); /* MSB set: GBK or GB18030ext */ else (*outbuf)[1] = (code & 0xFF) | 0x80; /* MSB unset: GB2312 */ (*outbuf) += 2; outleft -= 2; (*inbuf)++; inleft--; } return 0; } DECODER(gb18030) { while (inleft > 0) { const decode_map *map; unsigned char c = **inbuf, c2; Py_UNICODE code; if (outleft < 1) return MBERR_TOOSMALL; if (c < 0x80) { **outbuf = c; (*inbuf)++; inleft--; (*outbuf)++; outleft--; continue; } if (inleft < 2) return MBERR_TOOFEW; c2 = (*inbuf)[1]; if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */ const struct _gb18030_to_unibmp_ranges *utr; unsigned char c3, c4; Py_UNICODE lseq; if (inleft < 4) return MBERR_TOOFEW; c3 = (*inbuf)[2]; c4 = (*inbuf)[3]; if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) return 4; c -= 0x81; c2 -= 0x30; c3 -= 0x81; c4 -= 0x30; if (c < 4) { /* U+0080 - U+FFFF */ lseq = ((Py_UNICODE)c * 10 + c2) * 1260 + (Py_UNICODE)c3 * 10 + c4; if (lseq < 39420) { for (utr = gb18030_to_unibmp_ranges; lseq >= (utr + 1)->base; utr++) ; **outbuf = utr->first - utr->base + lseq; (*outbuf)++; outleft--; (*inbuf) += 4; inleft -= 4; continue; } } #if Py_UNICODE_SIZE == 4 else if (c >= 15) { /* U+10000 - U+10FFFF */ lseq = 0x10000 + (((Py_UNICODE)c-15) * 10 + c2) * 1260 + (Py_UNICODE)c3 * 10 + c4; if (lseq <= 0x10FFFF) { **outbuf = lseq; (*outbuf)++; outleft--; (*inbuf) += 4; inleft -= 4; continue; } } #endif return 4; } GBK_PREDECODE(c, c2, code) else { c2 ^= 0x80; map = &gb2312decmap[c & 0x7f]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) { c2 ^= 0x80; map = &gbkextdecmap[c]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) { map = &gb18030extdecmap[c]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) return 2; } } } **outbuf = code; (*outbuf)++; outleft--; (*inbuf) += 2; inleft -= 2; } return 0; } CODECDEF(gb18030) 
NOMETHODS(__methods)

/*
 * Module initialisation for the _gb18030 extension module.
 *
 * Creates the module, imports the mapping tables used by the gb18030
 * encoder/decoder through the MAPOPEN/IMPORTMAP/MAPCLOSE macros, and
 * registers the codec through the MULTIBYTECODEC_* / REGISTERCODEC macros
 * (all defined elsewhere -- presumably codeccommon.h).
 */
void
init_gb18030(void)
{
    PyObject *codec;
    PyObject *m = NULL, *mod = NULL, *o = NULL;

    m = Py_InitModule("_gb18030", __methods);
    if (m == NULL)
        return; /* exception already set by Py_InitModule */

    /* Import mapdata */
    MAPOPEN(mod, "zh_CN")
        if (IMPORTMAP(mod, gb2312, NULL, &gb2312decmap) ||
            IMPORTMAP(mod, gbkext, NULL, &gbkextdecmap) ||
            IMPORTMAP(mod, gb18030ext, &gb18030extencmap,
                      &gb18030extdecmap) ||
            IMPORTMAP(mod, gbcommon, &gbcommonencmap, NULL))
            goto errorexit;
    MAPCLOSE(mod)

    /* Create Codec Instances */
    MULTIBYTECODEC_OPEN(mod, o)
    REGISTERCODEC(m, o, codec)
    MULTIBYTECODEC_CLOSE(mod, o)

    if (PyErr_Occurred())
        Py_FatalError("can't initialize the _gb18030 module");

    return;

errorexit:
    /* m is a borrowed reference from Py_InitModule and must not be
     * released here; only the references this function owns are dropped. */
    Py_XDECREF(mod);
    Py_XDECREF(o);
}

/*
 * ex: ts=8 sts=4 et
 */
1.1 cjkcodecs/src/_gbk.c Index: _gbk.c ===================================================================
/* * _gbk.c: the GBK codec * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $Id: _gbk.c,v 1.1 2003/05/20 10:59:08 perky Exp $ */ #include "codeccommon.h" #include "maps/tweak_gbk.h" ENCMAP(gbcommon) DECMAP(gb2312) DECMAP(gbkext) ENCODER(gbk) { while (inleft > 0) { const encode_map *map; Py_UNICODE c = **inbuf, clow; DBCHAR code; if (c < 0x80) { if (outleft < 1) return MBERR_TOOSMALL; **outbuf = c; (*inbuf)++; inleft--; (*outbuf)++; outleft--; continue; } UCS4INVALID(c) if (outleft < 2) return MBERR_TOOSMALL; GBK_PREENCODE(c, code) else { map = &gbcommonencmap[c >> 8]; clow = c & 0xff; if (map->map == NULL || clow < map->bottom || clow > map->top || (code = map->map[clow - map->bottom]) == UNIINV) return 1; } (*outbuf)[0] = (code >> 8) | 0x80; if (code & 0x8000) (*outbuf)[1] = (code & 0xFF); /* MSB set: GBK */ else (*outbuf)[1] = (code & 0xFF) | 0x80; /* MSB unset: GB2312 */ (*outbuf) += 2; outleft -= 2; (*inbuf)++; inleft--; } return 0; } DECODER(gbk) { while (inleft > 0) { const decode_map *map; unsigned char c = **inbuf, c2; Py_UNICODE code; if (outleft < 1) return MBERR_TOOSMALL; if (c < 0x80) { **outbuf = c; (*inbuf)++; inleft--; (*outbuf)++; outleft--; continue; } if (inleft < 2) return MBERR_TOOFEW; GBK_PREDECODE(c, (*inbuf)[1], code) else { c2 = (*inbuf)[1] ^ 0x80; map = &gb2312decmap[c & 0x7f]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) == UNIINV) { c2 ^= 0x80; map = &gbkextdecmap[c]; if (map->map == NULL || c2 < map->bottom || c2 > map->top || (code = map->map[c2 - map->bottom]) 
== UNIINV) return 2; } } **outbuf = code; (*outbuf)++; outleft--; (*inbuf) += 2; inleft -= 2; } return 0; } CODECDEF(gbk) NOMETHODS(__methods) void init_gbk(void) { PyObject *codec; PyObject *m = NULL, *mod = NULL, *o = NULL; m = Py_InitModule("_gbk", __methods); /* Import mapdata */ MAPOPEN(mod, "zh_CN") if (IMPORTMAP(mod, gb2312, NULL, &gb2312decmap) || IMPORTMAP(mod, gbkext, NULL, &gbkextdecmap) || IMPORTMAP(mod, gbcommon, &gbcommonencmap, NULL)) goto errorexit; MAPCLOSE(mod) /* Create Codec Instances */ MULTIBYTECODEC_OPEN(mod, o) REGISTERCODEC(m, o, codec) MULTIBYTECODEC_CLOSE(mod, o) if (PyErr_Occurred()) Py_FatalError("can't initialize the _gbk module"); return; errorexit: Py_XDECREF(m); Py_XDECREF(mod); Py_XDECREF(o); } /* * ex: ts=8 sts=4 et */ |