[KoCo-CVS] [Commit] cjkcodecs/src _euc_jisx0213.c cjkcommon.h codeccommon.h
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-07-07 08:17:37
|
perky 03/07/07 01:17:36 Modified: src cjkcommon.h codeccommon.h Added: src _euc_jisx0213.c Log: Add EUC-JISX0213 codec. Revision Changes Path 1.12 +2 -2 cjkcodecs/src/cjkcommon.h Index: cjkcommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/cjkcommon.h,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- cjkcommon.h 7 Jul 2003 04:46:40 -0000 1.11 +++ cjkcommon.h 7 Jul 2003 08:17:36 -0000 1.12 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: cjkcommon.h,v 1.11 2003/07/07 04:46:40 perky Exp $ + * $Id: cjkcommon.h,v 1.12 2003/07/07 08:17:36 perky Exp $ */ #ifndef _CJKCOMMON_H_ @@ -74,7 +74,7 @@ }; struct pair_encodemap { - ucs2_t body, modifier; + ucs4_t uniseq; DBCHAR code; }; 1.17 +138 -138 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.16 retrieving revision 1.17 diff -u -r1.16 -r1.17 --- codeccommon.h 6 Jul 2003 10:10:31 -0000 1.16 +++ codeccommon.h 7 Jul 2003 08:17:36 -0000 1.17 @@ -1,138 +1,138 @@ -/* - * codeccommon.h: Common Codec Routines - * - * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * $Id: codeccommon.h,v 1.16 2003/07/06 10:10:31 perky Exp $ - */ - -#include "Python.h" -#include "multibytecodec.h" -#include "multibytecodec_compat.h" -#include "cjkcommon.h" - -#define ENCMAP(encoding) \ - const static encode_map *encoding##encmap; -#define DECMAP(encoding) \ - const static decode_map *encoding##decmap; - -#define ENCODER_INIT(encoding) \ - static int encoding##_encode_init( \ - MultibyteCodec_State *state) -#define ENCODER(encoding) \ - static int encoding##_encode( \ - MultibyteCodec_State *state, \ - const Py_UNICODE **inbuf, size_t inleft, \ - unsigned char **outbuf, size_t outleft, int flags) -#define ENCODER_RESET(encoding) \ - static int encoding##_encode_reset( \ - MultibyteCodec_State *state, \ - unsigned char **outbuf, size_t outleft) - -#define DECODER_INIT(encoding) \ - static int encoding##_decode_init( \ - MultibyteCodec_State *state) -#define DECODER(encoding) \ - static int encoding##_decode( \ - MultibyteCodec_State *state, \ - const unsigned char **inbuf, size_t inleft, \ - Py_UNICODE **outbuf, size_t outleft) -#define DECODER_RESET(encoding) \ - static int encoding##_decode_reset( \ - MultibyteCodec_State *state) - -#if Py_UNICODE_SIZE == 4 -#define UCS4INVALID(code) \ - if ((code) > 0xFFFF) \ - return 1; -#else -#define UCS4INVALID(code) \ - if (0) ; -#endif - -#define NEXT_IN(i) \ - (*inbuf) += (i); \ - (inleft) -= (i); -#define NEXT_OUT(o) \ - (*outbuf) += (o); \ - (outleft) -= (o); -#define NEXT(i, o) NEXT_IN(i) NEXT_OUT(o) - -#define RESERVE_INBUF(n) \ - if (inleft < (n)) \ - return MBERR_TOOFEW; -#define RESERVE_OUTBUF(n) \ - if (outleft < (n)) \ - return MBERR_TOOSMALL; - -#define WRITE1(c1) \ - RESERVE_OUTBUF(1) \ - (*outbuf)[0] = (unsigned char)(c1); -#define WRITE2(c1, c2) \ - RESERVE_OUTBUF(2) \ - (*outbuf)[0] = (unsigned char)(c1); \ - (*outbuf)[1] = (unsigned char)(c2); -#define WRITE3(c1, c2, c3) \ - RESERVE_OUTBUF(3) \ - (*outbuf)[0] = (unsigned char)(c1); \ - (*outbuf)[1] = (unsigned char)(c2); \ - (*outbuf)[2] = (unsigned char)(c3); -#define WRITE4(c1, c2, c3, c4) \ - RESERVE_OUTBUF(4) \ - (*outbuf)[0] = (unsigned char)(c1); \ - (*outbuf)[1] = (unsigned char)(c2); \ - (*outbuf)[2] = (unsigned char)(c3); \ - (*outbuf)[3] = (unsigned char)(c4); - -#if Py_UNICODE_SIZE == 2 -# define PUTUCS4(c) \ - RESERVE_OUTBUF(2) \ - (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \ - (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \ - NEXT_OUT(2) -#else -# define PUTUCS4(c) \ - RESERVE_OUTBUF(1) \ - **outbuf = (Py_UNICODE)(c); \ - NEXT_OUT(1) -#endif - -#define _TRYMAP_ENC(m, assi, val) \ - if ((m)->map != NULL && (val) >= (m)->bottom && \ - (val)<= (m)->top && ((assi) = (m)->map[(val) - \ - (m)->bottom]) != NOCHAR) -#define TRYMAP_ENC(charset, assi, uni) \ - _TRYMAP_ENC(&charset##encmap[uni >> 8], assi, uni & 0xff) -#define _TRYMAP_DEC(m, assi, val) \ - if ((m)->map != NULL && (val) >= (m)->bottom && \ - (val)<= (m)->top && ((assi) = (m)->map[(val) - \ - (m)->bottom]) != UNIINV) -#define TRYMAP_DEC(charset, assi, c1, c2) \ - _TRYMAP_DEC(&charset##decmap[c1], assi, c2) - -/* - * ex: ts=8 sts=4 et - */ +/* + * codeccommon.h: Common Codec Routines + * + * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $Id: codeccommon.h,v 1.17 2003/07/07 08:17:36 perky Exp $ + */ + +#include "Python.h" +#include "multibytecodec.h" +#include "multibytecodec_compat.h" +#include "cjkcommon.h" + +#define ENCMAP(encoding) \ + const static encode_map *encoding##encmap; +#define DECMAP(encoding) \ + const static decode_map *encoding##decmap; + +#define ENCODER_INIT(encoding) \ + static int encoding##_encode_init( \ + MultibyteCodec_State *state) +#define ENCODER(encoding) \ + static int encoding##_encode( \ + MultibyteCodec_State *state, \ + const Py_UNICODE **inbuf, size_t inleft, \ + unsigned char **outbuf, size_t outleft, int flags) +#define ENCODER_RESET(encoding) \ + static int encoding##_encode_reset( \ + MultibyteCodec_State *state, \ + unsigned char **outbuf, size_t outleft) + +#define DECODER_INIT(encoding) \ + static int encoding##_decode_init( \ + MultibyteCodec_State *state) +#define DECODER(encoding) \ + static int encoding##_decode( \ + MultibyteCodec_State *state, \ + const unsigned char **inbuf, size_t inleft, \ + Py_UNICODE **outbuf, size_t outleft) +#define DECODER_RESET(encoding) \ + static int encoding##_decode_reset( \ + MultibyteCodec_State *state) + +#if Py_UNICODE_SIZE == 4 +#define UCS4INVALID(code) \ + if ((code) > 0xFFFF) \ + return 1; +#else +#define UCS4INVALID(code) \ + if (0) ; +#endif + +#define NEXT_IN(i) \ + (*inbuf) += (i); \ + (inleft) -= (i); +#define NEXT_OUT(o) \ + (*outbuf) += (o); \ + (outleft) -= (o); +#define NEXT(i, o) NEXT_IN(i) NEXT_OUT(o) + +#define RESERVE_INBUF(n) \ + if (inleft < (n)) \ + return MBERR_TOOFEW; +#define RESERVE_OUTBUF(n) \ + if (outleft < (n)) \ + return MBERR_TOOSMALL; + +#define WRITE1(c1) \ + RESERVE_OUTBUF(1) \ + (*outbuf)[0] = (c1); +#define WRITE2(c1, c2) \ + RESERVE_OUTBUF(2) \ + (*outbuf)[0] = (c1); \ + (*outbuf)[1] = (c2); +#define WRITE3(c1, c2, c3) \ + RESERVE_OUTBUF(3) \ + (*outbuf)[0] = (c1); \ + (*outbuf)[1] = (c2); \ + (*outbuf)[2] = (c3); +#define WRITE4(c1, c2, c3, c4) \ + RESERVE_OUTBUF(4) \ + (*outbuf)[0] = (c1); \ + (*outbuf)[1] = (c2); \ + (*outbuf)[2] = (c3); \ + (*outbuf)[3] = (c4); + +#if Py_UNICODE_SIZE == 2 +# define PUTUCS4(c) \ + RESERVE_OUTBUF(2) \ + (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \ + (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \ + NEXT_OUT(2) +#else +# define PUTUCS4(c) \ + RESERVE_OUTBUF(1) \ + **outbuf = (Py_UNICODE)(c); \ + NEXT_OUT(1) +#endif + +#define _TRYMAP_ENC(m, assi, val) \ + if ((m)->map != NULL && (val) >= (m)->bottom && \ + (val)<= (m)->top && ((assi) = (m)->map[(val) - \ + (m)->bottom]) != NOCHAR) +#define TRYMAP_ENC(charset, assi, uni) \ + _TRYMAP_ENC(&charset##encmap[uni >> 8], assi, uni & 0xff) +#define _TRYMAP_DEC(m, assi, val) \ + if ((m)->map != NULL && (val) >= (m)->bottom && \ + (val)<= (m)->top && ((assi) = (m)->map[(val) - \ + (m)->bottom]) != UNIINV) +#define TRYMAP_DEC(charset, assi, c1, c2) \ + _TRYMAP_DEC(&charset##decmap[c1], assi, c2) + +/* + * ex: ts=8 sts=4 et + */ 1.1 cjkcodecs/src/_euc_jisx0213.c Index: _euc_jisx0213.c =================================================================== /* * _euc_jisx0213.c: the EUC-JISX0213 codec * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $Id: _euc_jisx0213.c,v 1.1 2003/07/07 08:17:36 perky Exp $ */ #include "codeccommon.h" #include "maps/map_jisx0213_pairs.h" ENCMAP(jisxcommon) DECMAP(jisx0208) DECMAP(jisx0212) ENCMAP(jisx0213_bmp) DECMAP(jisx0213_1_bmp) DECMAP(jisx0213_2_bmp) ENCMAP(jisx0213_emp) DECMAP(jisx0213_1_emp) DECMAP(jisx0213_2_emp) #define EMPBASE 0x20000 static DBCHAR find_pairencmap(ucs2_t body, ucs2_t modifier, struct pair_encodemap *haystack, int haystacksize) { int pos, min, max; ucs4_t value = body << 16 | modifier; min = 0; max = haystacksize; for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) if (value < haystack[pos].uniseq) { if (max == pos) break; else max = pos; } else if (value > haystack[pos].uniseq) { if (min == pos) break; else min = pos; } else break; if (value == haystack[pos].uniseq) return haystack[pos].code; else return DBCINV; } ENCODER(euc_jisx0213) { while (inleft > 0) { ucs4_t c = **inbuf; DBCHAR code; int insize = 1; if (c < 0x80) { RESERVE_OUTBUF(1) **outbuf = c; NEXT(1, 1) continue; } #if Py_UNICODE_SIZE == 2 if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ RESERVE_INBUF(2) if ((*inbuf)[1] >> 10 == 0xdc00 >> 10) { /* low surrogate */ c = 0x10000 + ((c - 0xd800) << 10) + ((ucs4_t)((*inbuf)[1]) - 0xdc00); } } #endif if (c <= 0xFFFF) { /* try 0213 first because it might have MULTIC */ TRYMAP_ENC(jisx0213_bmp, code, c) { if (code == MULTIC) { if (inleft < 2) { if (flags & MBENC_FLUSH) { code = find_pairencmap(c, 0, jisx0213_pairencmap, JISX0213_ENCPAIRS); if (code == DBCINV) return 1; } else return MBERR_TOOFEW; } else { code = find_pairencmap(c, (*inbuf)[1], jisx0213_pairencmap, JISX0213_ENCPAIRS); if (code == DBCINV) { code = find_pairencmap(c, 0, jisx0213_pairencmap, JISX0213_ENCPAIRS); if (code == DBCINV) return 1; } else insize = 2; } } } else TRYMAP_ENC(jisxcommon, code, c); else if (c >= 0xff61 && c <= 0xff9f) { /* JIS X 0201 half-width katakana */ RESERVE_OUTBUF(2) (*outbuf)[0] = 0x8e; (*outbuf)[1] = (unsigned char)(c - 0xfec0); NEXT(1, 2) continue; } else if (c == 0xff3c) /* F/W REVERSE SOLIDUS (see NOTES.euc-jisx0213) */ code = 0x2140; else return 1; } else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff); else return 1; if (code & 0x8000) { /* Codeset 2 */ RESERVE_OUTBUF(3) (*outbuf)[0] = 0x8f; (*outbuf)[1] = code >> 8; (*outbuf)[2] = (code & 0xFF) | 0x80; NEXT(insize, 3) } else { /* Codeset 1 */ RESERVE_OUTBUF(2) (*outbuf)[0] = (code >> 8) | 0x80; (*outbuf)[1] = (code & 0xFF) | 0x80; NEXT(insize, 2) } } return 0; } DECODER(euc_jisx0213) { while (inleft > 0) { unsigned char c = **inbuf; ucs4_t code; RESERVE_OUTBUF(1) if (c < 0x80) { **outbuf = c; NEXT(1, 1) continue; } if (c == 0x8e) { /* JIS X 0201 half-width katakana */ unsigned char c2; RESERVE_INBUF(2) c2 = (*inbuf)[1]; if (c2 >= 0xa1 && c2 <= 0xdf) { **outbuf = 0xfec0 + c2; NEXT(2, 1) } else return 2; } else if (c == 0x8f) { unsigned char c2, c3; RESERVE_INBUF(3) c2 = (*inbuf)[1] ^ 0x80; c3 = (*inbuf)[2] ^ 0x80; /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES.euc-jisx0213) */ TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ; else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) { PUTUCS4(EMPBASE | code) NEXT_IN(3) continue; } else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ; else return 3; NEXT(3, 1) } else { unsigned char c2; RESERVE_INBUF(2) c ^= 0x80; c2 = (*inbuf)[1] ^ 0x80; /* JIS X 0213 Plane 1 */ if (c == 0xa1 && c2 == 0xc0) **outbuf = 0xff3c; else TRYMAP_DEC(jisx0208, **outbuf, c, c2); else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2); else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) { PUTUCS4(EMPBASE | code) NEXT_IN(2) continue; } else TRYMAP_DEC(jisx0213_pair, code, c, c2) { WRITE2(code >> 16, code & 0xffff) NEXT(2, 2) continue; } else return 2; NEXT(2, 1) } } return 0; } #include "codecentry.h" BEGIN_CODEC_REGISTRY(euc_jisx0213) MAPOPEN(ja_JP) IMPORTMAP_ENC(jisxcommon) IMPORTMAP_DEC(jisx0208) IMPORTMAP_DEC(jisx0212) IMPORTMAP_ENC(jisx0213_bmp) IMPORTMAP_DEC(jisx0213_1_bmp) IMPORTMAP_DEC(jisx0213_2_bmp) IMPORTMAP_ENC(jisx0213_emp) IMPORTMAP_DEC(jisx0213_1_emp) IMPORTMAP_DEC(jisx0213_2_emp) MAPCLOSE() END_CODEC_REGISTRY(euc_jisx0213) /* * ex: ts=8 sts=4 et */ |