Thread: [KoCo-CVS] [Commit] cjkcodecs/src codeccommon.h _iso_2022_jp.c _iso_2022_jp_1.c _iso_2022_kr.c
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-07-06 10:10:33
|
perky 03/07/06 03:10:31 Modified: src codeccommon.h _iso_2022_jp.c _iso_2022_jp_1.c _iso_2022_kr.c Log: Rename PAVE* -> WRITE* Revision Changes Path 1.16 +138 -138 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.15 retrieving revision 1.16 diff -u -r1.15 -r1.16 --- codeccommon.h 5 Jul 2003 19:49:02 -0000 1.15 +++ codeccommon.h 6 Jul 2003 10:10:31 -0000 1.16 @@ -1,138 +1,138 @@ -/* - * codeccommon.h: Common Codec Routines - * - * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * $Id: codeccommon.h,v 1.15 2003/07/05 19:49:02 perky Exp $ - */ - -#include "Python.h" -#include "multibytecodec.h" -#include "multibytecodec_compat.h" -#include "cjkcommon.h" - -#define ENCMAP(encoding) \ - const static encode_map *encoding##encmap; -#define DECMAP(encoding) \ - const static decode_map *encoding##decmap; - -#define ENCODER_INIT(encoding) \ - static int encoding##_encode_init( \ - MultibyteCodec_State *state) -#define ENCODER(encoding) \ - static int encoding##_encode( \ - MultibyteCodec_State *state, \ - const Py_UNICODE **inbuf, size_t inleft, \ - unsigned char **outbuf, size_t outleft, int flags) -#define ENCODER_RESET(encoding) \ - static int encoding##_encode_reset( \ - MultibyteCodec_State *state, \ - unsigned char **outbuf, size_t outleft) - -#define DECODER_INIT(encoding) \ - static int encoding##_decode_init( \ - MultibyteCodec_State *state) -#define DECODER(encoding) \ - static int encoding##_decode( \ - MultibyteCodec_State *state, \ - const unsigned char **inbuf, size_t inleft, \ - Py_UNICODE **outbuf, size_t outleft) -#define DECODER_RESET(encoding) \ - static int encoding##_decode_reset( \ - MultibyteCodec_State *state) - -#if Py_UNICODE_SIZE == 4 -#define UCS4INVALID(code) \ - if ((code) > 0xFFFF) \ - return 1; -#else -#define UCS4INVALID(code) \ - if (0) ; -#endif - -#define NEXT_IN(i) \ - (*inbuf) += (i); \ - (inleft) -= (i); -#define NEXT_OUT(o) \ - (*outbuf) += (o); \ - (outleft) -= (o); -#define NEXT(i, o) NEXT_IN(i) NEXT_OUT(o) - -#define RESERVE_INBUF(n) \ - if (inleft < (n)) \ - return MBERR_TOOFEW; -#define RESERVE_OUTBUF(n) \ - if (outleft < (n)) \ - return MBERR_TOOSMALL; - -#define PAVE1(c1) \ - RESERVE_OUTBUF(1) \ - (*outbuf)[0] = (unsigned char)(c1); -#define PAVE2(c1, c2) \ - RESERVE_OUTBUF(2) \ - (*outbuf)[0] = (unsigned char)(c1); \ - (*outbuf)[1] = (unsigned char)(c2); -#define PAVE3(c1, c2, c3) \ - RESERVE_OUTBUF(3) \ - (*outbuf)[0] = (unsigned char)(c1); \ - (*outbuf)[1] = (unsigned char)(c2); \ - (*outbuf)[2] = (unsigned char)(c3); -#define PAVE4(c1, c2, c3, c4) \ - RESERVE_OUTBUF(4) \ - (*outbuf)[0] = (unsigned char)(c1); \ - (*outbuf)[1] = (unsigned char)(c2); \ - (*outbuf)[2] = (unsigned char)(c3); \ - (*outbuf)[3] = (unsigned char)(c4); - -#if Py_UNICODE_SIZE == 2 -# define PUTUCS4(c) \ - RESERVE_OUTBUF(2) \ - (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \ - (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \ - NEXT_OUT(2) -#else -# define PUTUCS4(c) \ - RESERVE_OUTBUF(1) \ - **outbuf = (Py_UNICODE)(c); \ - NEXT_OUT(1) -#endif - -#define _TRYMAP_ENC(m, assi, val) \ - if ((m)->map != NULL && (val) >= (m)->bottom && \ - (val)<= (m)->top && ((assi) = (m)->map[(val) - \ - (m)->bottom]) != NOCHAR) -#define TRYMAP_ENC(charset, assi, uni) \ - _TRYMAP_ENC(&charset##encmap[uni >> 8], assi, uni & 0xff) -#define _TRYMAP_DEC(m, assi, val) \ - if ((m)->map != NULL && (val) >= (m)->bottom && \ - (val)<= (m)->top && ((assi) = (m)->map[(val) - \ - (m)->bottom]) != UNIINV) -#define TRYMAP_DEC(charset, assi, c1, c2) \ - _TRYMAP_DEC(&charset##decmap[c1], assi, c2) - -/* - * ex: ts=8 sts=4 et - */ +/* + * codeccommon.h: Common Codec Routines + * + * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $Id: codeccommon.h,v 1.16 2003/07/06 10:10:31 perky Exp $ + */ + +#include "Python.h" +#include "multibytecodec.h" +#include "multibytecodec_compat.h" +#include "cjkcommon.h" + +#define ENCMAP(encoding) \ + const static encode_map *encoding##encmap; +#define DECMAP(encoding) \ + const static decode_map *encoding##decmap; + +#define ENCODER_INIT(encoding) \ + static int encoding##_encode_init( \ + MultibyteCodec_State *state) +#define ENCODER(encoding) \ + static int encoding##_encode( \ + MultibyteCodec_State *state, \ + const Py_UNICODE **inbuf, size_t inleft, \ + unsigned char **outbuf, size_t outleft, int flags) +#define ENCODER_RESET(encoding) \ + static int encoding##_encode_reset( \ + MultibyteCodec_State *state, \ + unsigned char **outbuf, size_t outleft) + +#define DECODER_INIT(encoding) \ + static int encoding##_decode_init( \ + MultibyteCodec_State *state) +#define DECODER(encoding) \ + static int encoding##_decode( \ + MultibyteCodec_State *state, \ + const unsigned char **inbuf, size_t inleft, \ + Py_UNICODE **outbuf, size_t outleft) +#define DECODER_RESET(encoding) \ + static int encoding##_decode_reset( \ + MultibyteCodec_State *state) + +#if Py_UNICODE_SIZE == 4 +#define UCS4INVALID(code) \ + if ((code) > 0xFFFF) \ + return 1; +#else +#define UCS4INVALID(code) \ + if (0) ; +#endif + +#define NEXT_IN(i) \ + (*inbuf) += (i); \ + (inleft) -= (i); +#define NEXT_OUT(o) \ + (*outbuf) += (o); \ + (outleft) -= (o); +#define NEXT(i, o) NEXT_IN(i) NEXT_OUT(o) + +#define RESERVE_INBUF(n) \ + if (inleft < (n)) \ + return MBERR_TOOFEW; +#define RESERVE_OUTBUF(n) \ + if (outleft < (n)) \ + return MBERR_TOOSMALL; + +#define WRITE1(c1) \ + RESERVE_OUTBUF(1) \ + (*outbuf)[0] = (unsigned char)(c1); +#define WRITE2(c1, c2) \ + RESERVE_OUTBUF(2) \ + (*outbuf)[0] = (unsigned char)(c1); \ + (*outbuf)[1] = (unsigned char)(c2); +#define WRITE3(c1, c2, c3) \ + RESERVE_OUTBUF(3) \ + (*outbuf)[0] = (unsigned char)(c1); \ + (*outbuf)[1] = (unsigned char)(c2); \ + (*outbuf)[2] = (unsigned char)(c3); +#define WRITE4(c1, c2, c3, c4) \ + RESERVE_OUTBUF(4) \ + (*outbuf)[0] = (unsigned char)(c1); \ + (*outbuf)[1] = (unsigned char)(c2); \ + (*outbuf)[2] = (unsigned char)(c3); \ + (*outbuf)[3] = (unsigned char)(c4); + +#if Py_UNICODE_SIZE == 2 +# define PUTUCS4(c) \ + RESERVE_OUTBUF(2) \ + (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \ + (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \ + NEXT_OUT(2) +#else +# define PUTUCS4(c) \ + RESERVE_OUTBUF(1) \ + **outbuf = (Py_UNICODE)(c); \ + NEXT_OUT(1) +#endif + +#define _TRYMAP_ENC(m, assi, val) \ + if ((m)->map != NULL && (val) >= (m)->bottom && \ + (val)<= (m)->top && ((assi) = (m)->map[(val) - \ + (m)->bottom]) != NOCHAR) +#define TRYMAP_ENC(charset, assi, uni) \ + _TRYMAP_ENC(&charset##encmap[uni >> 8], assi, uni & 0xff) +#define _TRYMAP_DEC(m, assi, val) \ + if ((m)->map != NULL && (val) >= (m)->bottom && \ + (val)<= (m)->top && ((assi) = (m)->map[(val) - \ + (m)->bottom]) != UNIINV) +#define TRYMAP_DEC(charset, assi, c1, c2) \ + _TRYMAP_DEC(&charset##decmap[c1], assi, c2) + +/* + * ex: ts=8 sts=4 et + */ 1.5 +271 -271 cjkcodecs/src/_iso_2022_jp.c Index: _iso_2022_jp.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_jp.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- _iso_2022_jp.c 5 Jun 2003 09:56:21 -0000 1.4 +++ _iso_2022_jp.c 6 Jul 2003 10:10:31 -0000 1.5 @@ -1,271 +1,271 @@ -/* - * _iso_2022_jp.c: the ISO-2022-JP codec (RFC1468) - * - * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * $Id: _iso_2022_jp.c,v 1.4 2003/06/05 09:56:21 perky Exp $ - */ - -#include "codeccommon.h" -#include "iso2022common.h" -#include "maps/alg_jisx0201.h" - -ENCMAP(jisxcommon) -DECMAP(jisx0208) - -#define HAVE_ENCODER_INIT -ENCODER_INIT(iso_2022_jp) -{ - state->i = 0; - STATE_SETG0(state, CHARSET_ASCII) - STATE_SETG1(state, CHARSET_ASCII) - return 0; -} - -#define HAVE_ENCODER_RESET -ENCODER_RESET(iso_2022_jp) -{ - if (STATE_GETG0(state) != CHARSET_ASCII) { - RESERVE_OUTBUF(3) - PAVE3(ESC, '(', 'B') - STATE_SETG0(state, CHARSET_ASCII) - NEXT_OUT(3) - } - return 0; -} - -/* ISO-2022-JP changes designations instead of shifting-out */ - -ENCODER(iso_2022_jp) -{ - while (inleft > 0) { - Py_UNICODE c = **inbuf; - DBCHAR code; - - if (c < 0x80) { - switch (STATE_GETG0(state)) { - case CHARSET_ASCII: - PAVE1(c) - NEXT(1, 1) - break; - case CHARSET_JISX0201_R: - JISX0201_R_ENCODE(c, code) - else { /* FALLTHROUGH (yay!) */ - default: - PAVE3(ESC, '(', 'B') - NEXT_OUT(3) - STATE_SETG0(state, CHARSET_ASCII) - code = c; - } - PAVE1(code) - NEXT(1, 1) - break; - } - if (c == '\n') - STATE_CLEARFLAG(state, F_SHIFTED) - } else UCS4INVALID(c) - else { - unsigned char charset; - - charset = STATE_GETG0(state); - if (charset == CHARSET_JISX0201_R) { - code = DBCINV; - JISX0201_R_ENCODE(c, code) - if (code != DBCINV) { - PAVE1(code) - NEXT(1, 1) - continue; - } - } - - TRYMAP_ENC(jisxcommon, code, c) { - if (code & 0x8000) /* MSB set: JIS X 0212 */ - return 1; - if (charset != CHARSET_JISX0208) { - PAVE3(ESC, '$', 'B') - STATE_SETG0(state, CHARSET_JISX0208) - NEXT_OUT(3) - } - PAVE2(code >> 8, code & 0xff) - NEXT(1, 2) - } else { - JISX0201_R_ENCODE(c, code) - else - return 1; - /* if (charset == CHARSET_JISX0201_R) : already checked */ - PAVE4(ESC, '(', 'J', code) - STATE_SETG0(state, CHARSET_JISX0201_R) - NEXT(1, 4) - } - } - } - - return 0; -} - -#define HAVE_DECODER_INIT -DECODER_INIT(iso_2022_jp) -{ - state->i = 0; - STATE_SETG0(state, CHARSET_ASCII) - STATE_SETG1(state, CHARSET_ASCII) - return 0; -} - -#define HAVE_DECODER_RESET -DECODER_RESET(iso_2022_jp) -{ - STATE_CLEARFLAG(state, F_SHIFTED) - return 0; -} - -DECODER(iso_2022_jp) -{ - while (inleft > 0) { - unsigned char c = **inbuf; - - if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { - /* ESC throughout mode: for non-iso2022 escape sequences */ - RESERVE_OUTBUF(1) - **outbuf = c; /* assume as ISO-8859-1 */ - NEXT(1, 1) - if (IS_ESCEND(c)) { - STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) - } - continue; - } - - switch (c) { - case ESC: - RESERVE_INBUF(2) - if (IS_ISO2022ESC((*inbuf)[1])) { - int eslen; - - eslen = iso2022esclen(*inbuf, inleft); - if (eslen < 0) - return eslen == MBERR_INTERNAL ? 1 : eslen; - - if (eslen == 3) { - unsigned char charset; - - if ((*inbuf)[1] == '$') { - if ((*inbuf)[2] == '@' || (*inbuf)[2] == 'B') { - charset = (*inbuf)[2] | CHARSET_DOUBLEBYTE; - STATE_SETG0(state, charset); - } else - return 3; - } else { - if ((*inbuf)[2] == 'B' || (*inbuf)[2] == 'J') - charset = (*inbuf)[2]; - else - return 3; - - if ((*inbuf)[1] == '(') { - STATE_SETG0(state, charset) - } else if ((*inbuf)[1] == ')') { - STATE_SETG1(state, charset) - } else - return 3; - } - } else - return eslen; - NEXT_IN(eslen) - } else { - STATE_SETFLAG(state, F_ESCTHROUGHOUT) - **outbuf = ESC; - NEXT(1, 1) - } - break; - case SI: - STATE_CLEARFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case SO: - STATE_SETFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case '\n': - STATE_CLEARFLAG(state, F_SHIFTED) - /* FALLTHROUGH */ - case SP: /* FALLTHROUGH */ - case DEL: - RESERVE_OUTBUF(1) - **outbuf = c; - NEXT(1, 1) - break; - default: - if ((c & 0x7f) < 0x20) { /* C0 and C1 */ - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else { - unsigned char charset; - - if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ - charset = STATE_GETG0(state); - else /* G1 */ - charset = STATE_GETG1(state); - - if (charset & CHARSET_DOUBLEBYTE) { - /* all double byte character sets are in JIS X 0208 here. - * this means that we don't distinguish :1978 from :1983. */ - RESERVE_INBUF(2) - RESERVE_OUTBUF(1) - TRYMAP_DEC(jisx0208, **outbuf, c & 0x7f, - (*inbuf)[1] & 0x7f) { - NEXT(2, 1) - } else - return 2; - } else if (charset == CHARSET_ASCII) { - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else if (charset == CHARSET_JISX0201_R) { - RESERVE_OUTBUF(1) - JISX0201_R_DECODE(c & 0x7f, **outbuf) - else - return 1; - NEXT(1, 1) - } else - return MBERR_INTERNAL; - } - } - } - - return 0; -} - -#include "codecentry.h" -BEGIN_CODEC_REGISTRY(iso_2022_jp) - MAPOPEN(ja_JP) - IMPORTMAP_DEC(jisx0208) - IMPORTMAP_ENC(jisxcommon) - MAPCLOSE() -END_CODEC_REGISTRY(iso_2022_jp) - -/* - * ex: ts=8 sts=4 et - */ +/* + * _iso_2022_jp.c: the ISO-2022-JP codec (RFC1468) + * + * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $Id: _iso_2022_jp.c,v 1.5 2003/07/06 10:10:31 perky Exp $ + */ + +#include "codeccommon.h" +#include "iso2022common.h" +#include "maps/alg_jisx0201.h" + +ENCMAP(jisxcommon) +DECMAP(jisx0208) + +#define HAVE_ENCODER_INIT +ENCODER_INIT(iso_2022_jp) +{ + state->i = 0; + STATE_SETG0(state, CHARSET_ASCII) + STATE_SETG1(state, CHARSET_ASCII) + return 0; +} + +#define HAVE_ENCODER_RESET +ENCODER_RESET(iso_2022_jp) +{ + if (STATE_GETG0(state) != CHARSET_ASCII) { + RESERVE_OUTBUF(3) + WRITE3(ESC, '(', 'B') + STATE_SETG0(state, CHARSET_ASCII) + NEXT_OUT(3) + } + return 0; +} + +/* ISO-2022-JP changes designations instead of shifting-out */ + +ENCODER(iso_2022_jp) +{ + while (inleft > 0) { + Py_UNICODE c = **inbuf; + DBCHAR code; + + if (c < 0x80) { + switch (STATE_GETG0(state)) { + case CHARSET_ASCII: + WRITE1(c) + NEXT(1, 1) + break; + case CHARSET_JISX0201_R: + JISX0201_R_ENCODE(c, code) + else { /* FALLTHROUGH (yay!) */ + default: + WRITE3(ESC, '(', 'B') + NEXT_OUT(3) + STATE_SETG0(state, CHARSET_ASCII) + code = c; + } + WRITE1(code) + NEXT(1, 1) + break; + } + if (c == '\n') + STATE_CLEARFLAG(state, F_SHIFTED) + } else UCS4INVALID(c) + else { + unsigned char charset; + + charset = STATE_GETG0(state); + if (charset == CHARSET_JISX0201_R) { + code = DBCINV; + JISX0201_R_ENCODE(c, code) + if (code != DBCINV) { + WRITE1(code) + NEXT(1, 1) + continue; + } + } + + TRYMAP_ENC(jisxcommon, code, c) { + if (code & 0x8000) /* MSB set: JIS X 0212 */ + return 1; + if (charset != CHARSET_JISX0208) { + WRITE3(ESC, '$', 'B') + STATE_SETG0(state, CHARSET_JISX0208) + NEXT_OUT(3) + } + WRITE2(code >> 8, code & 0xff) + NEXT(1, 2) + } else { + JISX0201_R_ENCODE(c, code) + else + return 1; + /* if (charset == CHARSET_JISX0201_R) : already checked */ + WRITE4(ESC, '(', 'J', code) + STATE_SETG0(state, CHARSET_JISX0201_R) + NEXT(1, 4) + } + } + } + + return 0; +} + +#define HAVE_DECODER_INIT +DECODER_INIT(iso_2022_jp) +{ + state->i = 0; + STATE_SETG0(state, CHARSET_ASCII) + STATE_SETG1(state, CHARSET_ASCII) + return 0; +} + +#define HAVE_DECODER_RESET +DECODER_RESET(iso_2022_jp) +{ + STATE_CLEARFLAG(state, F_SHIFTED) + return 0; +} + +DECODER(iso_2022_jp) +{ + while (inleft > 0) { + unsigned char c = **inbuf; + + if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { + /* ESC throughout mode: for non-iso2022 escape sequences */ + RESERVE_OUTBUF(1) + **outbuf = c; /* assume as ISO-8859-1 */ + NEXT(1, 1) + if (IS_ESCEND(c)) { + STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) + } + continue; + } + + switch (c) { + case ESC: + RESERVE_INBUF(2) + if (IS_ISO2022ESC((*inbuf)[1])) { + int eslen; + + eslen = iso2022esclen(*inbuf, inleft); + if (eslen < 0) + return eslen == MBERR_INTERNAL ? 1 : eslen; + + if (eslen == 3) { + unsigned char charset; + + if ((*inbuf)[1] == '$') { + if ((*inbuf)[2] == '@' || (*inbuf)[2] == 'B') { + charset = (*inbuf)[2] | CHARSET_DOUBLEBYTE; + STATE_SETG0(state, charset); + } else + return 3; + } else { + if ((*inbuf)[2] == 'B' || (*inbuf)[2] == 'J') + charset = (*inbuf)[2]; + else + return 3; + + if ((*inbuf)[1] == '(') { + STATE_SETG0(state, charset) + } else if ((*inbuf)[1] == ')') { + STATE_SETG1(state, charset) + } else + return 3; + } + } else + return eslen; + NEXT_IN(eslen) + } else { + STATE_SETFLAG(state, F_ESCTHROUGHOUT) + **outbuf = ESC; + NEXT(1, 1) + } + break; + case SI: + STATE_CLEARFLAG(state, F_SHIFTED) + NEXT_IN(1) + break; + case SO: + STATE_SETFLAG(state, F_SHIFTED) + NEXT_IN(1) + break; + case '\n': + STATE_CLEARFLAG(state, F_SHIFTED) + /* FALLTHROUGH */ + case SP: /* FALLTHROUGH */ + case DEL: + RESERVE_OUTBUF(1) + **outbuf = c; + NEXT(1, 1) + break; + default: + if ((c & 0x7f) < 0x20) { /* C0 and C1 */ + RESERVE_OUTBUF(1) + **outbuf = c & 0x7f; + NEXT(1, 1) + } else { + unsigned char charset; + + if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ + charset = STATE_GETG0(state); + else /* G1 */ + charset = STATE_GETG1(state); + + if (charset & CHARSET_DOUBLEBYTE) { + /* all double byte character sets are in JIS X 0208 here. + * this means that we don't distinguish :1978 from :1983. */ + RESERVE_INBUF(2) + RESERVE_OUTBUF(1) + TRYMAP_DEC(jisx0208, **outbuf, c & 0x7f, + (*inbuf)[1] & 0x7f) { + NEXT(2, 1) + } else + return 2; + } else if (charset == CHARSET_ASCII) { + RESERVE_OUTBUF(1) + **outbuf = c & 0x7f; + NEXT(1, 1) + } else if (charset == CHARSET_JISX0201_R) { + RESERVE_OUTBUF(1) + JISX0201_R_DECODE(c & 0x7f, **outbuf) + else + return 1; + NEXT(1, 1) + } else + return MBERR_INTERNAL; + } + } + } + + return 0; +} + +#include "codecentry.h" +BEGIN_CODEC_REGISTRY(iso_2022_jp) + MAPOPEN(ja_JP) + IMPORTMAP_DEC(jisx0208) + IMPORTMAP_ENC(jisxcommon) + MAPCLOSE() +END_CODEC_REGISTRY(iso_2022_jp) + +/* + * ex: ts=8 sts=4 et + */ 1.4 +295 -295 cjkcodecs/src/_iso_2022_jp_1.c Index: _iso_2022_jp_1.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_jp_1.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- _iso_2022_jp_1.c 5 Jun 2003 09:56:22 -0000 1.3 +++ _iso_2022_jp_1.c 6 Jul 2003 10:10:31 -0000 1.4 @@ -1,295 +1,295 @@ -/* - * _iso_2022_jp_1.c: the ISO-2022-JP-1 codec (RFC2237) - * - * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * $Id: _iso_2022_jp_1.c,v 1.3 2003/06/05 09:56:22 perky Exp $ - */ - -#include "codeccommon.h" -#include "iso2022common.h" -#include "maps/alg_jisx0201.h" - -ENCMAP(jisxcommon) -DECMAP(jisx0208) -DECMAP(jisx0212) - -#define HAVE_ENCODER_INIT -ENCODER_INIT(iso_2022_jp_1) -{ - state->i = 0; - STATE_SETG0(state, CHARSET_ASCII) - STATE_SETG1(state, CHARSET_ASCII) - return 0; -} - -#define HAVE_ENCODER_RESET -ENCODER_RESET(iso_2022_jp_1) -{ - if (STATE_GETG0(state) != CHARSET_ASCII) { - RESERVE_OUTBUF(3) - PAVE3(ESC, '(', 'B') - STATE_SETG0(state, CHARSET_ASCII) - NEXT_OUT(3) - } - return 0; -} - -/* ISO-2022-JP-1 changes designations instead of shifting-out */ - -ENCODER(iso_2022_jp_1) -{ - while (inleft > 0) { - Py_UNICODE c = **inbuf; - DBCHAR code; - - if (c < 0x80) { - switch (STATE_GETG0(state)) { - case CHARSET_ASCII: - PAVE1(c) - NEXT(1, 1) - break; - case CHARSET_JISX0201_R: - JISX0201_R_ENCODE(c, code) - else { /* FALLTHROUGH (yay!) */ - default: - PAVE3(ESC, '(', 'B') - NEXT_OUT(3) - STATE_SETG0(state, CHARSET_ASCII) - code = c; - } - PAVE1(code) - NEXT(1, 1) - break; - } - if (c == '\n') - STATE_CLEARFLAG(state, F_SHIFTED) - } else UCS4INVALID(c) - else { - unsigned char charset; - - charset = STATE_GETG0(state); - if (charset == CHARSET_JISX0201_R) { - code = DBCINV; - JISX0201_R_ENCODE(c, code) - if (code != DBCINV) { - PAVE1(code) - NEXT(1, 1) - continue; - } - } - - TRYMAP_ENC(jisxcommon, code, c) { - if (code & 0x8000) { /* MSB set: JIS X 0212 */ - if (charset != CHARSET_JISX0212) { - PAVE4(ESC, '$', '(', 'D') - STATE_SETG0(state, CHARSET_JISX0212) - NEXT_OUT(4) - } - PAVE2((code >> 8) & 0x7f, code & 0x7f) - } else { /* MSB unset: JIS X 0208 */ - if (charset != CHARSET_JISX0208) { - PAVE3(ESC, '$', 'B') - STATE_SETG0(state, CHARSET_JISX0208) - NEXT_OUT(3) - } - PAVE2(code >> 8, code & 0xff) - } - NEXT(1, 2) - } else { - JISX0201_R_ENCODE(c, code) - else - return 1; - /* if (charset == CHARSET_JISX0201_R) : already checked */ - PAVE4(ESC, '(', 'J', code) - STATE_SETG0(state, CHARSET_JISX0201_R) - NEXT(1, 4) - } - } - } - - return 0; -} - -#define HAVE_DECODER_INIT -DECODER_INIT(iso_2022_jp_1) -{ - state->i = 0; - STATE_SETG0(state, CHARSET_ASCII) - STATE_SETG1(state, CHARSET_ASCII) - return 0; -} - -#define HAVE_DECODER_RESET -DECODER_RESET(iso_2022_jp_1) -{ - STATE_CLEARFLAG(state, F_SHIFTED) - return 0; -} - -DECODER(iso_2022_jp_1) -{ - while (inleft > 0) { - unsigned char c = **inbuf; - - if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { - /* ESC throughout mode: for non-iso2022 escape sequences */ - RESERVE_OUTBUF(1) - **outbuf = c; /* assume as ISO-8859-1 */ - NEXT(1, 1) - if (IS_ESCEND(c)) { - STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) - } - continue; - } - - switch (c) { - case ESC: - RESERVE_INBUF(2) - if (IS_ISO2022ESC((*inbuf)[1])) { - int eslen; - - eslen = iso2022esclen(*inbuf, inleft); - if (eslen < 0) - return eslen == MBERR_INTERNAL ? 1 : eslen; - - if (eslen == 3) { - unsigned char charset; - - if ((*inbuf)[1] == '$') { - if ((*inbuf)[2] == '@' || (*inbuf)[2] == 'B') { - charset = (*inbuf)[2] | CHARSET_DOUBLEBYTE; - STATE_SETG0(state, charset); - } else - return 3; - } else { - if ((*inbuf)[2] == 'B' || (*inbuf)[2] == 'J') - charset = (*inbuf)[2]; - else - return 3; - - if ((*inbuf)[1] == '(') { - STATE_SETG0(state, charset) - } else if ((*inbuf)[1] == ')') { - STATE_SETG1(state, charset) - } else - return 3; - } - } else if (eslen == 4) { - if ((*inbuf)[1] == '$' && (*inbuf)[3] == 'D') { - if ((*inbuf)[2] == '(') { - STATE_SETG0(state, CHARSET_JISX0212) - } else if ((*inbuf)[2] == ')') { - STATE_SETG1(state, CHARSET_JISX0212) - } else - return 4; - } else - return 4; - } else - return eslen; - NEXT_IN(eslen) - } else { - STATE_SETFLAG(state, F_ESCTHROUGHOUT) - **outbuf = ESC; - NEXT(1, 1) - } - break; - case SI: - STATE_CLEARFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case SO: - STATE_SETFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case '\n': - STATE_CLEARFLAG(state, F_SHIFTED) - /* FALLTHROUGH */ - case SP: /* FALLTHROUGH */ - case DEL: - RESERVE_OUTBUF(1) - **outbuf = c; - NEXT(1, 1) - break; - default: - if ((c & 0x7f) < 0x20) { /* C0 and C1 */ - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else { - unsigned char charset; - - if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ - charset = STATE_GETG0(state); - else /* G1 */ - charset = STATE_GETG1(state); - - if (charset & CHARSET_DOUBLEBYTE) { - RESERVE_INBUF(2) - RESERVE_OUTBUF(1) - if (charset == CHARSET_JISX0208 || - charset == CHARSET_JISX0208_O) { - TRYMAP_DEC(jisx0208, **outbuf, c & 0x7f, - (*inbuf)[1] & 0x7f); - else return 2; - } else if (charset == CHARSET_JISX0212) { - TRYMAP_DEC(jisx0212, **outbuf, c & 0x7f, - (*inbuf)[1] & 0x7f); - else return 2; - } else - return MBERR_INTERNAL; - NEXT(2, 1) - } else if (charset == CHARSET_ASCII) { - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else if (charset == CHARSET_JISX0201_R) { - RESERVE_OUTBUF(1) - JISX0201_R_DECODE(c & 0x7f, **outbuf) - else - return 1; - NEXT(1, 1) - } else - return MBERR_INTERNAL; - } - } - } - - return 0; -} - -#include "codecentry.h" -BEGIN_CODEC_REGISTRY(iso_2022_jp_1) - MAPOPEN(ja_JP) - IMPORTMAP_DEC(jisx0208) - IMPORTMAP_DEC(jisx0212) - IMPORTMAP_ENC(jisxcommon) - MAPCLOSE() -END_CODEC_REGISTRY(iso_2022_jp_1) - -/* - * ex: ts=8 sts=4 et - */ +/* + * _iso_2022_jp_1.c: the ISO-2022-JP-1 codec (RFC2237) + * + * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $Id: _iso_2022_jp_1.c,v 1.4 2003/07/06 10:10:31 perky Exp $ + */ + +#include "codeccommon.h" +#include "iso2022common.h" +#include "maps/alg_jisx0201.h" + +ENCMAP(jisxcommon) +DECMAP(jisx0208) +DECMAP(jisx0212) + +#define HAVE_ENCODER_INIT +ENCODER_INIT(iso_2022_jp_1) +{ + state->i = 0; + STATE_SETG0(state, CHARSET_ASCII) + STATE_SETG1(state, CHARSET_ASCII) + return 0; +} + +#define HAVE_ENCODER_RESET +ENCODER_RESET(iso_2022_jp_1) +{ + if (STATE_GETG0(state) != CHARSET_ASCII) { + RESERVE_OUTBUF(3) + WRITE3(ESC, '(', 'B') + STATE_SETG0(state, CHARSET_ASCII) + NEXT_OUT(3) + } + return 0; +} + +/* ISO-2022-JP-1 changes designations instead of shifting-out */ + +ENCODER(iso_2022_jp_1) +{ + while (inleft > 0) { + Py_UNICODE c = **inbuf; + DBCHAR code; + + if (c < 0x80) { + switch (STATE_GETG0(state)) { + case CHARSET_ASCII: + WRITE1(c) + NEXT(1, 1) + break; + case CHARSET_JISX0201_R: + JISX0201_R_ENCODE(c, code) + else { /* FALLTHROUGH (yay!) */ + default: + WRITE3(ESC, '(', 'B') + NEXT_OUT(3) + STATE_SETG0(state, CHARSET_ASCII) + code = c; + } + WRITE1(code) + NEXT(1, 1) + break; + } + if (c == '\n') + STATE_CLEARFLAG(state, F_SHIFTED) + } else UCS4INVALID(c) + else { + unsigned char charset; + + charset = STATE_GETG0(state); + if (charset == CHARSET_JISX0201_R) { + code = DBCINV; + JISX0201_R_ENCODE(c, code) + if (code != DBCINV) { + WRITE1(code) + NEXT(1, 1) + continue; + } + } + + TRYMAP_ENC(jisxcommon, code, c) { + if (code & 0x8000) { /* MSB set: JIS X 0212 */ + if (charset != CHARSET_JISX0212) { + WRITE4(ESC, '$', '(', 'D') + STATE_SETG0(state, CHARSET_JISX0212) + NEXT_OUT(4) + } + WRITE2((code >> 8) & 0x7f, code & 0x7f) + } else { /* MSB unset: JIS X 0208 */ + if (charset != CHARSET_JISX0208) { + WRITE3(ESC, '$', 'B') + STATE_SETG0(state, CHARSET_JISX0208) + NEXT_OUT(3) + } + WRITE2(code >> 8, code & 0xff) + } + NEXT(1, 2) + } else { + JISX0201_R_ENCODE(c, code) + else + return 1; + /* if (charset == CHARSET_JISX0201_R) : already checked */ + WRITE4(ESC, '(', 'J', code) + STATE_SETG0(state, CHARSET_JISX0201_R) + NEXT(1, 4) + } + } + } + + return 0; +} + +#define HAVE_DECODER_INIT +DECODER_INIT(iso_2022_jp_1) +{ + state->i = 0; + STATE_SETG0(state, CHARSET_ASCII) + STATE_SETG1(state, CHARSET_ASCII) + return 0; +} + +#define HAVE_DECODER_RESET +DECODER_RESET(iso_2022_jp_1) +{ + STATE_CLEARFLAG(state, F_SHIFTED) + return 0; +} + +DECODER(iso_2022_jp_1) +{ + while (inleft > 0) { + unsigned char c = **inbuf; + + if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { + /* ESC throughout mode: for non-iso2022 escape sequences */ + RESERVE_OUTBUF(1) + **outbuf = c; /* assume as ISO-8859-1 */ + NEXT(1, 1) + if (IS_ESCEND(c)) { + STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) + } + continue; + } + + switch (c) { + case ESC: + RESERVE_INBUF(2) + if (IS_ISO2022ESC((*inbuf)[1])) { + int eslen; + + eslen = iso2022esclen(*inbuf, inleft); + if (eslen < 0) + return eslen == MBERR_INTERNAL ? 1 : eslen; + + if (eslen == 3) { + unsigned char charset; + + if ((*inbuf)[1] == '$') { + if ((*inbuf)[2] == '@' || (*inbuf)[2] == 'B') { + charset = (*inbuf)[2] | CHARSET_DOUBLEBYTE; + STATE_SETG0(state, charset); + } else + return 3; + } else { + if ((*inbuf)[2] == 'B' || (*inbuf)[2] == 'J') + charset = (*inbuf)[2]; + else + return 3; + + if ((*inbuf)[1] == '(') { + STATE_SETG0(state, charset) + } else if ((*inbuf)[1] == ')') { + STATE_SETG1(state, charset) + } else + return 3; + } + } else if (eslen == 4) { + if ((*inbuf)[1] == '$' && (*inbuf)[3] == 'D') { + if ((*inbuf)[2] == '(') { + STATE_SETG0(state, CHARSET_JISX0212) + } else if ((*inbuf)[2] == ')') { + STATE_SETG1(state, CHARSET_JISX0212) + } else + return 4; + } else + return 4; + } else + return eslen; + NEXT_IN(eslen) + } else { + STATE_SETFLAG(state, F_ESCTHROUGHOUT) + **outbuf = ESC; + NEXT(1, 1) + } + break; + case SI: + STATE_CLEARFLAG(state, F_SHIFTED) + NEXT_IN(1) + break; + case SO: + STATE_SETFLAG(state, F_SHIFTED) + NEXT_IN(1) + break; + case '\n': + STATE_CLEARFLAG(state, F_SHIFTED) + /* FALLTHROUGH */ + case SP: /* FALLTHROUGH */ + case DEL: + RESERVE_OUTBUF(1) + **outbuf = c; + NEXT(1, 1) + break; + default: + if ((c & 0x7f) < 0x20) { /* C0 and C1 */ + RESERVE_OUTBUF(1) + **outbuf = c & 0x7f; + NEXT(1, 1) + } else { + unsigned char charset; + + if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ + charset = STATE_GETG0(state); + else /* G1 */ + charset = STATE_GETG1(state); + + if (charset & CHARSET_DOUBLEBYTE) { + RESERVE_INBUF(2) + RESERVE_OUTBUF(1) + if (charset == CHARSET_JISX0208 || + charset == CHARSET_JISX0208_O) { + TRYMAP_DEC(jisx0208, **outbuf, c & 0x7f, + (*inbuf)[1] & 0x7f); + else return 2; + } else if (charset == CHARSET_JISX0212) { + TRYMAP_DEC(jisx0212, **outbuf, c & 0x7f, + (*inbuf)[1] & 0x7f); + else return 2; + } else + return MBERR_INTERNAL; + NEXT(2, 1) + } else if (charset == CHARSET_ASCII) { + RESERVE_OUTBUF(1) + **outbuf = c & 0x7f; + NEXT(1, 1) + } else if (charset == CHARSET_JISX0201_R) { + RESERVE_OUTBUF(1) + JISX0201_R_DECODE(c & 0x7f, **outbuf) + else + return 1; + NEXT(1, 1) + } else + return MBERR_INTERNAL; + } + } + } + + return 0; +} + +#include "codecentry.h" +BEGIN_CODEC_REGISTRY(iso_2022_jp_1) + MAPOPEN(ja_JP) + IMPORTMAP_DEC(jisx0208) + IMPORTMAP_DEC(jisx0212) + IMPORTMAP_ENC(jisxcommon) + MAPCLOSE() +END_CODEC_REGISTRY(iso_2022_jp_1) + +/* + * ex: ts=8 sts=4 et + */ 1.6 +235 -235 cjkcodecs/src/_iso_2022_kr.c Index: _iso_2022_kr.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_kr.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- _iso_2022_kr.c 2 Jun 2003 10:52:48 -0000 1.5 +++ _iso_2022_kr.c 6 Jul 2003 10:10:31 -0000 1.6 @@ -1,235 +1,235 @@ -/* - * _iso_2022_kr.c: the ISO-2022-KR codec (RFC1557) - * - * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * $Id: _iso_2022_kr.c,v 1.5 2003/06/02 10:52:48 perky Exp $ - */ - -#include "codeccommon.h" -#include "iso2022common.h" - -ENCMAP(cp949) -DECMAP(ksx1001) - -#define HAVE_ENCODER_INIT -ENCODER_INIT(iso_2022_kr) -{ - state->i = 0; - STATE_SETG0(state, CHARSET_ASCII) - STATE_SETG1(state, CHARSET_ASCII) - return 0; -} - -#define HAVE_ENCODER_RESET -ENCODER_RESET(iso_2022_kr) -{ - if (STATE_GETFLAG(state, F_SHIFTED)) { - RESERVE_OUTBUF(1) - **outbuf = SI; - NEXT_OUT(1) - STATE_CLEARFLAG(state, F_SHIFTED) - } - return 0; -} - -ENCODER(iso_2022_kr) -{ - while (inleft > 0) { - Py_UNICODE c = **inbuf; - DBCHAR code; - - if (c < 0x80) { - if (STATE_GETFLAG(state, F_SHIFTED)) { - PAVE2(SI, c) - STATE_CLEARFLAG(state, F_SHIFTED) - NEXT(1, 2) - } else { - PAVE1(c) - NEXT(1, 1) - } - if (c == '\n') - STATE_CLEARFLAG(state, F_SHIFTED) - } else UCS4INVALID(c) - else { - if (STATE_GETG1(state) != CHARSET_KSX1001) { - PAVE4(ESC, '$', ')', 'C') - STATE_SETG1(state, CHARSET_KSX1001) - NEXT_OUT(4) - } - - if (!STATE_GETFLAG(state, F_SHIFTED)) { - PAVE1(SO) - STATE_SETFLAG(state, F_SHIFTED) - NEXT_OUT(1) - } - - TRYMAP_ENC(cp949, code, c) { - if (code & 0x8000) /* MSB set: CP949 */ - return 1; - PAVE2(code >> 8, code & 0xff) - NEXT(1, 2) - } else - return 1; - } - } - - return 0; -} - -#define HAVE_DECODER_INIT -DECODER_INIT(iso_2022_kr) -{ - state->i = 0; - STATE_SETG0(state, CHARSET_ASCII) - STATE_SETG1(state, CHARSET_ASCII) - return 0; -} - -#define HAVE_DECODER_RESET -DECODER_RESET(iso_2022_kr) -{ - STATE_CLEARFLAG(state, F_SHIFTED) - return 0; -} - -DECODER(iso_2022_kr) -{ - while (inleft > 0) { - unsigned char c = **inbuf; - - if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { - /* ESC throughout mode: for non-iso2022 escape sequences */ - RESERVE_OUTBUF(1) - **outbuf = c; /* assume as ISO-8859-1 */ - NEXT(1, 1) - if (IS_ESCEND(c)) { - STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) - } - continue; - } - - switch (c) { - case ESC: - RESERVE_INBUF(2) - if (IS_ISO2022ESC((*inbuf)[1])) { - int eslen; - - eslen = iso2022esclen(*inbuf, inleft); - if (eslen < 0) - return eslen == MBERR_INTERNAL ? 1 : eslen; - - if (eslen == 3) { - if ((*inbuf)[2] == 'B') { /* ASCII */ - if ((*inbuf)[1] == '(') { - STATE_SETG0(state, CHARSET_ASCII) - } else if ((*inbuf)[1] == ')') { - STATE_SETG1(state, CHARSET_ASCII) - } else - return 3; - } else - return 3; - } else if (eslen == 4) { - if ((*inbuf)[1] == '$' && (*inbuf)[3] == 'C') { - /* KS X 1001 */ - if ((*inbuf)[2] == '(') { - STATE_SETG0(state, CHARSET_KSX1001) - } else if ((*inbuf)[2] == ')') { - STATE_SETG1(state, CHARSET_KSX1001) - } else - return 4; - } else - return 4; - } else - return eslen; - NEXT_IN(eslen) - } else { - ... [truncated message content] |