[KoCo-CVS] [Commit] cjkcodecs/src _iso_2022_jp_3.c codeccommon.h
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-07-08 07:02:05
|
perky 03/07/08 00:02:04 Modified: src codeccommon.h Added: src _iso_2022_jp_3.c Log: Add the ISO-2022-JP-3 codec. Revision Changes Path 1.20 +11 -1 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.19 retrieving revision 1.20 diff -u -r1.19 -r1.20 --- codeccommon.h 8 Jul 2003 04:40:44 -0000 1.19 +++ codeccommon.h 8 Jul 2003 07:02:03 -0000 1.20 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: codeccommon.h,v 1.19 2003/07/08 04:40:44 perky Exp $ + * $Id: codeccommon.h,v 1.20 2003/07/08 07:02:03 perky Exp $ */ #include "Python.h" @@ -87,6 +87,16 @@ #define RESERVE_OUTBUF(n) \ if (outleft < (n)) \ return MBERR_TOOSMALL; + +#define IN1 ((*inbuf)[0]) +#define IN2 ((*inbuf)[1]) +#define IN3 ((*inbuf)[2]) +#define IN4 ((*inbuf)[3]) + +#define OUT1(c) ((*outbuf)[0]) = (c); +#define OUT2(c) ((*outbuf)[1]) = (c); +#define OUT3(c) ((*outbuf)[2]) = (c); +#define OUT4(c) ((*outbuf)[3]) = (c); #define WRITE1(c1) \ RESERVE_OUTBUF(1) \ 1.1 cjkcodecs/src/_iso_2022_jp_3.c Index: _iso_2022_jp_3.c =================================================================== /* * _iso_2022_jp_3.c: the ISO-2022-JP-3 codec (JIS X 0213) * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $Id: _iso_2022_jp_3.c,v 1.1 2003/07/08 07:02:03 perky Exp $ */ #define USING_BINARY_PAIR_SEARCH #include "codeccommon.h" #include "iso2022common.h" #include "maps/map_jisx0213_pairs.h" #include "maps/alg_jisx0201.h" ENCMAP(jisxcommon) DECMAP(jisx0208) DECMAP(jisx0212) ENCMAP(jisx0213_bmp) DECMAP(jisx0213_1_bmp) DECMAP(jisx0213_2_bmp) ENCMAP(jisx0213_emp) DECMAP(jisx0213_1_emp) DECMAP(jisx0213_2_emp) #define EMPBASE 0x20000 #define HAVE_ENCODER_INIT ENCODER_INIT(iso_2022_jp_3) { state->i = 0; STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; } #define HAVE_ENCODER_RESET ENCODER_RESET(iso_2022_jp_3) { if (STATE_GETG0(state) != CHARSET_ASCII) { WRITE3(ESC, '(', 'B') STATE_SETG0(state, CHARSET_ASCII) NEXT_OUT(3) } return 0; } ENCODER(iso_2022_jp_3) { while (inleft > 0) { unsigned char charset; ucs4_t c = IN1; DBCHAR code; size_t insize = 1; if (c < 0x80) { switch (STATE_GETG0(state)) { case CHARSET_ASCII: WRITE1(c) NEXT(1, 1) break; default: WRITE4(ESC, '(', 'B', c) STATE_SETG0(state, CHARSET_ASCII) NEXT(1, 4) break; } if (c == '\n') STATE_CLEARFLAG(state, F_SHIFTED) continue; } #if Py_UNICODE_SIZE == 2 if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ RESERVE_INBUF(2) if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ c = 0x10000 + ((c - 0xd800) << 10) + ((ucs4_t)IN2 - 0xdc00); insize = 2; } } #endif if (c <= 0xffff) { TRYMAP_ENC(jisx0213_bmp, code, c) { if (code == MULTIC) { if (inleft < 2) { if (flags & MBENC_FLUSH) { code = find_pairencmap(c, 0, jisx0213_pairencmap, JISX0213_ENCPAIRS); if (code == DBCINV) return 1; } else return MBERR_TOOFEW; } else { code = find_pairencmap(c, IN2, jisx0213_pairencmap, JISX0213_ENCPAIRS); if (code == DBCINV) { code = find_pairencmap(c, 0, jisx0213_pairencmap, JISX0213_ENCPAIRS); if (code == DBCINV) return 1; } else insize = 2; } } } else if (c == 0xff3c) /* F/W REVERSE SOLIDUS */ code = 0x2140; else return 1; } else if (c >> 16 == EMPBASE >> 16) { TRYMAP_ENC(jisx0213_emp, code, c & 0xffff); else return insize; } else return insize; charset = STATE_GETG0(state); if (code & 0x8000) { /* MSB set: Plane 2 */ if (charset != CHARSET_JISX0213_2) { WRITE4(ESC, '$', '(', 'P') STATE_SETG0(state, CHARSET_JISX0213_2) NEXT_OUT(4) } WRITE2((code >> 8) & 0x7f, code & 0x7f) } else { /* MSB unset: Plane 1 */ if (charset != CHARSET_JISX0213_1) { WRITE4(ESC, '$', '(', 'O') STATE_SETG0(state, CHARSET_JISX0213_1) NEXT_OUT(4) } WRITE2(code >> 8, code & 0xff) } NEXT(insize, 2) } return 0; } #define HAVE_DECODER_INIT DECODER_INIT(iso_2022_jp_3) { state->i = 0; STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; } #define HAVE_DECODER_RESET DECODER_RESET(iso_2022_jp_3) { STATE_CLEARFLAG(state, F_SHIFTED) return 0; } DECODER(iso_2022_jp_3) { while (inleft > 0) { unsigned char c = IN1; if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { /* ESC throughout mode: for non-iso2022 escape sequences */ RESERVE_OUTBUF(1) OUT1(c) /* assume as ISO-8859-1 */ NEXT(1, 1) if (IS_ESCEND(c)) { STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) } continue; } switch (c) { case ESC: RESERVE_INBUF(2) if (IS_ISO2022ESC(IN2)) { int eslen; unsigned char charset; eslen = iso2022esclen(*inbuf, inleft); if (eslen < 0) return eslen == MBERR_INTERNAL ? 1 : eslen; if (eslen == 3) { if (IN2 == '$') { if (IN3 == 'B') { charset = IN3 | CHARSET_DOUBLEBYTE; STATE_SETG0(state, charset); } else return 3; } else { if (IN3 == 'B') charset = IN3; else return 3; if (IN2 == '(') { STATE_SETG0(state, charset) } else if (IN2 == ')') { STATE_SETG1(state, charset) } else return 3; } } else if (eslen == 4) { if (IN2 == '$' && (IN4 == 'O' || IN4 == 'P')) { charset = IN4 | CHARSET_DOUBLEBYTE; if (IN3 == '(') { STATE_SETG0(state, charset) } else if (IN3 == ')') { STATE_SETG1(state, charset) } else return 4; } else return 4; } else return eslen; NEXT_IN(eslen) } else { STATE_SETFLAG(state, F_ESCTHROUGHOUT) OUT1(ESC) NEXT(1, 1) } break; case SI: STATE_CLEARFLAG(state, F_SHIFTED) NEXT_IN(1) break; case SO: STATE_SETFLAG(state, F_SHIFTED) NEXT_IN(1) break; case '\n': STATE_CLEARFLAG(state, F_SHIFTED) /* FALLTHROUGH */ case SP: /* FALLTHROUGH */ case DEL: RESERVE_OUTBUF(1) OUT1(c) NEXT(1, 1) break; default: if ((c & 0x7f) < 0x20) { /* C0 and C1 */ RESERVE_OUTBUF(1) OUT1(c & 0x7f) NEXT(1, 1) } else { unsigned char charset; ucs4_t code; if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ charset = STATE_GETG0(state); else /* G1 */ charset = STATE_GETG1(state); if (charset & CHARSET_DOUBLEBYTE) { RESERVE_INBUF(2) RESERVE_OUTBUF(1) if (charset == CHARSET_JISX0213_1) { TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c & 0x7f, IN2 & 0x7f); else TRYMAP_DEC(jisx0213_1_emp, code, c & 0x7f, IN2 & 0x7f) { PUTUCS4(EMPBASE | code) NEXT_IN(2) continue; } else return 2; } else if (charset == CHARSET_JISX0213_2) { TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c & 0x7f, IN2 & 0x7f); else TRYMAP_DEC(jisx0213_2_emp, code, c & 0x7f, IN2 & 0x7f) { PUTUCS4(EMPBASE | code) NEXT_IN(2) continue; } else return 2; } else return MBERR_INTERNAL; NEXT(2, 1) } else if (charset == CHARSET_ASCII) { RESERVE_OUTBUF(1) OUT1(c & 0x7f) NEXT(1, 1) } else return MBERR_INTERNAL; } } } return 0; } #include "codecentry.h" BEGIN_CODEC_REGISTRY(iso_2022_jp_3) MAPOPEN(ja_JP) IMPORTMAP_DEC(jisx0208) IMPORTMAP_DEC(jisx0212) IMPORTMAP_ENC(jisxcommon) IMPORTMAP_ENC(jisx0213_bmp) IMPORTMAP_DEC(jisx0213_1_bmp) IMPORTMAP_DEC(jisx0213_2_bmp) IMPORTMAP_ENC(jisx0213_emp) IMPORTMAP_DEC(jisx0213_1_emp) IMPORTMAP_DEC(jisx0213_2_emp) MAPCLOSE() END_CODEC_REGISTRY(iso_2022_jp_3) /* * ex: ts=8 sts=4 et */ |