Thread: [KoCo-CVS] [Commit] cjkcodecs/src _iso_2022_jp.c _iso_2022_jp_1.c _iso_2022_jp_3.c _iso_2022_kr.c iso2022common.h
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-07-09 18:47:49
|
perky 03/07/09 11:47:47 Modified: src _iso_2022_jp.c _iso_2022_jp_1.c _iso_2022_jp_3.c _iso_2022_kr.c iso2022common.h Log: Revamp iso-2022 decoder implementations. Revision Changes Path 1.7 +33 -112 cjkcodecs/src/_iso_2022_jp.c Index: _iso_2022_jp.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_jp.c,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- _iso_2022_jp.c 7 Jul 2003 08:26:19 -0000 1.6 +++ _iso_2022_jp.c 9 Jul 2003 18:47:47 -0000 1.7 @@ -26,9 +26,12 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _iso_2022_jp.c,v 1.6 2003/07/07 08:26:19 perky Exp $ + * $Id: _iso_2022_jp.c,v 1.7 2003/07/09 18:47:47 perky Exp $ */ +#define ISO2022_DESIGNATIONS \ + CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, CHARSET_JISX0208_O + #include "codeccommon.h" #include "iso2022common.h" #include "maps/alg_jisx0201.h" @@ -39,7 +42,7 @@ #define HAVE_ENCODER_INIT ENCODER_INIT(iso_2022_jp) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -129,7 +132,7 @@ #define HAVE_DECODER_INIT DECODER_INIT(iso_2022_jp) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -144,118 +147,36 @@ DECODER(iso_2022_jp) { - while (inleft > 0) { - unsigned char c = **inbuf; - - if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { - /* ESC throughout mode: for non-iso2022 escape sequences */ - RESERVE_OUTBUF(1) - **outbuf = c; /* assume as ISO-8859-1 */ - NEXT(1, 1) - if (IS_ESCEND(c)) { - STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) - } - continue; - } + ISO2022_LOOP_BEGIN + unsigned char charset, c2; - switch (c) { - case ESC: - RESERVE_INBUF(2) - if (IS_ISO2022ESC((*inbuf)[1])) { - int eslen; - - eslen = iso2022esclen(*inbuf, inleft); - if (eslen < 0) - return eslen == MBERR_INTERNAL ? 
1 : eslen; - - if (eslen == 3) { - unsigned char charset; - - if ((*inbuf)[1] == '$') { - if ((*inbuf)[2] == '@' || (*inbuf)[2] == 'B') { - charset = (*inbuf)[2] | CHARSET_DOUBLEBYTE; - STATE_SETG0(state, charset); - } else - return 3; - } else { - if ((*inbuf)[2] == 'B' || (*inbuf)[2] == 'J') - charset = (*inbuf)[2]; - else - return 3; - - if ((*inbuf)[1] == '(') { - STATE_SETG0(state, charset) - } else if ((*inbuf)[1] == ')') { - STATE_SETG1(state, charset) - } else - return 3; - } - } else - return eslen; - NEXT_IN(eslen) - } else { - STATE_SETFLAG(state, F_ESCTHROUGHOUT) - **outbuf = ESC; - NEXT(1, 1) - } - break; - case SI: - STATE_CLEARFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case SO: - STATE_SETFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case '\n': - STATE_CLEARFLAG(state, F_SHIFTED) - /* FALLTHROUGH */ - case SP: /* FALLTHROUGH */ - case DEL: - RESERVE_OUTBUF(1) - **outbuf = c; - NEXT(1, 1) - break; - default: - if ((c & 0x7f) < 0x20) { /* C0 and C1 */ - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else { - unsigned char charset; + ISO2022_GETCHARSET(charset, c, c2) - if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ - charset = STATE_GETG0(state); - else /* G1 */ - charset = STATE_GETG1(state); - - if (charset & CHARSET_DOUBLEBYTE) { - /* all double byte character sets are in JIS X 0208 here. - * this means that we don't distinguish :1978 from :1983. */ - RESERVE_INBUF(2) - RESERVE_OUTBUF(1) - TRYMAP_DEC(jisx0208, **outbuf, c & 0x7f, - (*inbuf)[1] & 0x7f) { - NEXT(2, 1) - } else - return 2; - } else if (charset == CHARSET_ASCII) { - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else if (charset == CHARSET_JISX0201_R) { - RESERVE_OUTBUF(1) - JISX0201_R_DECODE(c & 0x7f, **outbuf) - else - return 1; - NEXT(1, 1) - } else - return MBERR_INTERNAL; - } - } - } + if (charset & CHARSET_DOUBLEBYTE) { + /* all double byte character sets are in JIS X 0208 here. + * this means that we don't distinguish :1978 from :1983. 
*/ + RESERVE_INBUF(2) + RESERVE_OUTBUF(1) + c2 &= IN2; + TRYMAP_DEC(jisx0208, **outbuf, c, c2) { + NEXT(2, 1) + } else + return 2; + } else if (charset == CHARSET_ASCII) { + RESERVE_OUTBUF(1) + OUT1(c) + NEXT(1, 1) + } else if (charset == CHARSET_JISX0201_R) { + RESERVE_OUTBUF(1) + JISX0201_R_DECODE(c, **outbuf) + else + return 1; + NEXT(1, 1) + } else + return MBERR_INTERNAL; + ISO2022_LOOP_END - return 0; + return 0; } #include "codecentry.h" 1.6 +37 -127 cjkcodecs/src/_iso_2022_jp_1.c Index: _iso_2022_jp_1.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_jp_1.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- _iso_2022_jp_1.c 7 Jul 2003 08:26:19 -0000 1.5 +++ _iso_2022_jp_1.c 9 Jul 2003 18:47:47 -0000 1.6 @@ -26,9 +26,13 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _iso_2022_jp_1.c,v 1.5 2003/07/07 08:26:19 perky Exp $ + * $Id: _iso_2022_jp_1.c,v 1.6 2003/07/09 18:47:47 perky Exp $ */ +#define ISO2022_DESIGNATIONS \ + CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, \ + CHARSET_JISX0208_O, CHARSET_JISX0212 + #include "codeccommon.h" #include "iso2022common.h" #include "maps/alg_jisx0201.h" @@ -40,7 +44,7 @@ #define HAVE_ENCODER_INIT ENCODER_INIT(iso_2022_jp_1) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -137,7 +141,7 @@ #define HAVE_DECODER_INIT DECODER_INIT(iso_2022_jp_1) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -152,133 +156,39 @@ DECODER(iso_2022_jp_1) { - while (inleft > 0) { - unsigned char c = **inbuf; + ISO2022_LOOP_BEGIN + unsigned char charset, c2; - if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { - /* ESC throughout mode: for non-iso2022 escape sequences */ - RESERVE_OUTBUF(1) - **outbuf = c; /* assume as ISO-8859-1 */ - NEXT(1, 
1) - if (IS_ESCEND(c)) { - STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) - } - continue; - } + ISO2022_GETCHARSET(charset, c, c2) - switch (c) { - case ESC: - RESERVE_INBUF(2) - if (IS_ISO2022ESC((*inbuf)[1])) { - int eslen; - - eslen = iso2022esclen(*inbuf, inleft); - if (eslen < 0) - return eslen == MBERR_INTERNAL ? 1 : eslen; - - if (eslen == 3) { - unsigned char charset; - - if ((*inbuf)[1] == '$') { - if ((*inbuf)[2] == '@' || (*inbuf)[2] == 'B') { - charset = (*inbuf)[2] | CHARSET_DOUBLEBYTE; - STATE_SETG0(state, charset); - } else - return 3; - } else { - if ((*inbuf)[2] == 'B' || (*inbuf)[2] == 'J') - charset = (*inbuf)[2]; - else - return 3; - - if ((*inbuf)[1] == '(') { - STATE_SETG0(state, charset) - } else if ((*inbuf)[1] == ')') { - STATE_SETG1(state, charset) - } else - return 3; - } - } else if (eslen == 4) { - if ((*inbuf)[1] == '$' && (*inbuf)[3] == 'D') { - if ((*inbuf)[2] == '(') { - STATE_SETG0(state, CHARSET_JISX0212) - } else if ((*inbuf)[2] == ')') { - STATE_SETG1(state, CHARSET_JISX0212) - } else - return 4; - } else - return 4; - } else - return eslen; - NEXT_IN(eslen) - } else { - STATE_SETFLAG(state, F_ESCTHROUGHOUT) - **outbuf = ESC; - NEXT(1, 1) - } - break; - case SI: - STATE_CLEARFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case SO: - STATE_SETFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case '\n': - STATE_CLEARFLAG(state, F_SHIFTED) - /* FALLTHROUGH */ - case SP: /* FALLTHROUGH */ - case DEL: - RESERVE_OUTBUF(1) - **outbuf = c; - NEXT(1, 1) - break; - default: - if ((c & 0x7f) < 0x20) { /* C0 and C1 */ - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else { - unsigned char charset; - - if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ - charset = STATE_GETG0(state); - else /* G1 */ - charset = STATE_GETG1(state); - - if (charset & CHARSET_DOUBLEBYTE) { - RESERVE_INBUF(2) - RESERVE_OUTBUF(1) - if (charset == CHARSET_JISX0208 || - charset == CHARSET_JISX0208_O) { - TRYMAP_DEC(jisx0208, **outbuf, c & 0x7f, - 
(*inbuf)[1] & 0x7f); - else return 2; - } else if (charset == CHARSET_JISX0212) { - TRYMAP_DEC(jisx0212, **outbuf, c & 0x7f, - (*inbuf)[1] & 0x7f); - else return 2; - } else - return MBERR_INTERNAL; - NEXT(2, 1) - } else if (charset == CHARSET_ASCII) { - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else if (charset == CHARSET_JISX0201_R) { - RESERVE_OUTBUF(1) - JISX0201_R_DECODE(c & 0x7f, **outbuf) - else - return 1; - NEXT(1, 1) - } else - return MBERR_INTERNAL; - } - } - } + if (charset & CHARSET_DOUBLEBYTE) { + RESERVE_INBUF(2) + RESERVE_OUTBUF(1) + c2 &= IN2; + if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) { + TRYMAP_DEC(jisx0208, **outbuf, c, c2); + else return 2; + } else if (charset == CHARSET_JISX0212) { + TRYMAP_DEC(jisx0212, **outbuf, c, c2); + else return 2; + } else + return MBERR_INTERNAL; + NEXT(2, 1) + } else if (charset == CHARSET_ASCII) { + RESERVE_OUTBUF(1) + OUT1(c) + NEXT(1, 1) + } else if (charset == CHARSET_JISX0201_R) { + RESERVE_OUTBUF(1) + JISX0201_R_DECODE(c, **outbuf) + else + return 1; + NEXT(1, 1) + } else + return MBERR_INTERNAL; + ISO2022_LOOP_END - return 0; + return 0; } #include "codecentry.h" 1.5 +39 -132 cjkcodecs/src/_iso_2022_jp_3.c Index: _iso_2022_jp_3.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_jp_3.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- _iso_2022_jp_3.c 8 Jul 2003 08:47:02 -0000 1.4 +++ _iso_2022_jp_3.c 9 Jul 2003 18:47:47 -0000 1.5 @@ -26,10 +26,12 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _iso_2022_jp_3.c,v 1.4 2003/07/08 08:47:02 perky Exp $ + * $Id: _iso_2022_jp_3.c,v 1.5 2003/07/09 18:47:47 perky Exp $ */ #define USING_BINARY_PAIR_SEARCH +#define ISO2022_DESIGNATIONS \ + CHARSET_ASCII, CHARSET_JISX0208, CHARSET_JISX0213_1, CHARSET_JISX0213_2 #include "codeccommon.h" #include "iso2022common.h" #include "maps/map_jisx0213_pairs.h" @@ -49,7 +51,7 @@ #define HAVE_ENCODER_INIT ENCODER_INIT(iso_2022_jp_3) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -163,7 +165,7 @@ #define HAVE_DECODER_INIT DECODER_INIT(iso_2022_jp_3) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -178,141 +180,46 @@ DECODER(iso_2022_jp_3) { - while (inleft > 0) { - unsigned char c = IN1; + ISO2022_LOOP_BEGIN + unsigned char charset, c2; + ucs4_t code; - if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { - /* ESC throughout mode: for non-iso2022 escape sequences */ - RESERVE_OUTBUF(1) - OUT1(c) /* assume as ISO-8859-1 */ - NEXT(1, 1) - if (IS_ESCEND(c)) { - STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) - } - continue; - } + ISO2022_GETCHARSET(charset, c, c2) - switch (c) { - case ESC: + if (charset & CHARSET_DOUBLEBYTE) { RESERVE_INBUF(2) - if (IS_ISO2022ESC(IN2)) { - int eslen; - unsigned char charset; - - eslen = iso2022esclen(*inbuf, inleft); - if (eslen < 0) - return eslen == MBERR_INTERNAL ? 
1 : eslen; - - if (eslen == 3) { - if (IN2 == '$') { - if (IN3 == 'B') { - charset = IN3 | CHARSET_DOUBLEBYTE; - STATE_SETG0(state, charset); - } else - return 3; - } else { - if (IN3 == 'B') - charset = IN3; - else - return 3; - - if (IN2 == '(') { - STATE_SETG0(state, charset) - } else if (IN2 == ')') { - STATE_SETG1(state, charset) - } else - return 3; - } - } else if (eslen == 4) { - if (IN2 == '$' && (IN4 == 'O' || IN4 == 'P')) { - charset = IN4 | CHARSET_DOUBLEBYTE; - if (IN3 == '(') { - STATE_SETG0(state, charset) - } else if (IN3 == ')') { - STATE_SETG1(state, charset) - } else - return 4; - } else - return 4; - } else - return eslen; - NEXT_IN(eslen) - } else { - STATE_SETFLAG(state, F_ESCTHROUGHOUT) - OUT1(ESC) - NEXT(1, 1) - } - break; - case SI: - STATE_CLEARFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case SO: - STATE_SETFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case '\n': - STATE_CLEARFLAG(state, F_SHIFTED) - /* FALLTHROUGH */ - case SP: /* FALLTHROUGH */ - case DEL: + RESERVE_OUTBUF(1) + c2 &= IN2; + if (charset == CHARSET_JISX0213_1) { + if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c; + else TRYMAP_DEC(jisx0208, **outbuf, c, c2); + else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2); + else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) { + PUTUCS4(EMPBASE | code) + NEXT_IN(2) + continue; + } else TRYMAP_DEC(jisx0213_pair, code, c, c2) { + WRITE2(code >> 16, code & 0xffff) + NEXT(2, 2) + continue; + } else return 2; + } else if (charset == CHARSET_JISX0213_2) { + TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c, c2); + else TRYMAP_DEC(jisx0213_2_emp, code, c, c2) { + PUTUCS4(EMPBASE | code) + NEXT_IN(2) + continue; + } else return 2; + } else + return MBERR_INTERNAL; + NEXT(2, 1) + } else if (charset == CHARSET_ASCII) { RESERVE_OUTBUF(1) OUT1(c) NEXT(1, 1) - break; - default: - if ((c & 0x7f) < 0x20) { /* C0 and C1 */ - RESERVE_OUTBUF(1) - OUT1(c & 0x7f) - NEXT(1, 1) - } else { - unsigned char charset, c2; - ucs4_t code; - - if (!STATE_GETFLAG(state, F_SHIFTED) 
&& c < 0x80) { /* G0 */ - charset = STATE_GETG0(state); - c2 = IN2; - } else { /* G1 */ - charset = STATE_GETG1(state); - c &= 0x7f; - c2 = IN2 & 0x7f; - } - - if (charset & CHARSET_DOUBLEBYTE) { - RESERVE_INBUF(2) - RESERVE_OUTBUF(1) - if (charset == CHARSET_JISX0213_1) { - if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c; - else TRYMAP_DEC(jisx0208, **outbuf, c, c2); - else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2); - else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) { - PUTUCS4(EMPBASE | code) - NEXT_IN(2) - continue; - } else TRYMAP_DEC(jisx0213_pair, code, c, c2) { - WRITE2(code >> 16, code & 0xffff) - NEXT(2, 2) - continue; - } else return 2; - } else if (charset == CHARSET_JISX0213_2) { - TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c, c2); - else TRYMAP_DEC(jisx0213_2_emp, code, c, c2) { - PUTUCS4(EMPBASE | code) - NEXT_IN(2) - continue; - } else return 2; - } else - return MBERR_INTERNAL; - NEXT(2, 1) - } else if (charset == CHARSET_ASCII) { - RESERVE_OUTBUF(1) - OUT1(c) - NEXT(1, 1) - } else - return MBERR_INTERNAL; - } - } - } + } else + return MBERR_INTERNAL; + ISO2022_LOOP_END return 0; } 1.8 +25 -103 cjkcodecs/src/_iso_2022_kr.c Index: _iso_2022_kr.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_kr.c,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- _iso_2022_kr.c 7 Jul 2003 08:26:19 -0000 1.7 +++ _iso_2022_kr.c 9 Jul 2003 18:47:47 -0000 1.8 @@ -26,9 +26,12 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _iso_2022_kr.c,v 1.7 2003/07/07 08:26:19 perky Exp $ + * $Id: _iso_2022_kr.c,v 1.8 2003/07/09 18:47:47 perky Exp $ */ +#define ISO2022_DESIGNATIONS \ + CHARSET_ASCII, CHARSET_KSX1001 + #include "codeccommon.h" #include "iso2022common.h" @@ -38,7 +41,7 @@ #define HAVE_ENCODER_INIT ENCODER_INIT(iso_2022_kr) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -49,7 +52,7 @@ { if (STATE_GETFLAG(state, F_SHIFTED)) { RESERVE_OUTBUF(1) - **outbuf = SI; + OUT1(SI) NEXT_OUT(1) STATE_CLEARFLAG(state, F_SHIFTED) } @@ -103,7 +106,7 @@ #define HAVE_DECODER_INIT DECODER_INIT(iso_2022_kr) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -118,108 +121,27 @@ DECODER(iso_2022_kr) { - while (inleft > 0) { - unsigned char c = **inbuf; - - if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { - /* ESC throughout mode: for non-iso2022 escape sequences */ - RESERVE_OUTBUF(1) - **outbuf = c; /* assume as ISO-8859-1 */ - NEXT(1, 1) - if (IS_ESCEND(c)) { - STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) - } - continue; - } + ISO2022_LOOP_BEGIN + unsigned char charset, c2; - switch (c) { - case ESC: - RESERVE_INBUF(2) - if (IS_ISO2022ESC((*inbuf)[1])) { - int eslen; - - eslen = iso2022esclen(*inbuf, inleft); - if (eslen < 0) - return eslen == MBERR_INTERNAL ? 
1 : eslen; - - if (eslen == 3) { - if ((*inbuf)[2] == 'B') { /* ASCII */ - if ((*inbuf)[1] == '(') { - STATE_SETG0(state, CHARSET_ASCII) - } else if ((*inbuf)[1] == ')') { - STATE_SETG1(state, CHARSET_ASCII) - } else - return 3; - } else - return 3; - } else if (eslen == 4) { - if ((*inbuf)[1] == '$' && (*inbuf)[3] == 'C') { - /* KS X 1001 */ - if ((*inbuf)[2] == '(') { - STATE_SETG0(state, CHARSET_KSX1001) - } else if ((*inbuf)[2] == ')') { - STATE_SETG1(state, CHARSET_KSX1001) - } else - return 4; - } else - return 4; - } else - return eslen; - NEXT_IN(eslen) - } else { - STATE_SETFLAG(state, F_ESCTHROUGHOUT) - **outbuf = ESC; - NEXT(1, 1) - } - break; - case SI: - STATE_CLEARFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case SO: - STATE_SETFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case '\n': - STATE_CLEARFLAG(state, F_SHIFTED) - /* FALLTHROUGH */ - case SP: /* FALLTHROUGH */ - case DEL: - RESERVE_OUTBUF(1) - **outbuf = c; - NEXT(1, 1) - break; - default: - if ((c & 0x7f) < 0x20) { /* C0 and C1 */ - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else { - unsigned char charset; + ISO2022_GETCHARSET(charset, c, c2) - if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ - charset = STATE_GETG0(state); - else /* G1 */ - charset = STATE_GETG1(state); - - if (charset & CHARSET_DOUBLEBYTE) { - /* all double byte character sets are in KS X 1001 here */ - RESERVE_INBUF(2) - RESERVE_OUTBUF(1) - TRYMAP_DEC(ksx1001, **outbuf, c & 0x7f, (*inbuf)[1] & 0x7f){ - NEXT(2, 1) - } else - return 2; - } else { - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } - } - } + if (charset & CHARSET_DOUBLEBYTE) { + /* all double byte character sets are in KS X 1001 here */ + RESERVE_INBUF(2) + RESERVE_OUTBUF(1) + c2 &= IN2; + TRYMAP_DEC(ksx1001, **outbuf, c, c2) { + NEXT(2, 1) + } else + return 2; + } else { + RESERVE_OUTBUF(1) + OUT1(c); + NEXT(1, 1) } - - return 0; + ISO2022_LOOP_END + return 0; } #include "codecentry.h" 1.3 +134 -17 
cjkcodecs/src/iso2022common.h Index: iso2022common.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/iso2022common.h,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- iso2022common.h 6 Jul 2003 17:32:25 -0000 1.2 +++ iso2022common.h 9 Jul 2003 18:47:47 -0000 1.3 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: iso2022common.h,v 1.2 2003/07/06 17:32:25 perky Exp $ + * $Id: iso2022common.h,v 1.3 2003/07/09 18:47:47 perky Exp $ */ /* This ISO-2022 implementation is intended to comply ECMA-43 Level 1 @@ -85,35 +85,152 @@ #define F_SHIFTED 0x01 #define F_ESCTHROUGHOUT 0x02 -#define STATE_SETG0(s, v) ((s)->c[0]) = (v); -#define STATE_GETG0(s) ((s)->c[0]) +#define STATE_SETG(dn, s, v) ((s)->c[dn]) = (v); +#define STATE_GETG(dn, s) ((s)->c[dn]) -#define STATE_SETG1(s, v) ((s)->c[1]) = (v); -#define STATE_GETG1(s) ((s)->c[1]) - -#define STATE_SETG2(s, v) ((s)->c[2]) = (v); -#define STATE_GETG2(s) ((s)->c[2]) - -#define STATE_SETG3(s, v) ((s)->c[3]) = (v); -#define STATE_GETG3(s) ((s)->c[3]) +#define STATE_SETG0(s, v) STATE_SETG(0, s, v) +#define STATE_GETG0(s) STATE_GETG(0, s) +#define STATE_SETG1(s, v) STATE_SETG(1, s, v) +#define STATE_GETG1(s) STATE_GETG(1, s) +#define STATE_SETG2(s, v) STATE_SETG(2, s, v) +#define STATE_GETG2(s) STATE_GETG(2, s) +#define STATE_SETG3(s, v) STATE_SETG(3, s, v) +#define STATE_GETG3(s) STATE_GETG(3, s) #define STATE_SETFLAG(s, f) ((s)->c[4]) |= (f); #define STATE_GETFLAG(s, f) ((s)->c[4] & (f)) #define STATE_CLEARFLAG(s, f) ((s)->c[4]) &= ~(f); +#define STATE_CLEARFLAGS(s) ((s)->c[4]) = 0; + +#define ISO2022_GETCHARSET(charset, c1, c2mask) \ + if (STATE_GETFLAG(state, F_SHIFTED) || (c) >= 0x80) { /* G1 */ \ + (charset) = STATE_GETG1(state); \ + (c) &= 0x7f; (c2mask) = 0x7f; \ + } else { /* G1 */ \ + (charset) = STATE_GETG0(state); \ + (c2mask) = 0xff; \ + } + +#define 
ISO2022_BASECASES(c1) \ + case ESC: \ + RESERVE_INBUF(2) \ + if (IS_ISO2022ESC(IN2)) { \ + int err; \ + err = iso2022processesc(state, inbuf, &inleft); \ + if (err != 0) \ + return err; \ + } else { \ + STATE_SETFLAG(state, F_ESCTHROUGHOUT) \ + OUT1(ESC) \ + NEXT(1, 1) \ + } \ + break; \ + case SI: \ + STATE_CLEARFLAG(state, F_SHIFTED) \ + NEXT_IN(1) \ + break; \ + case SO: \ + STATE_SETFLAG(state, F_SHIFTED) \ + NEXT_IN(1) \ + break; \ + case '\n': \ + STATE_CLEARFLAG(state, F_SHIFTED) \ + /* FALLTHROUGH */ \ + case SP: /* FALLTHROUGH */ \ + case DEL: \ + RESERVE_OUTBUF(1) \ + OUT1(c1) \ + NEXT(1, 1) \ + break; + +#define ISO2022_ESCTHROUGHOUT(c) \ + if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { \ + /* ESC throughout mode: for non-iso2022 escape sequences */ \ + RESERVE_OUTBUF(1) \ + OUT1(c) /* assume as ISO-8859-1 */ \ + NEXT(1, 1) \ + if (IS_ESCEND(c)) { \ + STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) \ + } \ + continue; \ + } + +#define ISO2022_LOOP_BEGIN \ + while (inleft > 0) { \ + unsigned char c = IN1; \ + ISO2022_ESCTHROUGHOUT(c) \ + switch(c) { \ + ISO2022_BASECASES(c) \ + default: \ + if ((c & 0x7f) < 0x20) { /* C0 and C1 */ \ + RESERVE_OUTBUF(1) \ + OUT1(c & 0x7f) \ + NEXT(1, 1) \ + } else { +#define ISO2022_LOOP_END \ + } \ + } \ + } static int -iso2022esclen(const unsigned char *s, size_t len) +iso2022processesc(MultibyteCodec_State *state, + const unsigned char **inbuf, size_t *inleft) { - int i; + unsigned char charset, designation; + int i, esclen; for (i = 1;i < MAX_ESCSEQLEN;i++) { - if (i >= len) + if (i >= *inleft) return MBERR_TOOFEW; - if (IS_ESCEND(s[i])) - return i + 1; + if (IS_ESCEND((*inbuf)[i])) { + esclen = i + 1; + break; + } + } + + if (i >= MAX_ESCSEQLEN) + return 1; /* unterminated escape sequence */ + + switch (esclen) { + case 3: + if (IN2 == '$') { + charset = IN3 | CHARSET_DOUBLEBYTE; + designation = 0; + } else { + charset = IN3; + if (IN2 == '(') designation = 0; + else if (IN2 == ')') designation = 1; + else return 3; + } + 
break; + case 4: + if (IN2 != '$') + return 4; + + charset = IN4 | CHARSET_DOUBLEBYTE; + if (IN3 == '(') designation = 0; + else if (IN3 == ')') designation = 1; + else return 4; + break; + default: + return esclen; + } + + { /* raise error when the charset is not designated for this encoding */ + const unsigned char dsgs[] = {ISO2022_DESIGNATIONS, '\x00'}; + + for (i = 0; dsgs[i] != '\x00'; i++) + if (dsgs[i] == charset) + break; + + if (dsgs[i] == '\x00') + return esclen; } - return MBERR_INTERNAL; /* unterminated escape sequence */ + STATE_SETG(designation, state, charset) + *inleft -= esclen; + (*inbuf) += esclen; + return 0; } /* |