[KoCo-CVS] [Commit] KoreanCodecs/src Setup.in cp949_codec.h euckr_codec.h twobytestream.c
Brought to you by:
perky
From: Chang <pe...@us...> - 2002-04-28 19:44:47
|
perky 02/04/27 17:51:51 Modified: src Setup.in cp949_codec.h euckr_codec.h Removed: src twobytestream.c Log: Fix several bugs in the previous cp949 and euc-kr codecs. - Handle the error on a trailing incomplete character. - Raise on error='strict' with the right data. Revision Changes Path 1.4 +0 -1 KoreanCodecs/src/Setup.in Index: Setup.in =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/Setup.in,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- Setup.in 27 Apr 2002 04:48:37 -0000 1.3 +++ Setup.in 28 Apr 2002 00:51:51 -0000 1.4 @@ -1,4 +1,3 @@ *shared* _koco _koco.c hangul hangul.c -twobytestream twobytestream.c 1.2 +19 -8 KoreanCodecs/src/cp949_codec.h Index: cp949_codec.h =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/cp949_codec.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- cp949_codec.h 26 Apr 2002 21:11:13 -0000 1.1 +++ cp949_codec.h 28 Apr 2002 00:51:51 -0000 1.2 @@ -4,10 +4,10 @@ * KoreanCodecs CP949 Codec C Implementation * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/26 21:11:13 $ + * Date : $Date: 2002/04/28 00:51:51 $ * Created : 15 March 2002 * - * $Revision: 1.1 $ + * $Revision: 1.2 $ */ static char cp949_decode__doc__[] = "CP949 decoder"; @@ -30,7 +30,20 @@ destcur = destptr = PyMem_New(Py_UNICODE, arglen+1); for (srccur = argstr, srcend = argstr + arglen; srccur < srcend; srccur++) { - if ((*srccur & 0x80) && (srccur+1 < srcend)) { + if (*srccur & 0x80) { + if (srccur+1 >= srcend) { + switch (errtype) { + case error_strict: + PyMem_Del(destptr); + PyErr_Format(PyExc_UnicodeError, + "CP949 decoding error: invalid character \\x%02x", *srccur); + return NULL; + case error_replace: + *(destcur++) = UNIFIL; + break; + case error_ignore: break; + } + } else { if (uhc_decode_hint[*srccur]) { /* UHC page0 region */ codemap = uhc_decode_map[*srccur & 0x7F]; /* codemap DOES have 
all maps on 0x81-0xA0, alphabet area can't on this */ @@ -63,25 +76,23 @@ goto invalid; *(destcur++) = code; srccur++; - continue; } else { -invalid: srccur++; /* skip 2byte */ - switch (errtype) { +invalid: switch (errtype) { case error_strict: PyMem_Del(destptr); PyErr_Format(PyExc_UnicodeError, "CP949 decoding error: invalid character \\x%02x%02x", srccur[0], srccur[1]); return NULL; - break; case error_replace: *(destcur++) = UNIFIL; break; - /* case error_ignore: break; */ + case error_ignore: break; } - continue; + srccur++; } } + } } else *(destcur++) = *srccur; } 1.2 +22 -8 KoreanCodecs/src/euckr_codec.h Index: euckr_codec.h =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/euckr_codec.h,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- euckr_codec.h 26 Apr 2002 21:11:13 -0000 1.1 +++ euckr_codec.h 28 Apr 2002 00:51:51 -0000 1.2 @@ -4,10 +4,10 @@ * KoreanCodecs EUC-KR Codec C Implementation * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/26 21:11:13 $ + * Date : $Date: 2002/04/28 00:51:51 $ * Created : 15 March 2002 * - * $Revision: 1.1 $ + * $Revision: 1.2 $ */ static char euc_kr_decode__doc__[] = "EUC-KR decoder"; @@ -30,7 +30,21 @@ destcur = destptr = PyMem_New(Py_UNICODE, arglen+1); for (srccur = argstr, srcend = argstr + arglen; srccur < srcend; srccur++) { - if ((*srccur & 0x80) && (srccur+1 < srcend)) { + if (*srccur & 0x80) { + if (srccur+1 >= srcend) { + switch (errtype) { + case error_strict: + PyMem_Del(destptr); + PyErr_Format(PyExc_UnicodeError, + "EUC-KR decoding error: invalid character \\x%02x", *srccur); + return NULL; + case error_replace: + *(destcur++) = UNIFIL; + break; + case error_ignore: + break; + } + } else { codemap = ksc5601_decode_map[*srccur & 0x7F]; if (!codemap) goto invalid; @@ -40,24 +54,23 @@ goto invalid; *(destcur++) = code; srccur++; - continue; } else { -invalid: srccur++; /* skip 2byte */ - switch (errtype) { 
+invalid: switch (errtype) { case error_strict: PyMem_Del(destptr); PyErr_Format(PyExc_UnicodeError, "EUC-KR decoding error: invalid character \\x%02x%02x", srccur[0], srccur[1]); return NULL; - break; case error_replace: *(destcur++) = UNIFIL; break; - /* case error_ignore: break; */ + case error_ignore: + break; } - continue; + srccur++; } + } } else *(destcur++) = *srccur; } @@ -66,6 +79,7 @@ PyMem_Del(destptr); return r; } + static char euc_kr_encode__doc__[] = "EUC-KR encoder"; |