Thread: [KoCo-CVS] [Commit] KoreanCodecs/src cp949_codec.c euckr_codec.c _koco.c
Brought to you by:
perky
From: Chang <pe...@us...> - 2002-04-26 21:06:17
|
perky 02/04/26 14:06:10 Modified: src _koco.c Added: src cp949_codec.c euckr_codec.c Log: - Split euc-kr and cp949 codec from _koco.c Revision Changes Path 1.14 +7 -242 KoreanCodecs/src/_koco.c Index: _koco.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/_koco.c,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- _koco.c 26 Apr 2002 08:21:54 -0000 1.13 +++ _koco.c 26 Apr 2002 21:06:09 -0000 1.14 @@ -4,14 +4,14 @@ * KoreanCodecs C Implementations * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/26 08:21:54 $ + * Date : $Date: 2002/04/26 21:06:09 $ * Created : 15 March 2002 * - * $Revision: 1.13 $ + * $Revision: 1.14 $ */ static char *version = -"$Id: _koco.c,v 1.13 2002/04/26 08:21:54 perky Exp $"; +"$Id: _koco.c,v 1.14 2002/04/26 21:06:09 perky Exp $"; #define UNIFIL 0xfffd @@ -65,246 +65,11 @@ } } +#define KOCOCODEC_INCLUDE +#include "euckr_codec.c" +#include "cp949_codec.c" +#undef KOCOCODEC_INCLUDE -static char euc_kr_decode__doc__[] = "EUC-KR decoder"; - -static PyObject * -euc_kr_decode(PyObject *self, PyObject *args) -{ - unsigned char *argstr, *srccur, *srcend; - int arglen, errtype = error_strict; - char *errors = NULL; - Py_UNICODE *destptr, *destcur, *codemap, code; - PyObject *r; - - if (!PyArg_ParseTuple(args, "s#|z:euc_kr_decode", &argstr, &arglen, &errors)) - return NULL; - - errtype = error_type(errors); - if (errtype == error_undef) - return NULL; - - destcur = destptr = PyMem_New(Py_UNICODE, arglen+1); - for (srccur = argstr, srcend = argstr + arglen; srccur < srcend; srccur++) { - if ((*srccur & 0x80) && (srccur+1 < srcend)) { - codemap = ksc5601_decode_map[*srccur & 0x7F]; - if (!codemap) - goto invalid; - if (ksc5601_decode_bottom <= srccur[1] && srccur[1] <= ksc5601_decode_top) { - code = codemap[srccur[1] - ksc5601_decode_bottom]; - if (code == UNIFIL) - goto invalid; - *(destcur++) = code; - srccur++; - continue; - } else { -invalid: srccur++; /* skip 2byte */ - switch (errtype) { - case error_strict: - PyMem_Del(destptr); - PyErr_Format(PyExc_UnicodeError, - "EUC-KR decoding error: invalid character \\x%02x%02x", - srccur[0], srccur[1]); - return NULL; - break; - case error_replace: - *(destcur++) = UNIFIL; - break; - /* case error_ignore: break; */ - } - continue; - } - } else - *(destcur++) = *srccur; - } - - r = codec_tuple(PyUnicode_FromUnicode(destptr, destcur-destptr), arglen); - PyMem_Del(destptr); - return r; -} - -static char euc_kr_encode__doc__[] = "EUC-KR encoder"; - -static PyObject * -euc_kr_encode(PyObject *self, PyObject *args) -{ - Py_UNICODE *argptr, *srccur, *srcend; - int arglen, errtype = error_strict; - char *errors = NULL; - unsigned char *destptr, *destcur, *decbuf; - PyObject *r; - - if (!PyArg_ParseTuple(args, "u#|z:euc_kr_encode", &argptr, &arglen, &errors)) - return NULL; - - errtype = error_type(errors); - if (errtype == error_undef) - return NULL; - - destcur = destptr = PyMem_New(unsigned char, arglen*2+1); - for (srccur = argptr, srcend = argptr + arglen; srccur < srcend; srccur++) { - if (*srccur <= 0x7F) - *(destcur++) = (unsigned char)*srccur; - else - if((decbuf = _ksc5601_encode(*srccur)) == 0) { - switch (errtype) { - case error_strict: - PyMem_Del(destptr); - PyErr_Format(PyExc_UnicodeError, - "EUC-KR encoding error: invalid character \\u%04x", - *srccur); - return NULL; - break; - case error_replace: - *(destcur++) = 0xa1; - *(destcur++) = 0xa1; - break; - /* case error_ignore: break; */ - } - } else { - *(destcur++) = decbuf[0]; - *(destcur++) = decbuf[1]; - } - } - - r = codec_tuple(PyString_FromStringAndSize((char*)destptr, destcur - destptr), arglen); - PyMem_Del(destptr); - return r; -} - -static char cp949_decode__doc__[] = "CP949 decoder"; - -static PyObject * -cp949_decode(PyObject *self, PyObject *args) -{ - unsigned char *argstr, *srccur, *srcend; - int arglen, errtype = error_strict; - char *errors = NULL; - Py_UNICODE *destptr, *destcur, *codemap, code; - PyObject *r; - - if (!PyArg_ParseTuple(args, "s#|z:cp949_decode", &argstr, &arglen, &errors)) - return NULL; - - errtype = error_type(errors); - if (errtype == error_undef) - return NULL; - - destcur = destptr = PyMem_New(Py_UNICODE, arglen+1); - for (srccur = argstr, srcend = argstr + arglen; srccur < srcend; srccur++) { - if ((*srccur & 0x80) && (srccur+1 < srcend)) { - if (uhc_decode_hint[*srccur]) { /* UHC page0 region */ - codemap = uhc_decode_map[*srccur & 0x7F]; - /* codemap DOES have all maps on 0x81-0xA0, alphabet area can't on this */ - if (uhc_page0_bottom <= srccur[1] && srccur[1] <= uhc_page0_top) { - code = codemap[srccur[1] - uhc_page0_bottom]; - if (code == UNIFIL) - goto invalid; - *(destcur++) = code; - srccur++; /* skip 2byte */ - } else - goto invalid; - } else if (uhc_decode_hint[srccur[1]]) { /* UHC page1 region */ - codemap = uhc_decode_map[*srccur & 0x7F]; - if (!codemap) - goto invalid; - /* srccur[1] has tested already */ - code = codemap[srccur[1] - uhc_page1_bottom]; - if (code == UNIFIL) - goto invalid; - *(destcur++) = code; - srccur++; /* skip 2byte */ - } else { - /* ksc5601 area */ - codemap = ksc5601_decode_map[*srccur & 0x7F]; - if (!codemap) - goto invalid; - if (ksc5601_decode_bottom <= srccur[1] && srccur[1] <= ksc5601_decode_top) { - code = codemap[srccur[1] - ksc5601_decode_bottom]; - if (code == UNIFIL) - goto invalid; - *(destcur++) = code; - srccur++; - continue; - } else { -invalid: srccur++; /* skip 2byte */ - switch (errtype) { - case error_strict: - PyMem_Del(destptr); - PyErr_Format(PyExc_UnicodeError, - "CP949 decoding error: invalid character \\x%02x%02x", - srccur[0], srccur[1]); - return NULL; - break; - case error_replace: - *(destcur++) = UNIFIL; - break; - /* case error_ignore: break; */ - } - continue; - } - } - } else - *(destcur++) = *srccur; - } - - r = codec_tuple(PyUnicode_FromUnicode(destptr, destcur-destptr), arglen); - PyMem_Del(destptr); - return r; -} - -static char cp949_encode__doc__[] = "CP949 encoder"; - -static PyObject * -cp949_encode(PyObject *self, PyObject *args) -{ - Py_UNICODE *argptr, *srccur, *srcend; - int arglen, errtype = error_strict; - char *errors = NULL; - unsigned char *destptr, *destcur, *decbuf; - PyObject *r; - - if (!PyArg_ParseTuple(args, "u#|z:cp949_encode", &argptr, &arglen, &errors)) - return NULL; - - errtype = error_type(errors); - if (errtype == error_undef) - return NULL; - - destcur = destptr = PyMem_New(unsigned char, arglen*2+1); - for (srccur = argptr, srcend = argptr + arglen; srccur < srcend; srccur++) { - if (*srccur <= 0x7F) - *(destcur++) = (unsigned char)*srccur; - else { - decbuf = _ksc5601_encode(*srccur); - if (!decbuf) - decbuf = _uhc_encode(*srccur); - if(decbuf == 0) { - switch (errtype) { - case error_strict: - PyMem_Del(destptr); - PyErr_Format(PyExc_UnicodeError, - "CP949 encoding error: invalid character \\u%04x", - *srccur); - return NULL; - break; - case error_replace: - *(destcur++) = 0xa1; - *(destcur++) = 0xa1; - break; - /* case error_ignore: break; */ - } - } else { - *(destcur++) = decbuf[0]; - *(destcur++) = decbuf[1]; - } - } - } - - r = codec_tuple(PyString_FromStringAndSize((char*)destptr, destcur - destptr), arglen); - PyMem_Del(destptr); - return r; -} /* List of methods defined in the module */ 1.1 KoreanCodecs/src/cp949_codec.c Index: cp949_codec.c =================================================================== /* * cp949_codec.c * * KoreanCodecs CP949 Codec C Implementation * * Author : Hye-Shik Chang <pe...@fa...> * Date : $Date: 2002/04/26 21:06:10 $ * Created : 15 March 2002 * * $Revision: 1.1 $ */ #ifdef KOCOCODEC_INCLUDE static char cp949_decode__doc__[] = "CP949 decoder"; static PyObject * cp949_decode(PyObject *self, PyObject *args) { unsigned char *argstr, *srccur, *srcend; int arglen, errtype = error_strict; char *errors = NULL; Py_UNICODE *destptr, *destcur, *codemap, code; PyObject *r; if (!PyArg_ParseTuple(args, "s#|z:cp949_decode", &argstr, &arglen, &errors)) return NULL; errtype = error_type(errors); if (errtype == error_undef) return NULL; destcur = destptr = PyMem_New(Py_UNICODE, arglen+1); for (srccur = argstr, srcend = argstr + arglen; srccur < srcend; srccur++) { if ((*srccur & 0x80) && (srccur+1 < srcend)) { if (uhc_decode_hint[*srccur]) { /* UHC page0 region */ codemap = uhc_decode_map[*srccur & 0x7F]; /* codemap DOES have all maps on 0x81-0xA0, alphabet area can't on this */ if (uhc_page0_bottom <= srccur[1] && srccur[1] <= uhc_page0_top) { code = codemap[srccur[1] - uhc_page0_bottom]; if (code == UNIFIL) goto invalid; *(destcur++) = code; srccur++; /* skip 2byte */ } else goto invalid; } else if (uhc_decode_hint[srccur[1]]) { /* UHC page1 region */ codemap = uhc_decode_map[*srccur & 0x7F]; if (!codemap) goto invalid; /* srccur[1] has tested already */ code = codemap[srccur[1] - uhc_page1_bottom]; if (code == UNIFIL) goto invalid; *(destcur++) = code; srccur++; /* skip 2byte */ } else { /* ksc5601 area */ codemap = ksc5601_decode_map[*srccur & 0x7F]; if (!codemap) goto invalid; if (ksc5601_decode_bottom <= srccur[1] && srccur[1] <= ksc5601_decode_top) { code = codemap[srccur[1] - ksc5601_decode_bottom]; if (code == UNIFIL) goto invalid; *(destcur++) = code; srccur++; continue; } else { invalid: srccur++; /* skip 2byte */ switch (errtype) { case error_strict: PyMem_Del(destptr); PyErr_Format(PyExc_UnicodeError, "CP949 decoding error: invalid character \\x%02x%02x", srccur[0], srccur[1]); return NULL; break; case error_replace: *(destcur++) = UNIFIL; break; /* case error_ignore: break; */ } continue; } } } else *(destcur++) = *srccur; } r = codec_tuple(PyUnicode_FromUnicode(destptr, destcur-destptr), arglen); PyMem_Del(destptr); return r; } static char cp949_encode__doc__[] = "CP949 encoder"; static PyObject * cp949_encode(PyObject *self, PyObject *args) { Py_UNICODE *argptr, *srccur, *srcend; int arglen, errtype = error_strict; char *errors = NULL; unsigned char *destptr, *destcur, *decbuf; PyObject *r; if (!PyArg_ParseTuple(args, "u#|z:cp949_encode", &argptr, &arglen, &errors)) return NULL; errtype = error_type(errors); if (errtype == error_undef) return NULL; destcur = destptr = PyMem_New(unsigned char, arglen*2+1); for (srccur = argptr, srcend = argptr + arglen; srccur < srcend; srccur++) { if (*srccur <= 0x7F) *(destcur++) = (unsigned char)*srccur; else { decbuf = _ksc5601_encode(*srccur); if (!decbuf) decbuf = _uhc_encode(*srccur); if(decbuf == 0) { switch (errtype) { case error_strict: PyMem_Del(destptr); PyErr_Format(PyExc_UnicodeError, "CP949 encoding error: invalid character \\u%04x", *srccur); return NULL; break; case error_replace: *(destcur++) = 0xa1; *(destcur++) = 0xa1; break; /* case error_ignore: break; */ } } else { *(destcur++) = decbuf[0]; *(destcur++) = decbuf[1]; } } } r = codec_tuple(PyString_FromStringAndSize((char*)destptr, destcur - destptr), arglen); PyMem_Del(destptr); return r; } #endif 1.1 KoreanCodecs/src/euckr_codec.c Index: euckr_codec.c =================================================================== /* * euckr_codec.c * * KoreanCodecs EUC-KR Codec C Implementation * * Author : Hye-Shik Chang <pe...@fa...> * Date : $Date: 2002/04/26 21:06:10 $ * Created : 15 March 2002 * * $Revision: 1.1 $ */ #ifdef KOCOCODEC_INCLUDE static char euc_kr_decode__doc__[] = "EUC-KR decoder"; static PyObject * euc_kr_decode(PyObject *self, PyObject *args) { unsigned char *argstr, *srccur, *srcend; int arglen, errtype = error_strict; char *errors = NULL; Py_UNICODE *destptr, *destcur, *codemap, code; PyObject *r; if (!PyArg_ParseTuple(args, "s#|z:euc_kr_decode", &argstr, &arglen, &errors)) return NULL; errtype = error_type(errors); if (errtype == error_undef) return NULL; destcur = destptr = PyMem_New(Py_UNICODE, arglen+1); for (srccur = argstr, srcend = argstr + arglen; srccur < srcend; srccur++) { if ((*srccur & 0x80) && (srccur+1 < srcend)) { codemap = ksc5601_decode_map[*srccur & 0x7F]; if (!codemap) goto invalid; if (ksc5601_decode_bottom <= srccur[1] && srccur[1] <= ksc5601_decode_top) { code = codemap[srccur[1] - ksc5601_decode_bottom]; if (code == UNIFIL) goto invalid; *(destcur++) = code; srccur++; continue; } else { invalid: srccur++; /* skip 2byte */ switch (errtype) { case error_strict: PyMem_Del(destptr); PyErr_Format(PyExc_UnicodeError, "EUC-KR decoding error: invalid character \\x%02x%02x", srccur[0], srccur[1]); return NULL; break; case error_replace: *(destcur++) = UNIFIL; break; /* case error_ignore: break; */ } continue; } } else *(destcur++) = *srccur; } r = codec_tuple(PyUnicode_FromUnicode(destptr, destcur-destptr), arglen); PyMem_Del(destptr); return r; } static char euc_kr_encode__doc__[] = "EUC-KR encoder"; static PyObject * euc_kr_encode(PyObject *self, PyObject *args) { Py_UNICODE *argptr, *srccur, *srcend; int arglen, errtype = error_strict; char *errors = NULL; unsigned char *destptr, *destcur, *decbuf; PyObject *r; if (!PyArg_ParseTuple(args, "u#|z:euc_kr_encode", &argptr, &arglen, &errors)) return NULL; errtype = error_type(errors); if (errtype == error_undef) return NULL; destcur = destptr = PyMem_New(unsigned char, arglen*2+1); for (srccur = argptr, srcend = argptr + arglen; srccur < srcend; srccur++) { if (*srccur <= 0x7F) *(destcur++) = (unsigned char)*srccur; else if((decbuf = _ksc5601_encode(*srccur)) == 0) { switch (errtype) { case error_strict: PyMem_Del(destptr); PyErr_Format(PyExc_UnicodeError, "EUC-KR encoding error: invalid character \\u%04x", *srccur); return NULL; break; case error_replace: *(destcur++) = 0xa1; *(destcur++) = 0xa1; break; /* case error_ignore: break; */ } } else { *(destcur++) = decbuf[0]; *(destcur++) = decbuf[1]; } } r = codec_tuple(PyString_FromStringAndSize((char*)destptr, destcur - destptr), arglen); PyMem_Del(destptr); return r; } #endif |