[KoCo-CVS] [Commit] KoreanCodecs/src euckr_stream.h _koco.c
Brought to you by:
perky
From: Chang <pe...@us...> - 2002-04-28 19:45:33
|
perky 02/04/27 21:46:53 Modified: src _koco.c Added: src euckr_stream.h Log: - Add StreamReader C implementation for EUC-KR codec (lacks readlines() now) Revision Changes Path 1.16 +60 -17 KoreanCodecs/src/_koco.c Index: _koco.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/_koco.c,v retrieving revision 1.15 retrieving revision 1.16 diff -u -r1.15 -r1.16 --- _koco.c 26 Apr 2002 21:11:13 -0000 1.15 +++ _koco.c 28 Apr 2002 04:46:52 -0000 1.16 @@ -4,18 +4,30 @@ * KoreanCodecs C Implementations * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/26 21:11:13 $ + * Date : $Date: 2002/04/28 04:46:52 $ * Created : 15 March 2002 * - * $Revision: 1.15 $ + * $Revision: 1.16 $ */ static char *version = -"$Id: _koco.c,v 1.15 2002/04/26 21:11:13 perky Exp $"; +"$Id: _koco.c,v 1.16 2002/04/28 04:46:52 perky Exp $"; #define UNIFIL 0xfffd #include "Python.h" + +typedef int *state_t; +#define STATE_EXIST 0x100 +#define HAS_STATE(c) ((*(c))&STATE_EXIST) +#define GET_STATE(c) (unsigned char)((*(c))&0xFF) +#define REMOVE_STATE(c) ((*(c))&=0xFE00) +#define SET_STATE(c, v) (*(c)=STATE_EXIST|(v)) + +#ifndef max +#define max(a, b) ((a)<(b) ? (b) : (a)) +#endif + #include "_koco_ksc5601.h" #include "_koco_uhc.h" @@ -24,8 +36,8 @@ enum { error_strict, error_ignore, error_replace, error_undef }; -static -PyObject *codec_tuple(PyObject *unicode, int len) +static PyObject * +codec_tuple(PyObject *unicode, int len) { PyObject *v, *w; @@ -46,7 +58,8 @@ return v; } -int error_type(const char *errors) +static int +error_type(const char *errors) { if (errors == NULL || strcmp(errors, "strict") == 0) { return error_strict; @@ -65,8 +78,31 @@ } } +static PyObject * +PyClass_New_WithMethods(const char *name, PyMethodDef *methods) +{ + PyMethodDef *def; + + PyObject *classDict = PyDict_New(); + PyObject *className = PyString_FromString(name); + PyObject *newClass = PyClass_New(NULL, classDict, className); + Py_DECREF(classDict); + Py_DECREF(className); + + for (def = methods; def->ml_name != NULL; def++) { + PyObject *func = PyCFunction_New(def, NULL); + PyObject *method = PyMethod_New(func, NULL, newClass); + PyDict_SetItemString(classDict, def->ml_name, method); + Py_DECREF(method); + Py_DECREF(func); + } + + return newClass; +} + #include "euckr_codec.h" #include "cp949_codec.h" +#include "euckr_stream.h" /* List of methods defined in the module */ @@ -85,20 +121,27 @@ void init_koco(void) { - PyObject *m, *d; + PyObject *m, *d, *t; + + /* Create the module and add the functions */ + m = Py_InitModule("_koco", _koco_methods); - /* Create the module and add the functions */ - m = Py_InitModule("_koco", _koco_methods); + /* Add some symbolic constants to the module */ + d = PyModule_GetDict(m); - /* Add some symbolic constants to the module */ - d = PyModule_GetDict(m); + t = PyClass_New_WithMethods("euc_kr_StreamReader", euc_kr_StreamReader_methods); + PyDict_SetItemString(d, "euc_kr_StreamReader", t); + Py_DECREF(t); - PyDict_SetItemString(d, "version", PyString_FromString(version)); + t = PyString_FromString(version); + PyDict_SetItemString(d, "version", t); + Py_DECREF(t); - ErrorObject = PyErr_NewException("_koco.error", NULL, NULL); - PyDict_SetItemString(d, "error", ErrorObject); + ErrorObject = PyErr_NewException("_koco.error", NULL, NULL); + PyDict_SetItemString(d, "error", ErrorObject); + Py_DECREF(ErrorObject); - /* Check for errors */ - if (PyErr_Occurred()) - Py_FatalError("can't initialize the _koco module"); + /* Check for errors */ + if (PyErr_Occurred()) + Py_FatalError("can't initialize the _koco module"); } 1.1 KoreanCodecs/src/euckr_stream.h Index: euckr_stream.h =================================================================== /* * euckr_stream.c * * KoreanCodecs EUC-KR StreamReader C Implementation * * Author : Hye-Shik Chang <pe...@fa...> * Date : $Date: 2002/04/28 04:46:52 $ * Created : 28 April 2002 * * $Revision: 1.1 $ */ static PyObject * __euc_kr_decode(state_t state, char *s, int slen, int errtype) { unsigned char *srccur, *srcend; Py_UNICODE *destptr, *destcur, *codemap, code; PyObject *r; destcur = destptr = PyMem_New(Py_UNICODE, slen+1); srccur = s; srcend = s + slen; if (HAS_STATE(state)) { unsigned char c = GET_STATE(state); if (c & 0x80) { if (slen > 0) { codemap = ksc5601_decode_map[c & 0x7F]; if (!codemap) goto invalid_state; if (ksc5601_decode_bottom <= *srccur && *srccur <= ksc5601_decode_top) { code = codemap[*srccur - ksc5601_decode_bottom]; if (code == UNIFIL) goto invalid_state; *(destcur++) = code; srccur++; } else { invalid_state: switch (errtype) { case error_strict: PyErr_Format(PyExc_UnicodeError, "EUC-KR decoding error: invalid character \\x%02x%02x", c, srccur[0]); r = NULL; goto out; case error_replace: *(destcur++) = UNIFIL; break; case error_ignore: break; } srccur++; } } else { /* keep state */ r = PyUnicode_FromUnicode(NULL, 0); goto out; } } else *(destcur++) = c; REMOVE_STATE(state); } for (; srccur < srcend; srccur++) { if (*srccur & 0x80) { if (srccur+1 >= srcend) /* state out */ SET_STATE(state, *srccur); else { codemap = ksc5601_decode_map[*srccur & 0x7F]; if (!codemap) goto invalid; if (ksc5601_decode_bottom <= srccur[1] && srccur[1] <= ksc5601_decode_top) { code = codemap[srccur[1] - ksc5601_decode_bottom]; if (code == UNIFIL) goto invalid; *(destcur++) = code; srccur++; } else { invalid: switch (errtype) { case error_strict: PyErr_Format(PyExc_UnicodeError, "EUC-KR decoding error: invalid character \\x%02x%02x", srccur[0], srccur[1]); r = NULL; goto out; case error_replace: *(destcur++) = UNIFIL; break; case error_ignore: break; } srccur++; } } } else *(destcur++) = *srccur; } r = PyUnicode_FromUnicode(destptr, destcur-destptr); out: PyMem_Del(destptr); return r; } static void state_t_destroy(void *obj) { PyMem_Del(obj); } static char euc_kr_StreamReader___init____doc__[] = "euc_kr_StreamReader.__init__()"; static PyObject* euc_kr_StreamReader___init__(PyObject *typeself, PyObject *args, PyObject *kwargs) { PyObject *self, *stateobj; PyObject *stream, *errors = NULL; state_t state; static char *kwlist[] = {"self", "stream", "errors", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O:__init__", kwlist, &self, &stream, &errors)) return NULL; PyObject_SetAttrString(self, "stream", stream); if (errors) PyObject_SetAttrString(self, "errors", errors); else { errors = PyString_FromString("strict"); PyObject_SetAttrString(self, "errors", errors); Py_DECREF(errors); } state = PyMem_New(/*state_t*/int, 1); REMOVE_STATE(state); stateobj = PyCObject_FromVoidPtr((void*)state, state_t_destroy); PyObject_SetAttrString(self, "_state", stateobj); Py_DECREF(stateobj); Py_INCREF(Py_None); return Py_None; } static char euc_kr_StreamReader_read__doc__[] = "euc_kr_StreamReader.read()"; static PyObject* euc_kr_StreamReader_read(PyObject *typeself, PyObject *args) { PyObject *self, *tmp, *r = NULL; PyObject *stream, *stateobj; state_t state; int size = -1, errtype; if (!PyArg_ParseTuple(args, "O|i:read", &self, &size)) return NULL; if (size == 0) return PyUnicode_FromUnicode(NULL, 0); if ((stream = PyObject_GetAttrString(self, "stream")) == NULL) return NULL; if ((tmp = PyObject_GetAttrString(self, "errors")) == NULL) { Py_DECREF(stream); return NULL; } errtype = error_type(PyString_AsString(tmp)); Py_DECREF(tmp); if (errtype == error_undef) return NULL; if ((stateobj = PyObject_GetAttrString(self, "_state")) == NULL) { Py_DECREF(stream); return NULL; } if ((state = (state_t)PyCObject_AsVoidPtr(stateobj)) == NULL) goto out; if (size < 0) tmp = PyObject_CallMethod(stream, "read", NULL); /* without tuple */ else tmp = PyObject_CallMethod(stream, "read", "i", size); if (tmp == NULL) goto out; r = __euc_kr_decode( state, PyString_AS_STRING(tmp), PyString_GET_SIZE(tmp), errtype ); out: Py_DECREF(stream); Py_DECREF(stateobj); return r; } static char euc_kr_StreamReader_readline__doc__[] = "euc_kr_StreamReader.readline()"; static PyObject* euc_kr_StreamReader_readline(PyObject *typeself, PyObject *args) { PyObject *self, *tmp, *r = NULL; PyObject *stream, *stateobj; state_t state; int size = -1, errtype; if (!PyArg_ParseTuple(args, "O|i:read", &self, &size)) return NULL; if (size == 0) return PyUnicode_FromUnicode(NULL, 0); if ((stream = PyObject_GetAttrString(self, "stream")) == NULL) return NULL; if ((tmp = PyObject_GetAttrString(self, "errors")) == NULL) { Py_DECREF(stream); return NULL; } errtype = error_type(PyString_AsString(tmp)); Py_DECREF(tmp); if (errtype == error_undef) return NULL; if ((stateobj = PyObject_GetAttrString(self, "_state")) == NULL) { Py_DECREF(stream); return NULL; } if ((state = (state_t)PyCObject_AsVoidPtr(stateobj)) == NULL) goto out; if (size < 0) tmp = PyObject_CallMethod(stream, "readline", NULL); /* without tuple */ else tmp = PyObject_CallMethod(stream, "readline", "i", size); if (tmp == NULL) goto out; r = __euc_kr_decode( state, PyString_AS_STRING(tmp), PyString_GET_SIZE(tmp), errtype ); out: Py_DECREF(stream); Py_DECREF(stateobj); return r; } struct PyMethodDef euc_kr_StreamReader_methods[] = { {"__init__", (PyCFunction) euc_kr_StreamReader___init__, METH_VARARGS | METH_KEYWORDS, euc_kr_StreamReader___init____doc__}, {"read", (PyCFunction) euc_kr_StreamReader_read, METH_VARARGS, euc_kr_StreamReader_read__doc__}, {"readline", (PyCFunction) euc_kr_StreamReader_readline, METH_VARARGS, euc_kr_StreamReader_readline__doc__}, {NULL,}, }; |