Thread: [KoCo-CVS] [Commit] KoreanCodecs/src koco_stream.h _koco.c euckr_stream.h
Brought to you by:
perky
From: Chang <pe...@us...> - 2002-04-28 21:10:08
|
perky 02/04/27 23:16:07 Modified: src _koco.c Added: src koco_stream.h Removed: src euckr_stream.h Log: - Rename euckr_stream.h to koco_stream.h - make euc_kr_StreamReader more generalized. - Add full support for StreamReader (read, readline, readlines, reset) - Fix some garbage leaking Revision Changes Path 1.17 +12 -8 KoreanCodecs/src/_koco.c Index: _koco.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/_koco.c,v retrieving revision 1.16 retrieving revision 1.17 diff -u -r1.16 -r1.17 --- _koco.c 28 Apr 2002 04:46:52 -0000 1.16 +++ _koco.c 28 Apr 2002 06:16:07 -0000 1.17 @@ -4,24 +4,28 @@ * KoreanCodecs C Implementations * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/28 04:46:52 $ + * Date : $Date: 2002/04/28 06:16:07 $ * Created : 15 March 2002 * - * $Revision: 1.16 $ + * $Revision: 1.17 $ */ static char *version = -"$Id: _koco.c,v 1.16 2002/04/28 04:46:52 perky Exp $"; +"$Id: _koco.c,v 1.17 2002/04/28 06:16:07 perky Exp $"; #define UNIFIL 0xfffd #include "Python.h" -typedef int *state_t; +typedef int state_t; +typedef struct _streaminfo { + int state; + PyObject* (*decoder)(state_t*, char*, int slen, int errtype, PyObject* (*finalizer)(const Py_UNICODE *, int)); +} streaminfo; #define STATE_EXIST 0x100 #define HAS_STATE(c) ((*(c))&STATE_EXIST) #define GET_STATE(c) (unsigned char)((*(c))&0xFF) -#define REMOVE_STATE(c) ((*(c))&=0xFE00) +#define RESET_STATE(c) ((*(c))&=0xFE00) #define SET_STATE(c, v) (*(c)=STATE_EXIST|(v)) #ifndef max @@ -102,7 +106,7 @@ #include "euckr_codec.h" #include "cp949_codec.h" -#include "euckr_stream.h" +#include "koco_stream.h" /* List of methods defined in the module */ @@ -129,8 +133,8 @@ /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); - t = PyClass_New_WithMethods("euc_kr_StreamReader", euc_kr_StreamReader_methods); - PyDict_SetItemString(d, "euc_kr_StreamReader", t); + t = PyClass_New_WithMethods("StreamReader", StreamReader_methods); + PyDict_SetItemString(d, "StreamReader", t); Py_DECREF(t); t = PyString_FromString(version); 1.1 KoreanCodecs/src/koco_stream.h Index: koco_stream.h =================================================================== /* * euckr_stream.c * * KoreanCodecs EUC-KR StreamReader C Implementation * * Author : Hye-Shik Chang <pe...@fa...> * Date : $Date: 2002/04/28 06:16:07 $ * Created : 28 April 2002 * * $Revision: 1.1 $ */ static PyObject * __euc_kr_decode( state_t *state, char *s, int slen, int errtype, PyObject* (*finalizer)(const Py_UNICODE *, int) ) { unsigned char *srccur, *srcend; Py_UNICODE *destptr, *destcur, *codemap, code; PyObject *r; destcur = destptr = PyMem_New(Py_UNICODE, slen+1); srccur = s; srcend = s + slen; if (HAS_STATE(state)) { unsigned char c = GET_STATE(state); if (c & 0x80) { if (slen > 0) { codemap = ksc5601_decode_map[c & 0x7F]; if (!codemap) goto invalid_state; if (ksc5601_decode_bottom <= *srccur && *srccur <= ksc5601_decode_top) { code = codemap[*srccur - ksc5601_decode_bottom]; if (code == UNIFIL) goto invalid_state; *(destcur++) = code; srccur++; } else { invalid_state: switch (errtype) { case error_strict: PyErr_Format(PyExc_UnicodeError, "EUC-KR decoding error: invalid character \\x%02x%02x", c, srccur[0]); r = NULL; goto out; case error_replace: *(destcur++) = UNIFIL; break; case error_ignore: break; } srccur++; } } else { /* keep state */ r = PyUnicode_FromUnicode(NULL, 0); goto out; } } else *(destcur++) = c; RESET_STATE(state); } for (; srccur < srcend; srccur++) { if (*srccur & 0x80) { if (srccur+1 >= srcend) /* state out */ SET_STATE(state, *srccur); else { codemap = ksc5601_decode_map[*srccur & 0x7F]; if (!codemap) goto invalid; if (ksc5601_decode_bottom <= srccur[1] && srccur[1] <= ksc5601_decode_top) { code = codemap[srccur[1] - ksc5601_decode_bottom]; if (code == UNIFIL) goto invalid; *(destcur++) = code; srccur++; } else { invalid: switch (errtype) { case error_strict: PyErr_Format(PyExc_UnicodeError, "EUC-KR decoding error: invalid character \\x%02x%02x", srccur[0], srccur[1]); r = NULL; goto out; case error_replace: *(destcur++) = UNIFIL; break; case error_ignore: break; } srccur++; } } } else *(destcur++) = *srccur; } r = finalizer(destptr, destcur-destptr); out: PyMem_Del(destptr); return r; } PyObject* readline_finalizer(const Py_UNICODE *data, int datalen) { PyObject *list, *uobj; const Py_UNICODE *linestart = data; if ((list = PyList_New(0)) == NULL) return NULL; for (;datalen--; data++) { if (*data == '\n') { append: if ((uobj = PyUnicode_FromUnicode(linestart, data-linestart+1)) == NULL) { Py_DECREF(list); return NULL; } if (PyList_Append(list, uobj) == -1) { Py_DECREF(list); return NULL; } Py_DECREF(uobj); linestart = data+1; } } if (linestart < data) goto append; /* datalen < 0 here */ return list; } static void streaminfo_destroy(void *obj) { PyMem_Del(obj); } static char StreamReader___init____doc__[] = "StreamReader.__init__()"; static PyObject* StreamReader___init__(PyObject *typeself, PyObject *args, PyObject *kwargs) { PyObject *self, *stnfoobj, *encodingobj; PyObject *stream, *errors = NULL; streaminfo *stnfo; char *encoding; static char *kwlist[] = {"self", "stream", "errors", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O:__init__", kwlist, &self, &stream, &errors)) return NULL; if ((encodingobj = PyObject_GetAttrString(self, "encoding")) == NULL) return NULL; if ((encoding = PyString_AsString(encodingobj)) == NULL) return NULL; stnfo = PyMem_New(streaminfo, 1); RESET_STATE(&(stnfo->state)); if (!strcmp(encoding, "euc-kr")) stnfo->decoder = __euc_kr_decode; else if (!strcmp(encoding, "cp949")) stnfo->decoder = __euc_kr_decode; else { PyMem_Del(stnfo); PyErr_Format(PyExc_UnicodeError, "can't initialize StreamReader: not supported encoding '%s'", encoding); return NULL; } stnfoobj = PyCObject_FromVoidPtr((void*)stnfo, streaminfo_destroy); PyObject_SetAttrString(self, "_streaminfo", stnfoobj); Py_DECREF(stnfoobj); PyObject_SetAttrString(self, "stream", stream); if (errors) PyObject_SetAttrString(self, "errors", errors); else { errors = PyString_FromString("strict"); PyObject_SetAttrString(self, "errors", errors); Py_DECREF(errors); } Py_INCREF(Py_None); return Py_None; } static char StreamReader_read__doc__[] = "StreamReader.read()"; static PyObject* StreamReader_read(PyObject *typeself, PyObject *args) { PyObject *self, *tmp, *r = NULL; PyObject *stream, *stnfoobj; streaminfo *stnfo; int size = -1, errtype; if (!PyArg_ParseTuple(args, "O|i:read", &self, &size)) return NULL; if (size == 0) return PyUnicode_FromUnicode(NULL, 0); if ((stream = PyObject_GetAttrString(self, "stream")) == NULL) return NULL; if ((tmp = PyObject_GetAttrString(self, "errors")) == NULL) { Py_DECREF(stream); return NULL; } errtype = error_type(PyString_AsString(tmp)); Py_DECREF(tmp); if (errtype == error_undef) return NULL; if ((stnfoobj = PyObject_GetAttrString(self, "_streaminfo")) == NULL) { Py_DECREF(stream); return NULL; } if ((stnfo = (streaminfo*)PyCObject_AsVoidPtr(stnfoobj)) == NULL) goto out; if (size < 0) tmp = PyObject_CallMethod(stream, "read", NULL); /* without tuple */ else tmp = PyObject_CallMethod(stream, "read", "i", size); if (tmp == NULL) goto out; r = stnfo->decoder( &(stnfo->state), PyString_AS_STRING(tmp), PyString_GET_SIZE(tmp), errtype, PyUnicode_FromUnicode ); Py_DECREF(tmp); out: Py_DECREF(stream); Py_DECREF(stnfoobj); return r; } static char StreamReader_readline__doc__[] = "StreamReader.readline()"; static PyObject* StreamReader_readline(PyObject *typeself, PyObject *args) { PyObject *self, *tmp, *r = NULL; PyObject *stream, *stnfoobj; streaminfo *stnfo; int size = -1, errtype; if (!PyArg_ParseTuple(args, "O|i:readline", &self, &size)) return NULL; if (size == 0) return PyUnicode_FromUnicode(NULL, 0); if ((stream = PyObject_GetAttrString(self, "stream")) == NULL) return NULL; if ((tmp = PyObject_GetAttrString(self, "errors")) == NULL) { Py_DECREF(stream); return NULL; } errtype = error_type(PyString_AsString(tmp)); Py_DECREF(tmp); if (errtype == error_undef) return NULL; if ((stnfoobj = PyObject_GetAttrString(self, "_streaminfo")) == NULL) { Py_DECREF(stream); return NULL; } if ((stnfo = (streaminfo*)PyCObject_AsVoidPtr(stnfoobj)) == NULL) goto out; if (size < 0) tmp = PyObject_CallMethod(stream, "readline", NULL); /* without tuple */ else tmp = PyObject_CallMethod(stream, "readline", "i", size); if (tmp == NULL) goto out; r = stnfo->decoder( &(stnfo->state), PyString_AS_STRING(tmp), PyString_GET_SIZE(tmp), errtype, PyUnicode_FromUnicode ); Py_DECREF(tmp); out: Py_DECREF(stream); Py_DECREF(stnfoobj); return r; } static char StreamReader_readlines__doc__[] = "StreamReader.readlines()"; static PyObject* StreamReader_readlines(PyObject *typeself, PyObject *args) { PyObject *self, *tmp, *r = NULL; PyObject *stream, *stnfoobj; streaminfo *stnfo; int size = -1, errtype; if (!PyArg_ParseTuple(args, "O|i:readlines", &self, &size)) return NULL; if (size == 0) return PyUnicode_FromUnicode(NULL, 0); if ((stream = PyObject_GetAttrString(self, "stream")) == NULL) return NULL; if ((tmp = PyObject_GetAttrString(self, "errors")) == NULL) { Py_DECREF(stream); return NULL; } errtype = error_type(PyString_AsString(tmp)); Py_DECREF(tmp); if (errtype == error_undef) return NULL; if ((stnfoobj = PyObject_GetAttrString(self, "_streaminfo")) == NULL) { Py_DECREF(stream); return NULL; } if ((stnfo = (streaminfo*)PyCObject_AsVoidPtr(stnfoobj)) == NULL) goto out; if (size < 0) tmp = PyObject_CallMethod(stream, "read", NULL); /* without tuple */ else tmp = PyObject_CallMethod(stream, "read", "i", size); if (tmp == NULL) goto out; r = stnfo->decoder( &(stnfo->state), PyString_AS_STRING(tmp), PyString_GET_SIZE(tmp), errtype, readline_finalizer ); Py_DECREF(tmp); out: Py_DECREF(stream); Py_DECREF(stnfoobj); return r; } static char StreamReader_reset__doc__[] = "StreamReader.reset()"; static PyObject* StreamReader_reset(PyObject *typeself, PyObject *args) { PyObject *self, *stnfoobj; streaminfo *stnfo; if (!PyArg_ParseTuple(args, "O|:reset", &self)) return NULL; if ((stnfoobj = PyObject_GetAttrString(self, "_streaminfo")) == NULL) return NULL; if ((stnfo = (streaminfo*)PyCObject_AsVoidPtr(stnfoobj)) != NULL) RESET_STATE(&(stnfo->state)); Py_DECREF(stnfoobj); Py_INCREF(Py_None); return Py_None; } struct PyMethodDef StreamReader_methods[] = { {"__init__", (PyCFunction) StreamReader___init__, METH_VARARGS | METH_KEYWORDS, StreamReader___init____doc__}, {"read", (PyCFunction) StreamReader_read, METH_VARARGS, StreamReader_read__doc__}, {"readline", (PyCFunction) StreamReader_readline, METH_VARARGS, StreamReader_readline__doc__}, {"readlines",(PyCFunction) StreamReader_readlines, METH_VARARGS, StreamReader_readlines__doc__}, {"reset", (PyCFunction) StreamReader_reset, METH_VARARGS, StreamReader_reset__doc__}, {NULL,}, }; |