[KoCo-CVS] [Commit] cjkcodecs/src multibytecodec.c
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-05-20 06:42:41
|
perky 03/05/19 23:42:40 Modified: src multibytecodec.c Log: Implement StreamReader. Revision Changes Path 1.10 +315 -16 cjkcodecs/src/multibytecodec.c Index: multibytecodec.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.c,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- multibytecodec.c 20 May 2003 04:16:56 -0000 1.9 +++ multibytecodec.c 20 May 2003 06:42:40 -0000 1.10 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.c,v 1.9 2003/05/20 04:16:56 perky Exp $ + * $Id: multibytecodec.c,v 1.10 2003/05/20 06:42:40 perky Exp $ */ #include "Python.h" @@ -62,6 +62,9 @@ are 'ignore' and 'replace' as well as any other name registerd with\n\ codecs.register_error that is able to handle UnicodeDecodeErrors."); +PyDoc_STRVAR(MultibyteCodec_StreamReader__doc__, +"I.StreamReader(stream[, errors]) -> StreamReader instance"); + PyDoc_STRVAR(MultibyteCodec_StreamWriter__doc__, "I.StreamWriter(stream[, errors]) -> StreamWriter instance"); @@ -70,6 +73,8 @@ static PyObject *multibytecodec_encode(MultibyteCodec *, MultibyteCodec_State *, const Py_UNICODE *, int, PyObject *); +static PyObject *mbstreamreader_create(MultibyteCodec *, + PyObject *, const char *); static PyObject *mbstreamwriter_create(MultibyteCodec *, PyObject *, const char *); @@ -134,23 +139,23 @@ goto errorexit; \ } -static int +static int expand_decodebuffer(MultibyteDecodeBuffer *buf, int esize) -{ +{ int orgpos, orgsize; - + orgpos = (int)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); orgsize = PyUnicode_GET_SIZE(buf->outobj); if (PyUnicode_Resize(&buf->outobj, orgsize + ( esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) return -1; - + buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos; buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj) + PyUnicode_GET_SIZE(buf->outobj); - - return 0; -} + + return 0; +} #define RESERVE_DECODEBUFFER(buf, s) { \ if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ if (expand_decodebuffer(buf, s) == -1) \ @@ -227,7 +232,7 @@ goto errorexit; } else if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 || - PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 || + PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 || PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) goto errorexit; @@ -359,8 +364,8 @@ if (retobj == NULL) goto errorexit; - if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || - !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || + if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || + !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { PyErr_SetString(PyExc_ValueError, "decoding error handler must return (unicode, int) tuple"); @@ -383,7 +388,7 @@ "position %d from error handler out of bounds", newpos); goto errorexit; } - buf->inbuf = buf->inbuf_top + newpos; + buf->inbuf = buf->inbuf_top + newpos; Py_DECREF(retobj); return 0; @@ -481,7 +486,7 @@ static PyObject * MultibyteCodec_Decode(MultibyteCodecObject *self, PyObject *args, PyObject *kwargs) -{ +{ MultibyteCodec_State state; MultibyteDecodeBuffer buf; PyObject *errorcb; @@ -548,9 +553,23 @@ } static PyObject * +MultibyteCodec_StreamReader(MultibyteCodecObject *self, + PyObject *args, PyObject *kwargs) +{ + PyObject *stream; + char *errors = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamReader", + streamkwarglist, &stream, &errors)) + return NULL; + + return mbstreamreader_create(self->codec, stream, errors); +} + +static PyObject * MultibyteCodec_StreamWriter(MultibyteCodecObject *self, PyObject *args, PyObject *kwargs) -{ +{ PyObject *stream; char *errors = NULL; @@ -568,6 +587,9 @@ {"decode", (PyCFunction)MultibyteCodec_Decode, METH_VARARGS | METH_KEYWORDS, MultibyteCodec_Decode__doc__}, + {"StreamReader",(PyCFunction)MultibyteCodec_StreamReader, + METH_VARARGS | METH_KEYWORDS, + MultibyteCodec_StreamReader__doc__}, {"StreamWriter",(PyCFunction)MultibyteCodec_StreamWriter, METH_VARARGS | METH_KEYWORDS, MultibyteCodec_StreamWriter__doc__}, @@ -613,6 +635,258 @@ multibytecodec_methods, /* tp_methods */ }; +static PyObject * +mbstreamreader_iread(MultibyteStreamReaderObject *self, + const char *method, int sizehint) +{ + MultibyteDecodeBuffer buf; + PyObject *cres; + int rsize, r, finalsize = 0; + + if (sizehint == 0) + return PyUnicode_FromUnicode(NULL, 0); + + buf.outobj = buf.excobj = NULL; + cres = NULL; + + for (;;) { + if (sizehint < 0) + cres = PyObject_CallMethod(self->stream, (char *)method, NULL); + else + cres = PyObject_CallMethod(self->stream, + (char *)method, "i", sizehint); + if (cres == NULL) + goto errorexit; + + if (!PyString_Check(cres)) { + PyErr_SetString(PyExc_TypeError, + "stream function returned a non-string object"); + goto errorexit; + } + + if (self->pendingsize > 0) { + PyObject *ctr; + char *ctrdata; + + rsize = PyString_GET_SIZE(cres) + self->pendingsize; + ctr = PyString_FromStringAndSize(NULL, rsize); + if (ctr == NULL) + goto errorexit; + ctrdata = PyString_AS_STRING(ctr); + memcpy(ctrdata, self->pending, self->pendingsize); + memcpy(ctrdata + self->pendingsize, + PyString_AS_STRING(cres), PyString_GET_SIZE(cres)); + Py_DECREF(cres); + cres = ctr; + self->pendingsize = 0; + } + + rsize = PyString_GET_SIZE(cres); + buf.inbuf = buf.inbuf_top = (unsigned char *)PyString_AS_STRING(cres); + buf.inbuf_end = buf.inbuf_top + rsize; + if (buf.outobj == NULL) { + buf.outobj = PyUnicode_FromUnicode(NULL, rsize); + if (buf.outobj == NULL) + goto errorexit; + buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); + buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); + } + + r = 0; + if (rsize > 0) + while (buf.inbuf < buf.inbuf_end) { + size_t inleft, outleft; + + inleft = (size_t)(buf.inbuf_end - buf.inbuf); + outleft = (size_t)(buf.outbuf_end - buf.outbuf); + + r = self->codec->decode(&self->state, &buf.inbuf, inleft, + &buf.outbuf, outleft); + if (r == 0 || r == MBERR_TOOFEW) + break; + else if (multibytecodec_decerror(self->codec, + &self->state, &buf, self->errors, r)) + goto errorexit; + } + + if (rsize == 0 || sizehint < 0) { /* end of file */ + if (buf.inbuf < buf.inbuf_end && + multibytecodec_decerror(self->codec, &self->state, &buf, + self->errors, MBERR_TOOFEW)) + goto errorexit; + } + + if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */ + size_t npendings; + + /* we can't assume that pendingsize is still 0 here. because + * this function can be called recursively from error callback */ + npendings = (size_t)(buf.inbuf_end - buf.inbuf); + if (npendings + self->pendingsize > MAXPENDING) { + PyErr_SetString(PyExc_RuntimeError, + "pending buffer overflow"); + goto errorexit; + } + memcpy(self->pending + self->pendingsize, buf.inbuf, npendings); + self->pendingsize += npendings; + } + + finalsize = (int)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); + + Py_DECREF(cres); + cres = NULL; + + if (sizehint < 0 || finalsize != 0 || rsize == 0) + break; + + sizehint = 1; /* read 1 more byte and retry */ + } + + if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) + if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) + goto errorexit; + + Py_XDECREF(cres); + Py_XDECREF(buf.excobj); + return buf.outobj; + +errorexit: + Py_XDECREF(cres); + Py_XDECREF(buf.excobj); + Py_XDECREF(buf.outobj); + return NULL; +} + +static PyObject * +mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args) +{ + PyObject *sizeobj = NULL; + long size; + + if (!PyArg_ParseTuple(args, "|O:read", &sizeobj)) + return NULL; + + if (sizeobj == Py_None || sizeobj == NULL) + size = -1; + else if (PyInt_Check(sizeobj)) + size = PyInt_AsLong(sizeobj); + else { + PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); + return NULL; + } + + return mbstreamreader_iread(self, "read", size); +} + +static PyObject * +mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args) +{ + PyObject *sizeobj = NULL; + long size; + + if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj)) + return NULL; + + if (sizeobj == Py_None || sizeobj == NULL) + size = -1; + else if (PyInt_Check(sizeobj)) + size = PyInt_AsLong(sizeobj); + else { + PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); + return NULL; + } + + return mbstreamreader_iread(self, "readline", size); +} + +static PyObject * +mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args) +{ + PyObject *sizehintobj = NULL, *r, *sr; + long sizehint; + + if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj)) + return NULL; + + if (sizehintobj == Py_None || sizehintobj == NULL) + sizehint = -1; + else if (PyInt_Check(sizehintobj)) + sizehint = PyInt_AsLong(sizehintobj); + else { + PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); + return NULL; + } + + r = mbstreamreader_iread(self, "read", sizehint); + if (r == NULL) + return NULL; + + sr = PyUnicode_Splitlines(r, 1); + Py_DECREF(r); + return sr; +} + +static PyObject * +mbstreamreader_reset(MultibyteStreamReaderObject *self) +{ + self->state.p = NULL; + self->pendingsize = 0; + + Py_INCREF(Py_None); + return Py_None; +} + +static struct PyMethodDef mbstreamreader_methods[] = { + {"read", (PyCFunction)mbstreamreader_read, + METH_VARARGS, NULL}, + {"readline", (PyCFunction)mbstreamreader_readline, + METH_VARARGS, NULL}, + {"readlines", (PyCFunction)mbstreamreader_readlines, + METH_VARARGS, NULL}, + {"reset", (PyCFunction)mbstreamreader_reset, + METH_NOARGS, NULL}, + {NULL, NULL}, +}; + +static void +mbstreamreader_dealloc(MultibyteStreamWriterObject *self) +{ + PyObject_Del(self); +} + +static PyTypeObject MultibyteStreamReader_Type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "MultibyteStreamReader", /* tp_name */ + sizeof(MultibyteStreamReaderObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)mbstreamreader_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iterext */ + mbstreamreader_methods, /* tp_methods */ +}; + static int mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, PyObject *unistr) @@ -755,7 +1029,7 @@ static PyObject * __create_codec(PyObject *ignore, PyObject *arg) { - MultibyteCodecObject *self; + MultibyteCodecObject *self; if (!PyCObject_Check(arg)) { PyErr_SetString(PyExc_ValueError, "argument type invalid"); @@ -768,7 +1042,32 @@ self->codec = PyCObject_AsVoidPtr(arg); - return (PyObject *)self; + return (PyObject *)self; +} + +static PyObject * +mbstreamreader_create(MultibyteCodec *codec, + PyObject *stream, const char *errors) +{ + MultibyteStreamReaderObject *self; + + self = PyObject_New(MultibyteStreamReaderObject, + &MultibyteStreamReader_Type); + if (self == NULL) + return NULL; + + self->errors = get_errorcallback(errors); + if (self->errors == NULL) { + Py_DECREF(self); + return NULL; + } + self->codec = codec; + self->stream = stream; + Py_INCREF(stream); + self->state.p = NULL; + self->pendingsize = 0; + + return (PyObject *)self; } static PyObject * |