[KoCo-CVS] [Commit] cjkcodecs/src multibytecodec.c multibytecodec.h
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-05-19 02:53:51
|
perky 03/05/18 19:53:49 Modified: src multibytecodec.c multibytecodec.h Log: Add basic encoder routines. Revision Changes Path 1.2 +209 -885 cjkcodecs/src/multibytecodec.c Index: multibytecodec.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- multibytecodec.c 20 Apr 2003 17:35:31 -0000 1.1 +++ multibytecodec.c 19 May 2003 02:53:49 -0000 1.2 @@ -26,13 +26,19 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.c,v 1.1 2003/04/20 17:35:31 perky Exp $ + * $Id: multibytecodec.c,v 1.2 2003/05/19 02:53:49 perky Exp $ */ #include "Python.h" #include "multibytecodec.h" -PyDoc_STRVAR(multibyteencoder_doc, +typedef struct { + const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end; + unsigned char *outbuf, *outbuf_end; + PyObject *excobj, *outobj; +} MultibyteEncodeBuffer; + +PyDoc_STRVAR(MultibyteCodec_Encode__doc__, "I.encode(unicode, [,errors]) -> (string, length consumed)\n\ \n\ Return an encoded string version of `unicode'. errors may be given to\n\ @@ -41,26 +47,8 @@ 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\ registered with codecs.register_error that can handle UnicodeEncodeErrors."); -PyDoc_STRVAR(multibytedecoder_doc, -"I.decode(string, [,errors]) -> (unicodeobject, length consumed)\n\ -\n\ -Decodes `string' using I, an MultibyteDecode instance. errors may be given\n\ -to set a different error handling scheme. Default is 'strict' meaning\n\ -that encoding errors raise a UnicodeDecodeError. Other possible values\n\ -are 'ignore' and 'replace' as well as any other name registerd with\n\ -codecs.register_error that is able to handle UnicodeDecodeErrors."); - static char *kwarglist[] = {"input", "errors", NULL}; -static PyObject *multibyteencoder_encode(PyMultibyteEncoderObject *, - PyMultibyteEncoder_Context *, const Py_UNICODE *, - int, PyObject *, int); -static PyObject *multibytedecoder_decode(PyMultibyteDecoderObject *, - PyMultibyteDecoder_Context *, const char *, - int, PyObject *, int); -static PyObject *mbstreamreader_create(PyMultibyteDecoderObject *, - PyObject *, const char *); - static PyObject * make_tuple(PyObject *unicode, int len) { @@ -99,37 +87,8 @@ return PyCodec_LookupError(errors); } -static const char * -multibyte_strerror(int e) -{ - const char *msg; - - switch (e) { - case MBERR_TOOSMALL: - msg = "not output buffer space"; - break; - case MBERR_TOOFEW: - msg = "incomplete multibyte sequence"; - break; - case MBERR_ILLSEQ: - msg = "illegal multibyte encoding sequence"; - break; - case MBERR_UNDEFINED: - msg = "undefined character or no map"; - break; - case MBERR_INTERNAL: - msg = "internal error"; - break; - default: - msg = "unknown error"; - break; - } - - return msg; -} - static int -expand_encodebuffer(PyMultibyteEncoder_Buffer *buf, int esize) +expand_encodebuffer(MultibyteEncodeBuffer *buf, int esize) { int orgpos, orgsize; @@ -152,103 +111,93 @@ } static int -expand_decodebuffer(PyMultibyteDecoder_Buffer *buf, int esize) -{ - int orgpos, orgsize; +multibytecodec_error(PyMultibyteCodec *codec, + PyMultibyteCodec_State *state, + MultibyteEncodeBuffer *buf, + PyObject *errors, int e) +{ + PyObject *retobj = NULL, *retstr = NULL, *argsobj, *tobj; + const char *reason; + int retstrsize, newpos, start, end, esize; + + if (e == MBERR_TOOSMALL) { + RESERVE_ENCODEBUFFER(buf, -1); + return 0; /* retry it */ + } else if (e > 0) { + reason = "illegal multibyte sequence"; + esize = e; + } else { + switch (e) { + case MBERR_TOOFEW: + reason = "incomplete multibyte sequence"; + esize = (int)(buf->inbuf_end - buf->inbuf); + break; + case MBERR_INTERNAL: + PyErr_SetString(PyExc_RuntimeError, "internal codec error"); + return -1; + default: + PyErr_SetString(PyExc_RuntimeError, "unknown runtime error"); + return -1; + } + } - orgpos = (int)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); - orgsize = PyUnicode_GET_SIZE(buf->outobj); - if (PyUnicode_Resize(&buf->outobj, orgsize + ( - esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) - return -1; + if (errors == ERROR_REPLACE) { + const Py_UNICODE replchar = '?', *inbuf = &replchar; + int r; - buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos; - buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj) - + PyUnicode_GET_SIZE(buf->outobj); + for (;;) { + size_t outleft; - return 0; -} -#define RESERVE_DECODEBUFFER(buf, s) { \ - if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ - if (expand_decodebuffer(buf, s) == -1) \ - goto errorexit; \ -} + outleft = (size_t)(buf->outbuf_end - buf->outbuf); + r = codec->encode(state, &inbuf, 1, &buf->outbuf, outleft); + if (r == MBERR_TOOSMALL) { + RESERVE_ENCODEBUFFER(buf, -1); + continue; + } else + break; + } -static int -multibyteencoder_error(PyMultibyteEncoderObject *self, - PyMultibyteEncoder_Context *ctx, - PyMultibyteEncoder_Buffer *buf, - PyMultibyteEncoder_Error *err, - const char *reason, - PyObject *errors) -{ - PyObject *argsobj, *retobj = NULL, *tobj; - PyObject *retstr = NULL, *exc = NULL; - int retstrsize, newpos, e, nonctxbuf; + if (r != 0) { + RESERVE_ENCODEBUFFER(buf, 1); + *buf->outbuf++ = '?'; + } + } + if (errors == ERROR_IGNORE) { + buf->inbuf += esize; + return 0; + } - nonctxbuf = (buf->inbuf_top != err->object); + start = (int)(buf->inbuf - buf->inbuf_top); + end = start + esize; - if (errors == ERROR_REPLACE) { - if (self->codec->putrepl != NULL) { - /* we can't put just '?' here. - * consider utf-16 or iso-2022 shifted state */ - do { - e = self->codec->putrepl(self->hdl, ctx, buf); - if (e == MBERR_TOOSMALL) { - RESERVE_ENCODEBUFFER(buf, -1); - continue; - } - } while (0); - - if (!e) return err->end; - else if (e == MBERR_INTERNAL) - return -1; - /* fall through to put '?' for other errors */ - } - RESERVE_ENCODEBUFFER(buf, 1); - *buf->outbuf++ = '?'; - return err->end; - } else if (errors == ERROR_IGNORE) - return err->end; - - if (!nonctxbuf) { - /* use cached exception object if available */ - if (buf->excobj == NULL) { - exc = PyUnicodeEncodeError_Create(self->hdl->encoding, - err->object, err->objlength, err->start, err->end, reason); - if (exc == NULL) - goto errorexit; - buf->excobj = exc; - } else { - exc = buf->excobj; - if (PyUnicodeEncodeError_SetStart(exc, err->start) != 0) - goto errorexit; - if (PyUnicodeEncodeError_SetEnd(exc, err->end) != 0) - goto errorexit; - if (PyUnicodeEncodeError_SetReason(exc, reason) != 0) - goto errorexit; - } + /* use cached exception object if available */ + if (buf->excobj == NULL) { + buf->excobj = PyUnicodeEncodeError_Create(codec->encoding, + buf->inbuf_top, (int)(buf->inbuf_end - buf->inbuf_top), + start, end, reason); + if (buf->excobj == NULL) + goto errorexit; } else { - exc = PyUnicodeEncodeError_Create(self->hdl->encoding, - err->object, err->objlength, 0, err->objlength, reason); - if (exc == NULL) + if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0) + goto errorexit; + if (PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0) + goto errorexit; + if (PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) goto errorexit; } if (errors == ERROR_STRICT) { - PyCodec_StrictErrors(exc); + PyCodec_StrictErrors(buf->excobj); goto errorexit; } - /* `errors' is a real python object from here */ - assert(errors > ERROR_MAX); - +#if 0 argsobj = PyTuple_New(1); if (argsobj == NULL) goto errorexit; - PyTuple_SET_ITEM(argsobj, 0, exc); - Py_INCREF(exc); + PyTuple_SET_ITEM(argsobj, 0, buf->excobj); + Py_INCREF(buf->excobj); retobj = PyObject_CallObject(errors, argsobj); Py_DECREF(argsobj); if (retobj == NULL) @@ -257,12 +206,12 @@ if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) || !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { - PyErr_SetString(PyExc_ValueError, "encoding error handler " - "must return (unicode, int) tuple"); + PyErr_SetString(PyExc_ValueError, + "encoding error handler must return (unicode, int) tuple"); goto errorexit; } - retstr = multibyteencoder_encode(self, ctx, PyUnicode_AS_UNICODE(tobj), + retstr = multibytecodec_encode(self, ic, PyUnicode_AS_UNICODE(tobj), PyUnicode_GET_SIZE(tobj), ERROR_STRICT, 0); if (retstr == NULL) goto errorexit; @@ -274,405 +223,125 @@ buf->outbuf += retstrsize; newpos = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1)); - if (nonctxbuf) { - if (newpos < 0) - newpos += err->objlength; - if (newpos < 0 || newpos > err->objlength) { - PyErr_Format(PyExc_IndexError, - "position %d from error handler out of bounds", newpos); - goto errorexit; - } - newpos -= err->objlength - err->end + err->start; - /* translating to inbuf position: rewind by (err->end - err->start) */ - } else { - if (newpos < 0) - newpos += INBUFLEN(buf); - if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { - PyErr_Format(PyExc_IndexError, - "position %d from error handler out of bounds", newpos); - goto errorexit; - } + if (newpos < 0) + newpos += (int)(buf->inbuf_end - buf->inbuf_top); + if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { + PyErr_Format(PyExc_IndexError, + "position %d from error handler out of bounds", newpos); + goto errorexit; + } + switch (self->unitype) { + case UNIINTERNAL_UTF_8: + if (newpos >= start) /* buf->rinbuf is at 'start' position now */ + buf->rinbuf = skipchars_utf8(buf->rinbuf, newpos - start); + else + buf->rinbuf = skipchars_utf8(buf->rinbuf_top, newpos); + break; + case UNIINTERNAL_UCS_SWAPPED: + buf->rinbuf = buf->rinbuf_top + newpos * Py_UNICODE_SIZE; + break; + default: + break; } + buf->inbuf = buf->inbuf_top + newpos; Py_DECREF(retobj); Py_DECREF(retstr); - if (nonctxbuf) { - Py_DECREF(exc); - } - return newpos; + return 0; +#endif errorexit: Py_XDECREF(retobj); Py_XDECREF(retstr); - if (nonctxbuf) { - Py_XDECREF(exc); - } return -1; } -static PyObject * -multibyteencoder_encode(PyMultibyteEncoderObject *self, - PyMultibyteEncoder_Context *ctx, - const Py_UNICODE *data, int datalen, - PyObject *errors, int doflush) -{ - PyMultibyteEncoder_Buffer buf; - int finalsize; - - if (datalen == 0) - return PyString_FromString(""); - - buf.excobj = NULL; - buf.inbuf = buf.inbuf_top = data; - buf.inbuf_end = buf.inbuf_top + datalen; - buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2); - if (buf.outobj == NULL) - goto errorexit; - buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj); - buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj); - - while (1) { - PyMultibyteEncoder_Error err; - int e, newpos; - - if (buf.inbuf < buf.inbuf_end) { - e = self->codec->encode(self->hdl, ctx, &buf, &err); - if (!e) continue; - } else if (buf.inbuf == buf.inbuf_end) { - if (doflush && self->codec->flush != NULL) { - e = self->codec->flush(self->hdl, ctx, &buf, &err); - if (!e) break; - else if (e != MBERR_TOOSMALL) - doflush = 0; - } else - break; - } else break; - - switch (e) { - case MBERR_TOOSMALL: - RESERVE_ENCODEBUFFER(&buf, -1); - break; - case MBERR_INTERNAL: - /* module sets an exception */ - goto errorexit; - default: - newpos = multibyteencoder_error(self, ctx, &buf, &err, - multibyte_strerror(e), errors); - if (err.object != buf.inbuf_top) - PyMem_Free(err.object); - if (newpos == -1) - goto errorexit; - buf.inbuf = buf.inbuf_top + newpos; - if (self->codec->reset != NULL && self->codec->reset(self->hdl, ctx)) - return NULL; - } - } - - finalsize = (int)((char*)buf.outbuf - PyString_AS_STRING(buf.outobj)); - - if (finalsize != PyString_GET_SIZE(buf.outobj)) - if (_PyString_Resize(&buf.outobj, finalsize) == -1) - goto errorexit; - - Py_XDECREF(buf.excobj); - - return buf.outobj; - -errorexit: - Py_XDECREF(buf.excobj); - Py_XDECREF(buf.outobj); - - return NULL; -} - -static PyObject * -multibyteencoder_call(PyMultibyteEncoderObject *self, - PyObject *args, PyObject *kwargs) +static int +multibytecodec_iencode(PyMultibyteCodec *codec, + PyMultibyteCodec_State *state, + MultibyteEncodeBuffer *buf, + PyObject *errors) { - PyMultibyteEncoder_Context ctx; - Py_UNICODE *data; - PyObject *errorcb, *r; - const char *errors = NULL; - int datalen; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "u#|z:encode", - kwarglist, &data, &datalen, &errors)) - return NULL; - - errorcb = get_errorcallback(errors); - if (errorcb == NULL) - return NULL; - - if (self->codec->open != NULL && self->codec->open(self->hdl, &ctx)) - goto errorexit_noclose; - - r = multibyteencoder_encode(self, &ctx, data, datalen, errorcb, 1); - if (r == NULL) - goto errorexit; - - if (errorcb > ERROR_MAX) { - Py_DECREF(errorcb); - } - if (self->codec->close != NULL) - self->codec->close(self->hdl, &ctx); - - return make_tuple(r, datalen); - -errorexit: - if (self->codec->close != NULL) - self->codec->close(self->hdl, &ctx); -errorexit_noclose: - if (errorcb > ERROR_MAX) { - Py_DECREF(errorcb); + for (;;) { + int r; + size_t inleft, outleft; + + /* we don't reuse inleft and outleft here. + * error callbacks can relocate the cursor anywhere on buffer */ + inleft = (size_t)buf->inbuf_end - (size_t)buf->inbuf; + outleft = (size_t)(buf->outbuf_end - buf->outbuf); + r = codec->encode(state, &buf->inbuf, inleft, + &buf->outbuf, outleft); + + if (r == 0) + return 0; + else if (multibytecodec_error(codec, state, buf, errors, r)) + return -1; + else if (buf->inbuf >= buf->inbuf_end) + return 0; } - - return NULL; } -static void -multibyteencoder_dealloc(PyMultibyteEncoderObject *self) -{ - if (self->codec != NULL && self->codec->shutdown != NULL) - self->codec->shutdown(self->hdl); - PyMem_Del(self->hdl); - PyObject_Del(self); -} - -static PyObject * -multibyteencoder_repr(PyMultibyteEncoderObject *self) -{ - return PyString_FromFormat( - "<MultibyteEncoder codec='%s' encoding='%s'>", - self->codec->name, self->hdl->encoding); -} - -static PyTypeObject PyMultibyteEncoder_Type = { - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ - "MultibyteEncoder", /*tp_name*/ - sizeof(PyMultibyteEncoderObject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - /* methods */ - (destructor)multibyteencoder_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - (reprfunc)multibyteencoder_repr, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - (ternaryfunc)multibyteencoder_call, /*tp_call*/ - 0, /*tp_str*/ - PyObject_GenericGetAttr, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT, /*tp_flags*/ - multibyteencoder_doc, /*tp_doc*/ -}; - static int -multibytedecoder_error(PyMultibyteDecoder_Handle *hdl, - PyMultibyteDecoder_Context *ctx, - PyMultibyteDecoder_Buffer *buf, - PyMultibyteDecoder_Error *err, - const char *reason, - PyObject *errors) +multibytecodec_prepencoderbuf(MultibyteEncodeBuffer *buf, + const Py_UNICODE *data, int datalen) { - PyObject *argsobj, *retobj = NULL; - PyObject *exc = NULL, *retuni = NULL; - int retunisize, newpos, nonctxbuf; - - nonctxbuf = (buf->inbuf_top != err->object); - - if (errors == ERROR_REPLACE) { - RESERVE_DECODEBUFFER(buf, 1); - *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER; - return err->end; - } else if (errors == ERROR_IGNORE) - return err->end; - - if (!nonctxbuf) { - /* use cached exception object if available */ - if (buf->excobj == NULL) { - exc = PyUnicodeDecodeError_Create( - hdl->encoding, buf->inbuf_top, - INBUFLEN(buf), err->start, err->end, reason); - if (exc == NULL) - goto errorexit; - buf->excobj = exc; - } else { - exc = buf->excobj; - if (PyUnicodeDecodeError_SetStart(exc, err->start) != 0) - goto errorexit; - if (PyUnicodeDecodeError_SetEnd(exc, err->end) != 0) - goto errorexit; - if (PyUnicodeDecodeError_SetReason(exc, reason) != 0) - goto errorexit; - } - } else { - exc = PyUnicodeDecodeError_Create(hdl->encoding, - err->object, err->objlength, 0, err->objlength, reason); - if (exc == NULL) - goto errorexit; - } - - if (errors == ERROR_STRICT) { - PyCodec_StrictErrors(exc); - goto errorexit; - } - - /* `errors' is a real python object from here */ - assert(errors > ERROR_MAX); - - argsobj = PyTuple_New(1); - if (argsobj == NULL) - goto errorexit; - - PyTuple_SET_ITEM(argsobj, 0, exc); - Py_INCREF(exc); - retobj = PyObject_CallObject(errors, argsobj); - Py_DECREF(argsobj); - if (retobj == NULL) - goto errorexit; - - if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || - !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || - !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { - PyErr_SetString(PyExc_ValueError, "decoding error handler " - "must return (unicode, int) tuple"); - goto errorexit; - } - - retunisize = PyUnicode_GET_SIZE(retuni); - if (retunisize > 0) { - RESERVE_DECODEBUFFER(buf, retunisize); - memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni), - retunisize * Py_UNICODE_SIZE); - buf->outbuf += retunisize; - } - - newpos = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1)); - if (nonctxbuf) { - if (newpos < 0) - newpos += err->objlength; - if (newpos < 0 || newpos > err->objlength) { - PyErr_Format(PyExc_IndexError, - "position %d from error handler out of bounds", newpos); - goto errorexit; - } - newpos -= err->objlength - err->end + err->start; - /* translating to inbuf position: rewind by (err->end - err->start) */ - } else { - if (newpos < 0) - newpos += INBUFLEN(buf); - if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { - PyErr_Format(PyExc_IndexError, - "position %d from error handler out of bounds", newpos); - goto errorexit; - } - } - Py_DECREF(retobj); - if (nonctxbuf) { - Py_DECREF(exc); - } - return newpos; + buf->excobj = NULL; + buf->inbuf = buf->inbuf_top = data; + buf->inbuf_end = buf->inbuf_top + datalen; + buf->outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16); + if (buf->outobj == NULL) + return -1; + buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj); + buf->outbuf_end = buf->outbuf + PyString_GET_SIZE(buf->outobj); -errorexit: - Py_XDECREF(retobj); - if (nonctxbuf) { - Py_XDECREF(exc); - } - return -1; + return 0; } static PyObject * -multibytedecoder_decode(PyMultibyteDecoderObject *self, - PyMultibyteDecoder_Context *ctx, - const char *data, int datalen, - PyObject *errors, int doflush) +multibytecodec_encode(PyMultibyteCodec *codec, + PyMultibyteCodec_State *state, + const Py_UNICODE *data, int datalen, + PyObject *errors) { - PyMultibyteDecoder_Buffer buf; - int finalsize; + MultibyteEncodeBuffer buf; + int finalsize; if (datalen == 0) - return PyUnicode_FromUnicode(NULL, 0); + return PyString_FromString(""); - buf.excobj = NULL; - buf.inbuf = buf.inbuf_top = (unsigned char *)data; - buf.inbuf_end = buf.inbuf_top + datalen; - buf.outobj = PyUnicode_FromUnicode(NULL, datalen); - if (buf.outobj == NULL) + if (multibytecodec_prepencoderbuf(&buf, data, datalen) == -1) goto errorexit; - buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); - buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); - - while (1) { - PyMultibyteDecoder_Error err; - int e, newpos; - - if (buf.inbuf < buf.inbuf_end) { - e = self->codec->decode(self->hdl, ctx, &buf, &err); - if (!e) continue; - } else if (buf.inbuf == buf.inbuf_end) { - if (doflush && self->codec->flush != NULL) { - e = self->codec->flush(self->hdl, ctx, &buf, &err); - if (!e) break; - else if (e != MBERR_TOOSMALL) - doflush = 0; - } else - break; - } else break; - switch (e) { - case MBERR_TOOSMALL: - RESERVE_DECODEBUFFER(&buf, -1); - break; - case MBERR_INTERNAL: - /* module sets an exception */ - goto errorexit; - default: - newpos = multibytedecoder_error(self->hdl, ctx, &buf, &err, - multibyte_strerror(e), errors); - if (err.object != buf.inbuf_top) - PyMem_Free(err.object); - if (newpos == -1) - goto errorexit; - buf.inbuf = buf.inbuf_top + newpos; - if (self->codec->reset != NULL && self->codec->reset(self->hdl, ctx)) - return NULL; - } - } + if (multibytecodec_iencode(codec, state, &buf, errors) == -1) + goto errorexit; - finalsize = (int)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); + finalsize = (int)((char*)buf.outbuf - PyString_AS_STRING(buf.outobj)); - if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) - if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) + if (finalsize != PyString_GET_SIZE(buf.outobj)) + if (_PyString_Resize(&buf.outobj, finalsize) == -1) goto errorexit; Py_XDECREF(buf.excobj); - return buf.outobj; errorexit: Py_XDECREF(buf.excobj); Py_XDECREF(buf.outobj); - return NULL; } static PyObject * -multibytedecoder_call(PyMultibyteDecoderObject *self, +MultibyteCodec_Encode(PyMultibyteCodecObject *self, PyObject *args, PyObject *kwargs) { - PyMultibyteDecoder_Context ctx; - PyObject *errorcb, *r; - const char *errors = NULL; - char *data; - int datalen; + PyMultibyteCodec_State state; + Py_UNICODE *data; + PyObject *errorcb, *r; + const char *errors = NULL; + int datalen; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|z:decode", + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "u#|z:encode", kwarglist, &data, &datalen, &errors)) return NULL; @@ -680,445 +349,100 @@ if (errorcb == NULL) return NULL; - if (self->codec->open != NULL && self->codec->open(self->hdl, &ctx)) - goto errorexit_noclose; - - r = multibytedecoder_decode(self, &ctx, data, datalen, errorcb, 1); + state.p = NULL; + r = multibytecodec_encode(self->codec, &state, data, datalen, errorcb); if (r == NULL) goto errorexit; if (errorcb > ERROR_MAX) { Py_DECREF(errorcb); } - if (self->codec->close != NULL) - self->codec->close(self->hdl, &ctx); - return make_tuple(r, datalen); errorexit: - if (self->codec->close != NULL) - self->codec->close(self->hdl, &ctx); -errorexit_noclose: if (errorcb > ERROR_MAX) { Py_DECREF(errorcb); } - return NULL; } -PyDoc_STRVAR(multibytedecoder_makestream__doc__, -"I.makestream(stream, errors='strict')\n" -"Return an StreamReader instance of `I' multibyte decode."); - -static PyObject * -multibytedecoder_makestream(PyMultibyteDecoderObject *self, - PyObject *args, PyObject *kwargs) -{ - static char *stream_kwarglist[] = {"stream", "errors", NULL}; - PyObject *stream; - char *errors = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:makestream", - stream_kwarglist, &stream, &errors)) - return NULL; - - return mbstreamreader_create(self, stream, errors); -} - -static void -multibytedecoder_dealloc(PyMultibyteDecoderObject *self) -{ - if (self->codec != NULL && self->codec->shutdown != NULL) - self->codec->shutdown(self->hdl); - PyMem_Del(self->hdl); - PyObject_Del(self); -} - -static PyObject * -multibytedecoder_repr(PyMultibyteDecoderObject *self) -{ - return PyString_FromFormat( - "<MultibyteDecoder codec='%s' encoding='%s'>", - self->codec->name, self->hdl->encoding); -} - -static struct PyMethodDef multibytedecoder_methods[] = { - {"makestream", (PyCFunction)multibytedecoder_makestream, +static struct PyMethodDef multibytecodec_methods[] = { + {"encode", (PyCFunction)MultibyteCodec_Encode, METH_VARARGS | METH_KEYWORDS, - multibytedecoder_makestream__doc__}, - {NULL, NULL}, -}; - -static PyTypeObject PyMultibyteDecoder_Type = { - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ - "MultibyteDecoder", /*tp_name*/ - sizeof(PyMultibyteDecoderObject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - /* methods */ - (destructor)multibytedecoder_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - (reprfunc)multibytedecoder_repr, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - (ternaryfunc)multibytedecoder_call, /*tp_call*/ - 0, /*tp_str*/ - PyObject_GenericGetAttr, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT, /*tp_flags*/ - multibytedecoder_doc, /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iterext*/ - multibytedecoder_methods, /*tp_methods*/ -}; - -static PyObject * -mbstreamreader_iread(PyMultibyteStreamReaderObject *self, - const char *method, int sizehint) -{ - PyMultibyteDecoder_Buffer buf; - PyObject *cres; - int finalsize = 0, rsize, flushed = 0; - - if (sizehint == 0) - return PyUnicode_FromUnicode(NULL, 0); - - buf.outobj = buf.excobj = NULL; - cres = NULL; - - while (1) { - if (sizehint < 0) - cres = PyObject_CallMethod(self->stream, (char *)method, NULL); - else - cres = PyObject_CallMethod(self->stream, - (char *)method, "i", sizehint); - if (cres == NULL) - goto errorexit; - - if (!PyString_Check(cres)) { - PyErr_SetString(PyExc_TypeError, - "stream function returned a non-string object"); - goto errorexit; - } - - rsize = PyString_GET_SIZE(cres); - buf.inbuf = buf.inbuf_top = (unsigned char *)PyString_AS_STRING(cres); - buf.inbuf_end = buf.inbuf_top + rsize; - if (buf.outobj == NULL) { - buf.outobj = PyUnicode_FromUnicode(NULL, rsize); - if (buf.outobj == NULL) - goto errorexit; - buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); - buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); - } - - while (buf.inbuf < buf.inbuf_end || (rsize == 0 && !flushed)) { - PyMultibyteDecoder_Error err; - int e, newpos; - - if (rsize == 0) { /* end of file */ - if (self->codec->flush != NULL) { - e = self->codec->flush(self->hdl, &self->ctx, &buf, &err); - if (e != MBERR_TOOSMALL) flushed = 1; - if (!e) break; - } else { - flushed = 1; - break; - } - } else { - e = self->codec->decode(self->hdl, &self->ctx, &buf, &err); - if (!e) continue; - } - - switch (e) { - case MBERR_TOOSMALL: - RESERVE_DECODEBUFFER(&buf, -1); - break; - case MBERR_INTERNAL: - goto errorexit; - default: - newpos = multibytedecoder_error(self->hdl, &self->ctx, &buf, - &err, multibyte_strerror(e), self->errors); - if (err.object != buf.inbuf_top) - PyMem_Free(err.object); - if (newpos == -1) - goto errorexit; - buf.inbuf = buf.inbuf_top + newpos; - break; - } - } - - finalsize = (int)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); - - Py_DECREF(cres); - cres = NULL; - - if (sizehint < 0 || finalsize != 0 || rsize == 0) - break; - - sizehint = 1; /* read 1 more byte and retry */ - } - - if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) - if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) - goto errorexit; - - Py_XDECREF(cres); - Py_XDECREF(buf.excobj); - return buf.outobj; - -errorexit: - Py_XDECREF(cres); - Py_XDECREF(buf.excobj); - Py_XDECREF(buf.outobj); - return NULL; -} - -static PyObject * -mbstreamreader_read(PyMultibyteStreamReaderObject *self, PyObject *args) -{ - PyObject *sizeobj = NULL; - long size; - - if (!PyArg_ParseTuple(args, "|O:read", &sizeobj)) - return NULL; - - if (sizeobj == Py_None || sizeobj == NULL) - size = -1; - else if (PyInt_Check(sizeobj)) - size = PyInt_AsLong(sizeobj); - else { - PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); - return NULL; - } - - return mbstreamreader_iread(self, "read", size); -} - -static PyObject * -mbstreamreader_readline(PyMultibyteStreamReaderObject *self, PyObject *args) -{ - PyObject *sizeobj = NULL; - long size; - - if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj)) - return NULL; - - if (sizeobj == Py_None || sizeobj == NULL) - size = -1; - else if (PyInt_Check(sizeobj)) - size = PyInt_AsLong(sizeobj); - else { - PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); - return NULL; - } - - return mbstreamreader_iread(self, "readline", size); -} - -static PyObject * -mbstreamreader_readlines(PyMultibyteStreamReaderObject *self, PyObject *args) -{ - PyObject *sizehintobj = NULL, *r, *sr; - long sizehint; - - if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj)) - return NULL; - - if (sizehintobj == Py_None || sizehintobj == NULL) - sizehint = -1; - else if (PyInt_Check(sizehintobj)) - sizehint = PyInt_AsLong(sizehintobj); - else { - PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); - return NULL; - } - - r = mbstreamreader_iread(self, "read", sizehint); - if (r == NULL) - return NULL; - - sr = PyUnicode_Splitlines(r, 1); - Py_DECREF(r); - return sr; -} - -static PyObject * -mbstreamreader_reset(PyMultibyteStreamReaderObject *self, PyObject *args) -{ - if (self->codec->reset != NULL && self->codec->reset(self->hdl, &self->ctx)) - return NULL; - - Py_INCREF(Py_None); - return Py_None; -} - -static struct PyMethodDef mbstreamreader_methods[] = { - {"read", (PyCFunction)mbstreamreader_read, METH_VARARGS, NULL}, - {"readline", (PyCFunction)mbstreamreader_readline, METH_VARARGS, NULL}, - {"readlines", (PyCFunction)mbstreamreader_readlines, METH_VARARGS, NULL}, - {"reset", (PyCFunction)mbstreamreader_reset, METH_NOARGS, NULL}, + MultibyteCodec_Encode__doc__}, {NULL, NULL}, }; static void -mbstreamreader_dealloc(PyMultibyteStreamReaderObject *self) -{ - if (self->codec != NULL && self->codec->close != NULL) - self->codec->close(self->hdl, &self->ctx); - if (self->errors > ERROR_MAX) { - Py_DECREF(self->errors); - } - Py_XDECREF(self->stream); - Py_XDECREF(self->decoder); - PyObject_Del(self); -} - -static PyObject * -mbstreamreader_repr(PyMultibyteStreamReaderObject *self) +multibytecodec_dealloc(PyMultibyteCodecObject *self) { - return PyString_FromFormat( - "<MultibyteStreamReader codec='%s' encoding='%s'>", - self->codec->name, self->hdl->encoding); -} + PyObject_Del(self); +} -static PyTypeObject PyMultibyteStreamReader_Type = { +static PyTypeObject PyMultibyteCodec_Type = { PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ - "MultibyteStreamReader", /*tp_name*/ - sizeof(PyMultibyteStreamReaderObject), /*tp_basicsize*/ - 0, /*tp_itemsize*/ + 0, /* ob_size */ + "MultibyteCodec", /* tp_name */ + sizeof(PyMultibyteCodecObject), /* tp_basicsize */ + 0, /* tp_itemsize */ /* methods */ - (destructor)mbstreamreader_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - (reprfunc)mbstreamreader_repr, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - PyObject_GenericGetAttr, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT, /*tp_flags*/ - 0, /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iterext*/ - mbstreamreader_methods, /*tp_methods*/ + (destructor)multibytecodec_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iterext */ + multibytecodec_methods, /* tp_methods */ }; PyObject * -_PyMultibyteEncoder_Create(PyMultibyteEncoder_Codec *codec, - const char *encoding) +__create_codec(PyObject *ignore, PyObject *arg) { - PyMultibyteEncoderObject *self; + PyMultibyteCodecObject *self; - if (strlen(encoding) >= MAXENCODINGLEN) { - PyErr_SetString(PyExc_ValueError, "encoding name too long"); + if (!PyCObject_Check(arg)) { + PyErr_SetString(PyExc_ValueError, "argument type invalid"); return NULL; } - self = PyObject_New(PyMultibyteEncoderObject, &PyMultibyteEncoder_Type); + self = PyObject_New(PyMultibyteCodecObject, &PyMultibyteCodec_Type); if (self == NULL) return NULL; - self->codec = codec; - self->hdl = PyMem_New(PyMultibyteEncoder_Handle, 1); - if (self->hdl == NULL) - return NULL; - strcpy(self->hdl->encoding, encoding); - self->hdl->config = NULL; - - if (codec->init != NULL && codec->init(self->hdl)) { - self->codec = NULL; - Py_DECREF(self); - return NULL; - } + self->codec = PyCObject_AsVoidPtr(arg); - return (PyObject *)self; + return (PyObject *)self; } -PyObject * -_PyMultibyteDecoder_Create(PyMultibyteDecoder_Codec *codec, - const char *encoding) -{ - PyMultibyteDecoderObject *self; - - if (strlen(encoding) >= MAXENCODINGLEN) { - PyErr_SetString(PyExc_ValueError, "encoding name too long"); - return NULL; - } - - self = PyObject_New(PyMultibyteDecoderObject, &PyMultibyteDecoder_Type); - if (self == NULL) - return NULL; - - self->codec = codec; - self->hdl = PyMem_New(PyMultibyteDecoder_Handle, 1); - if (self->hdl == NULL) - return NULL; - strcpy(self->hdl->encoding, encoding); - self->hdl->config = NULL; - - if (codec->init != NULL && codec->init(self->hdl)) { - self->codec = NULL; - Py_DECREF(self); - return NULL; - } - - return (PyObject *)self; -} +static struct PyMethodDef __methods[] = { + {"__create_codec", (PyCFunction)__create_codec, METH_O}, + {NULL, NULL}, +}; -static PyObject * -mbstreamreader_create(PyMultibyteDecoderObject *decoder, - PyObject *stream, const char *errors) +void +initmultibytecodec(void) { - PyMultibyteStreamReaderObject *self; - - self = PyObject_New(PyMultibyteStreamReaderObject, - &PyMultibyteStreamReader_Type); - if (self == NULL) - return NULL; - - self->errors = get_errorcallback(errors); - if (self->errors == NULL) { - self->decoder = self->stream = NULL; - Py_DECREF(self); - return NULL; - } - self->hdl = decoder->hdl; - self->decoder = (PyObject *)decoder; - Py_INCREF(decoder); - self->stream = stream; - Py_INCREF(stream); - - self->codec = decoder->codec; - if (self->codec->open != NULL) - if (self->codec->open(self->hdl, &self->ctx)) { - self->codec = NULL; - Py_DECREF(self); - return NULL; - } + Py_InitModule("multibytecodec", __methods); - return (PyObject *)self; + if (PyErr_Occurred()) + Py_FatalError("can't initialize the multibytecodec module"); } /* 1.3 +7 -7 cjkcodecs/src/multibytecodec.h Index: multibytecodec.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.h,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- multibytecodec.h 18 May 2003 23:21:29 -0000 1.2 +++ multibytecodec.h 19 May 2003 02:53:49 -0000 1.3 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.h,v 1.2 2003/05/18 23:21:29 perky Exp $ + * $Id: multibytecodec.h,v 1.3 2003/05/19 02:53:49 perky Exp $ */ #ifndef _PYTHON_MULTIBYTECODEC_H_ @@ -41,11 +41,11 @@ } PyMultibyteCodec_State; typedef int (*mbencode_func)(PyMultibyteCodec_State *state, - const Py_UNICODE **inbuf, int *inleft, - unsigned char **outbuf, int *outleft); + const Py_UNICODE **inbuf, int inleft, + unsigned char **outbuf, int outleft); typedef int (*mbdecode_func)(PyMultibyteCodec_State *state, - const unsigned char **inbuf, int *inleft, - Py_UNICODE **outbuf, int *outleft); + const unsigned char **inbuf, int inleft, + Py_UNICODE **outbuf, int outleft); typedef struct { const char *encoding; @@ -62,7 +62,7 @@ typedef struct { PyObject_HEAD PyMultibyteCodec *codec; - PyMultibyteCodec_State *state; + PyMultibyteCodec_State state; unsigned char pending[MAXPENDING]; int pendingsize; PyObject *stream, *errors; @@ -71,7 +71,7 @@ typedef struct { PyObject_HEAD PyMultibyteCodec *codec; - PyMultibyteCodec_State *state; + PyMultibyteCodec_State state; Py_UNICODE pending[MAXPENDING]; int pendingsize; PyObject *stream, *errors; |