Thread: [KoCo-CVS] [Commit] cjkcodecs/src codeccommon.h multibytecodec.c multibytecodec.h
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-05-20 04:11:44
|
perky 03/05/19 21:11:44 Modified: src codeccommon.h multibytecodec.c multibytecodec.h Log: Implement StreamWriter. Revision Changes Path 1.4 +5 -5 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- codeccommon.h 19 May 2003 23:07:12 -0000 1.3 +++ codeccommon.h 20 May 2003 04:11:44 -0000 1.4 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: codeccommon.h,v 1.3 2003/05/19 23:07:12 perky Exp $ + * $Id: codeccommon.h,v 1.4 2003/05/20 04:11:44 perky Exp $ */ #include "Python.h" @@ -40,16 +40,16 @@ #define ENCODER(encoding) \ static int encoding##_encode( \ - PyMultibyteCodec_State *state, \ + MultibyteCodec_State *state, \ const Py_UNICODE **inbuf, size_t inleft, \ unsigned char **outbuf, size_t outleft) #define DECODER(encoding) \ static int encoding##_decode( \ - PyMultibyteCodec_State *state, \ + MultibyteCodec_State *state, \ const unsigned char **inbuf, size_t inleft, \ Py_UNICODE **outbuf, size_t outleft) #define CODECDEF(encoding) \ - static PyMultibyteCodec __codec = { \ + static MultibyteCodec __codec = { \ #encoding, encoding##_encode, encoding##_decode \ }; #define NOMETHODS(name) \ @@ -106,7 +106,7 @@ } static PyObject * -createcodec(PyObject *cofunc, PyMultibyteCodec *codec) +createcodec(PyObject *cofunc, MultibyteCodec *codec) { PyObject *args, *r; 1.8 +206 -20 cjkcodecs/src/multibytecodec.c Index: multibytecodec.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.c,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- multibytecodec.c 19 May 2003 22:56:37 -0000 1.7 +++ multibytecodec.c 20 May 2003 04:11:44 -0000 1.8 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.c,v 1.7 2003/05/19 22:56:37 perky Exp $ + * $Id: multibytecodec.c,v 1.8 2003/05/20 04:11:44 perky Exp $ */ #include "Python.h" @@ -45,7 +45,7 @@ } MultibyteDecodeBuffer; PyDoc_STRVAR(MultibyteCodec_Encode__doc__, -"I.encode(unicode, [,errors]) -> (string, length consumed)\n\ +"I.encode(unicode[, errors]) -> (string, length consumed)\n\ \n\ Return an encoded string version of `unicode'. errors may be given to\n\ set a different error handling scheme. Default is 'strict' meaning that\n\ @@ -54,7 +54,7 @@ registered with codecs.register_error that can handle UnicodeEncodeErrors."); PyDoc_STRVAR(MultibyteCodec_Decode__doc__, -"I.decode(string, [,errors]) -> (unicodeobject, length consumed)\n\ +"I.decode(string[, errors]) -> (unicodeobject, length consumed)\n\ \n\ Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\ to set a different error handling scheme. Default is 'strict' meaning\n\ @@ -62,10 +62,16 @@ are 'ignore' and 'replace' as well as any other name registerd with\n\ codecs.register_error that is able to handle UnicodeDecodeErrors."); +PyDoc_STRVAR(MultibyteCodec_StreamWriter__doc__, +"I.StreamWriter(stream[, errors]) -> StreamWriter instance"); + static char *codeckwarglist[] = {"input", "errors", NULL}; +static char *streamkwarglist[] = {"stream", "errors", NULL}; -static PyObject *multibytecodec_encode(PyMultibyteCodec *, - PyMultibyteCodec_State *, const Py_UNICODE *, int, PyObject *); +static PyObject *multibytecodec_encode(MultibyteCodec *, + MultibyteCodec_State *, const Py_UNICODE *, int, PyObject *); +static PyObject *mbstreamwriter_create(MultibyteCodec *, + PyObject *, const char *); static PyObject * make_tuple(PyObject *unicode, int len) @@ -152,8 +158,8 @@ } static int -multibytecodec_encerror(PyMultibyteCodec *codec, - PyMultibyteCodec_State *state, +multibytecodec_encerror(MultibyteCodec *codec, + MultibyteCodec_State *state, MultibyteEncodeBuffer *buf, PyObject *errors, int e) { @@ -281,8 +287,8 @@ } static int -multibytecodec_decerror(PyMultibyteCodec *codec, - PyMultibyteCodec_State *state, +multibytecodec_decerror(MultibyteCodec *codec, + MultibyteCodec_State *state, MultibyteDecodeBuffer *buf, PyObject *errors, int e) { @@ -387,8 +393,8 @@ } static PyObject * -multibytecodec_encode(PyMultibyteCodec *codec, - PyMultibyteCodec_State *state, +multibytecodec_encode(MultibyteCodec *codec, + MultibyteCodec_State *state, const Py_UNICODE *data, int datalen, PyObject *errors) { @@ -438,10 +444,10 @@ } static PyObject * -MultibyteCodec_Encode(PyMultibyteCodecObject *self, +MultibyteCodec_Encode(MultibyteCodecObject *self, PyObject *args, PyObject *kwargs) { - PyMultibyteCodec_State state; + MultibyteCodec_State state; Py_UNICODE *data; PyObject *errorcb, *r; const char *errors = NULL; @@ -473,10 +479,10 @@ } static PyObject * -MultibyteCodec_Decode(PyMultibyteCodecObject *self, +MultibyteCodec_Decode(MultibyteCodecObject *self, PyObject *args, PyObject *kwargs) { - PyMultibyteCodec_State state; + MultibyteCodec_State state; MultibyteDecodeBuffer buf; PyObject *errorcb; const char *data, *errors = NULL; @@ -541,6 +547,20 @@ return NULL; } +static PyObject * +MultibyteCodec_StreamWriter(MultibyteCodecObject *self, + PyObject *args, PyObject *kwargs) +{ + PyObject *stream; + char *errors = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamWriter", + streamkwarglist, &stream, &errors)) + return NULL; + + return mbstreamwriter_create(self->codec, stream, errors); +} + static struct PyMethodDef multibytecodec_methods[] = { {"encode", (PyCFunction)MultibyteCodec_Encode, METH_VARARGS | METH_KEYWORDS, @@ -548,20 +568,23 @@ {"decode", (PyCFunction)MultibyteCodec_Decode, METH_VARARGS | METH_KEYWORDS, MultibyteCodec_Decode__doc__}, + {"StreamWriter",(PyCFunction)MultibyteCodec_StreamWriter, + METH_VARARGS | METH_KEYWORDS, + MultibyteCodec_StreamWriter__doc__}, {NULL, NULL}, }; static void -multibytecodec_dealloc(PyMultibyteCodecObject *self) +multibytecodec_dealloc(MultibyteCodecObject *self) { PyObject_Del(self); } -static PyTypeObject PyMultibyteCodec_Type = { +static PyTypeObject MultibyteCodec_Type = { PyObject_HEAD_INIT(NULL) 0, /* ob_size */ "MultibyteCodec", /* tp_name */ - sizeof(PyMultibyteCodecObject), /* tp_basicsize */ + sizeof(MultibyteCodecObject), /* tp_basicsize */ 0, /* tp_itemsize */ /* methods */ (destructor)multibytecodec_dealloc, /* tp_dealloc */ @@ -590,23 +613,186 @@ multibytecodec_methods, /* tp_methods */ }; +static int +mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, + PyObject *unistr) +{ + PyObject *wr, *r = NULL; + int rsize; + + if (!PyUnicode_Check(unistr)) { + PyErr_SetString(PyExc_TypeError, + "only unicode objects are encodable."); + return -1; + } + + rsize = PyUnicode_GET_SIZE(unistr); + if (rsize == 0) + return 0; + + r = multibytecodec_encode(self->codec, &self->state, + (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr), rsize, self->errors); + if (r == NULL) + goto errorexit; + + wr = PyObject_CallMethod(self->stream, "write", "O", r); + if (wr == NULL) + goto errorexit; + + Py_DECREF(r); + Py_DECREF(wr); + return 0; + +errorexit: + Py_XDECREF(r); + return -1; +} + +static PyObject * +mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *args) +{ + PyObject *strobj; + + if (!PyArg_ParseTuple(args, "O:write", &strobj)) + return NULL; + + if (mbstreamwriter_iwrite(self, strobj)) + return NULL; + else { + Py_INCREF(Py_None); + return Py_None; + } +} + +static PyObject * +mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *args) +{ + PyObject *lines, *strobj; + int i, r; + + if (!PyArg_ParseTuple(args, "O:writelines", &lines)) + return NULL; + + if (!PySequence_Check(lines)) { + PyErr_SetString(PyExc_TypeError, "arg must be a sequence object"); + return NULL; + } + + for (i = 0; i < PySequence_Length(lines); i++) { + /* length can be changed even within this loop */ + strobj = PySequence_GetItem(lines, i); + if (strobj == NULL) + return NULL; + + r = mbstreamwriter_iwrite(self, strobj); + Py_DECREF(strobj); + if (r == -1) + return NULL; + } + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +mbstreamwriter_reset(MultibyteStreamWriterObject *self) +{ + self->state.p = NULL; + + Py_INCREF(Py_None); + return Py_None; +} + +static void +mbstreamwriter_dealloc(MultibyteStreamWriterObject *self) +{ + PyObject_Del(self); +} + +static struct PyMethodDef mbstreamwriter_methods[] = { + {"write", (PyCFunction)mbstreamwriter_write, + METH_VARARGS, NULL}, + {"writelines", (PyCFunction)mbstreamwriter_writelines, + METH_VARARGS, NULL}, + {"reset", (PyCFunction)mbstreamwriter_reset, + METH_NOARGS, NULL}, + {NULL, NULL}, +}; + +static PyTypeObject MultibyteStreamWriter_Type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "MultibyteStreamWriter", /* tp_name */ + sizeof(MultibyteStreamWriterObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)mbstreamwriter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iterext */ + mbstreamwriter_methods, /* tp_methods */ +}; + PyObject * __create_codec(PyObject *ignore, PyObject *arg) { - PyMultibyteCodecObject *self; + MultibyteCodecObject *self; if (!PyCObject_Check(arg)) { PyErr_SetString(PyExc_ValueError, "argument type invalid"); return NULL; } - self = PyObject_New(PyMultibyteCodecObject, &PyMultibyteCodec_Type); + self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type); if (self == NULL) return NULL; self->codec = PyCObject_AsVoidPtr(arg); return (PyObject *)self; +} + +static PyObject * +mbstreamwriter_create(MultibyteCodec *codec, + PyObject *stream, const char *errors) +{ + MultibyteStreamWriterObject *self; + + self = PyObject_New(MultibyteStreamWriterObject, + &MultibyteStreamWriter_Type); + if (self == NULL) + return NULL; + + self->errors = get_errorcallback(errors); + if (self->errors == NULL) { + Py_DECREF(self); + return NULL; + } + self->codec = codec; + self->stream = stream; + Py_INCREF(stream); + self->state.p = NULL; + + return (PyObject *)self; } static struct PyMethodDef __methods[] = { 1.5 +15 -13 cjkcodecs/src/multibytecodec.h Index: multibytecodec.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.h,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- multibytecodec.h 19 May 2003 06:06:38 -0000 1.4 +++ multibytecodec.h 20 May 2003 04:11:44 -0000 1.5 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.h,v 1.4 2003/05/19 06:06:38 perky Exp $ + * $Id: multibytecodec.h,v 1.5 2003/05/20 04:11:44 perky Exp $ */ #ifndef _PYTHON_MULTIBYTECODEC_H_ @@ -38,12 +38,12 @@ typedef union { unsigned long i; void *p; -} PyMultibyteCodec_State; +} MultibyteCodec_State; -typedef int (*mbencode_func)(PyMultibyteCodec_State *state, +typedef int (*mbencode_func)(MultibyteCodec_State *state, const Py_UNICODE **inbuf, size_t inleft, unsigned char **outbuf, size_t outleft); -typedef int (*mbdecode_func)(PyMultibyteCodec_State *state, +typedef int (*mbdecode_func)(MultibyteCodec_State *state, const unsigned char **inbuf, size_t inleft, Py_UNICODE **outbuf, size_t outleft); @@ -51,31 +51,33 @@ const char *encoding; mbencode_func encode; mbdecode_func decode; -} PyMultibyteCodec; +} MultibyteCodec; typedef struct { PyObject_HEAD - PyMultibyteCodec *codec; -} PyMultibyteCodecObject; + MultibyteCodec *codec; +} MultibyteCodecObject; #define MAXPENDING 8 typedef struct { PyObject_HEAD - PyMultibyteCodec *codec; - PyMultibyteCodec_State state; + MultibyteCodec *codec; + MultibyteCodec_State state; unsigned char pending[MAXPENDING]; int pendingsize; PyObject *stream, *errors; -} PyMultibyteStreamReaderObject; +} MultibyteStreamReaderObject; typedef struct { PyObject_HEAD - PyMultibyteCodec *codec; - PyMultibyteCodec_State state; + MultibyteCodec *codec; + MultibyteCodec_State state; +#if 0 /* StreamWriter doesn't buffer on the current implementation. */ Py_UNICODE pending[MAXPENDING]; int pendingsize; +#endif PyObject *stream, *errors; -} PyMultibyteStreamWriterObject; +} MultibyteStreamWriterObject; /* positive values for illegal sequences */ #define MBERR_TOOSMALL (-1) /* insufficient output buffer space */ |
From: Hye-Shik C. <pe...@us...> - 2003-07-01 19:35:16
|
perky 03/07/01 12:33:43 Modified: src codeccommon.h multibytecodec.c multibytecodec.h Log: - Prepare buffering encoder framework for jisx0213 and surrogates - Set '\U+xxxxxxxx' instead of '\u..' on python versions under 2.2 Revision Changes Path 1.14 +2 -1 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- codeccommon.h 20 Jun 2003 17:22:59 -0000 1.13 +++ codeccommon.h 1 Jul 2003 19:33:43 -0000 1.14 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: codeccommon.h,v 1.13 2003/06/20 17:22:59 perky Exp $ + * $Id: codeccommon.h,v 1.14 2003/07/01 19:33:43 perky Exp $ */ #include "Python.h" @@ -50,6 +50,7 @@ #define ENCODER_RESET(encoding) \ static int encoding##_encode_reset( \ MultibyteCodec_State *state, \ + const Py_UNICODE **inbuf, size_t inleft, \ unsigned char **outbuf, size_t outleft) #define DECODER_INIT(encoding) \ 1.18 +16 -9 cjkcodecs/src/multibytecodec.c Index: multibytecodec.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.c,v retrieving revision 1.17 retrieving revision 1.18 diff -u -r1.17 -r1.18 --- multibytecodec.c 6 Jun 2003 06:56:01 -0000 1.17 +++ multibytecodec.c 1 Jul 2003 19:33:43 -0000 1.18 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.c,v 1.17 2003/06/06 06:56:01 perky Exp $ + * $Id: multibytecodec.c,v 1.18 2003/07/01 19:33:43 perky Exp $ */ #include "Python.h" @@ -236,11 +236,18 @@ end = start + esize; #ifdef NO_ERROR_CALLBACKS - if (esize == 1) - PyErr_Format(PyExc_UnicodeError, - "'%s' codec can't encode byte '\\u%04x' in position %d: %s", - codec->encoding, *buf->inbuf, start, reason); - else + if (esize == 1) { +#if Py_UNICODE_SIZE == 4 + if (*buf->inbuf >= 0x10000) + PyErr_Format(PyExc_UnicodeError, + "'%s' codec can't encode byte '\\U%08x' in position %d: %s", + codec->encoding, *buf->inbuf, start, reason); + else +#endif + PyErr_Format(PyExc_UnicodeError, + "'%s' codec can't encode byte '\\u%04x' in position %d: %s", + codec->encoding, *buf->inbuf, start, reason); + } else PyErr_Format(PyExc_UnicodeError, "'%s' codec can't encode bytes in position %d-%d: %s", codec->encoding, start, end, reason); @@ -480,7 +487,7 @@ size_t outleft; outleft = (size_t)(buf.outbuf_end - buf.outbuf); - r = codec->encreset(state, &buf.outbuf, outleft); + r = codec->encreset(state, NULL, 0, &buf.outbuf, outleft); if (r == 0) break; else if (multibytecodec_encerror(codec, state, &buf, errors, r)) @@ -783,7 +790,7 @@ /* we can't assume that pendingsize is still 0 here. because * this function can be called recursively from error callback */ npendings = (size_t)(buf.inbuf_end - buf.inbuf); - if (npendings + self->pendingsize > MAXPENDING) { + if (npendings + self->pendingsize > MAXDECPENDING) { PyErr_SetString(PyExc_RuntimeError, "pending buffer overflow"); goto errorexit; @@ -1062,7 +1069,7 @@ rsbuf_cur = rsbuf_top + rsbufnc; r = self->codec->encreset(&self->state, - &rsbuf_cur, rsbufsiz - rsbufnc); + NULL, 0, &rsbuf_cur, rsbufsiz - rsbufnc); rsbufnc = (size_t)(rsbuf_cur - rsbuf_top); if (r == MBERR_TOOSMALL) continue; 1.8 +7 -3 cjkcodecs/src/multibytecodec.h Index: multibytecodec.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.h,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- multibytecodec.h 31 May 2003 11:50:19 -0000 1.7 +++ multibytecodec.h 1 Jul 2003 19:33:43 -0000 1.8 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.h,v 1.7 2003/05/31 11:50:19 perky Exp $ + * $Id: multibytecodec.h,v 1.8 2003/07/01 19:33:43 perky Exp $ */ #ifndef _PYTHON_MULTIBYTECODEC_H_ @@ -45,6 +45,7 @@ unsigned char **outbuf, size_t outleft); typedef int (*mbencodeinit_func)(MultibyteCodec_State *state); typedef int (*mbencodereset_func)(MultibyteCodec_State *state, + const Py_UNICODE **inbuf, size_t inleft, unsigned char **outbuf, size_t outleft); typedef int (*mbdecode_func)(MultibyteCodec_State *state, const unsigned char **inbuf, size_t inleft, @@ -67,20 +68,23 @@ MultibyteCodec *codec; } MultibyteCodecObject; -#define MAXPENDING 8 +#define MAXDECPENDING 8 typedef struct { PyObject_HEAD MultibyteCodec *codec; MultibyteCodec_State state; - unsigned char pending[MAXPENDING]; + unsigned char pending[MAXDECPENDING]; int pendingsize; PyObject *stream, *errors; } MultibyteStreamReaderObject; +#define MAXENCPENDING 2 typedef struct { PyObject_HEAD MultibyteCodec *codec; MultibyteCodec_State state; + Py_UNICODE pending[MAXENCPENDING]; + int pendingsize; PyObject *stream, *errors; } MultibyteStreamWriterObject; |