Thread: [KoCo-CVS] [Commit] iconvcodec _iconv_codec.c
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-04-20 21:40:08
|
perky 03/04/20 14:40:06 Modified: . _iconv_codec.c Log: Python/win32 declares Py_USING_UNICODE as an empty macro Revision Changes Path 1.4 +2 -2 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- _iconv_codec.c 20 Apr 2003 20:45:34 -0000 1.3 +++ _iconv_codec.c 20 Apr 2003 21:40:05 -0000 1.4 @@ -24,14 +24,14 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.3 2003/04/20 20:45:34 perky Exp $ + * $Id: _iconv_codec.c,v 1.4 2003/04/20 21:40:05 perky Exp $ */ #include "Python.h" #include <iconv.h> #include "_iconv_codec_compat.h" -#if Py_USING_UNICODE +#ifdef Py_USING_UNICODE # if Py_UNICODE_SIZE == 2 # define UCS_N "UCS-2" # define MBENCODED_LENGTH_MAX 4 |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 05:55:30
|
perky 03/06/10 22:55:29 Modified: . _iconv_codec.c Log: Sigh, it's not cjkcodecs here! :S Revision Changes Path 1.6 +2 -2 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- _iconv_codec.c 11 Jun 2003 05:51:11 -0000 1.5 +++ _iconv_codec.c 11 Jun 2003 05:55:28 -0000 1.6 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.5 2003/06/11 05:51:11 perky Exp $ + * $Id: _iconv_codec.c,v 1.6 2003/06/11 05:55:28 perky Exp $ */ #include "Python.h" @@ -1060,7 +1060,7 @@ | (Py_UNICODE)(ubuf[5] ^ 0x80); ubuf += 6; } else - return 1; + goto ilseq; #endif } |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 11:06:53
|
perky 03/06/11 04:06:50 Modified: . _iconv_codec.c Log: Use 'UTF-16' internal encodings when Py_UNICODE_SIZE == 2. Revision Changes Path 1.7 +10 -6 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- _iconv_codec.c 11 Jun 2003 05:55:28 -0000 1.6 +++ _iconv_codec.c 11 Jun 2003 11:06:50 -0000 1.7 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.6 2003/06/11 05:55:28 perky Exp $ + * $Id: _iconv_codec.c,v 1.7 2003/06/11 11:06:50 perky Exp $ */ #include "Python.h" @@ -33,11 +33,9 @@ #ifdef Py_USING_UNICODE # if Py_UNICODE_SIZE == 2 -# define UCS_N "UCS-2" # define MBENCODED_LENGTH_MAX 4 # define _Py_UNICODE_SWAP(c) (Py_UNICODE)((c)>>8 | (c)<<8) # elif Py_UNICODE_SIZE == 4 -# define UCS_N "UCS-4" # define MBENCODED_LENGTH_MAX 6 # define _Py_UNICODE_SWAP(c) (Py_UNICODE)((c)>>24 | \ ((c)&0x00ff0000)>>8 | \ @@ -81,9 +79,15 @@ const char *encoding; uniinternal_type_t type; } uniinternal_modes[] = { - {UCS_N "-INTERNAL", UNIINTERNAL_UCS}, /* GNU libiconv, FreeBSD, APR */ - {UCS_N ENDIANSUFX, UNIINTERNAL_UCS}, /* SunOS */ - {UCS_N, UNIINTERNAL_UCS}, /* GLIBC */ +#if Py_UNICODE_SIZE == 2 +/* Py_UNICODE* may contain surrogate characters */ + {"UTF-16" ENDIANSUFX, UNIINTERNAL_UCS}, + {"UTF16" ENDIANSUFX, UNIINTERNAL_UCS}, +#else + {"UCS-4-INTERNAL", UNIINTERNAL_UCS}, /* GNU libiconv, FreeBSD, APR */ + {"UCS-4" ENDIANSUFX, UNIINTERNAL_UCS}, /* SunOS */ + {"UCS-4", UNIINTERNAL_UCS}, /* GLIBC */ +#endif {"UTF-8", UNIINTERNAL_UTF_8}, /* SunOS(CJK) */ {"\0", UNIINTERNAL_DONTUSE}, }; |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 12:01:59
|
perky 03/06/11 05:01:58 Modified: . _iconv_codec.c Log: Utilize UCS-2 Surrogate-Pair to support ISO-10646 extended planes Revision Changes Path 1.8 +67 -37 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- _iconv_codec.c 11 Jun 2003 11:06:50 -0000 1.7 +++ _iconv_codec.c 11 Jun 2003 12:01:57 -0000 1.8 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.7 2003/06/11 11:06:50 perky Exp $ + * $Id: _iconv_codec.c,v 1.8 2003/06/11 12:01:57 perky Exp $ */ #include "Python.h" @@ -57,6 +57,14 @@ typedef const char **iconv_arg2_t; #endif +#ifndef ucs4_t +# ifdef uint32_t +typedef uint32_t ucs4_t; +# else +typedef unsigned long ucs4_t; +# endif +#endif + #define ERROR_STRICT (PyObject *)(1) #define ERROR_IGNORE (PyObject *)(2) #define ERROR_REPLACE (PyObject *)(3) @@ -570,22 +578,30 @@ return -1; buf->rinbuf_top = buf->rinbuf = rinbuf; for (; buf->inbuf < buf->inbuf_end; buf->inbuf++) { - Py_UNICODE code = *buf->inbuf; + ucs4_t code = *buf->inbuf; int size; if (code < 0x80) size = 1; else if (code < 0x800) size = 2; + else { #if Py_UNICODE_SIZE == 2 - else size = 3; /* XXX put surrogate characters for EMP! 
*/ -#else - else if (code < 0x10000) size = 3; - else if (code < 0x200000) size = 4; - else if (code < 0x4000000) size = 5; - else size = 6; -#endif + /* Unfold a Surrogate-Pair */ + if (code >= 0xd800 && code < 0xdc00 && + buf->inbuf+1 < buf->inbuf_end && + buf->inbuf[1] >= 0xdc00 && + buf->inbuf[1] < 0xe000) { + code = 0x10000 + ((code - 0xd800) << 10) + + (buf->inbuf[1] - 0xdc00); + buf->inbuf++; + } +#endif + if (code < 0x10000) size = 3; + else if (code < 0x200000) size = 4; + else if (code < 0x4000000) size = 5; + else size = 6; + } switch (size) { -#if Py_UNICODE_SIZE == 4 case 6: rinbuf[5] = 0x80 | (code & 0x3f); code = code >> 6; @@ -601,7 +617,6 @@ code = code >> 6; code |= 0x10000; /* FALLTHROUGH */ -#endif case 3: rinbuf[2] = 0x80 | (code & 0x3f); code = code >> 6; @@ -1000,10 +1015,11 @@ if (nch > 0) RESERVE_DECODEBUFFER(buf, nch) for (ubuf = ubuf_top; ubuf < ubuf_end;) { - int uleft = (int)(ubuf_end - ubuf); + int uleft = (int)(ubuf_end - ubuf); + ucs4_t code; if (*ubuf < 0x80) { - *buf->outbuf++ = (unsigned char)*ubuf++; + code = (unsigned char)*ubuf++; } else if (*ubuf < 0xc2) { ilseq: PyErr_SetString(PyExc_RuntimeError, "iconv returned illegal utf-8 sequence"); @@ -1011,32 +1027,28 @@ } else if (*ubuf < 0xe0) { if (uleft < 2 || !((ubuf[1] ^ 0x80) < 0x40)) goto ilseq; - *buf->outbuf++ = ((Py_UNICODE)(ubuf[0] & 0x1f) << 6) - | (Py_UNICODE)(ubuf[1] ^ 0x80); + code = ((Py_UNICODE)(ubuf[0] & 0x1f) << 6) + | (Py_UNICODE)(ubuf[1] ^ 0x80); ubuf += 2; } else if (*ubuf < 0xf0) { if (uleft < 3 || !((ubuf[1] ^ 0x80) < 0x40 && (ubuf[2] ^ 0x80) < 0x40 && (ubuf[0] >= 0xe1 || ubuf[1] >= 0xa0))) goto ilseq; - *buf->outbuf++ = ((Py_UNICODE)(ubuf[0] & 0x0f) << 12) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[2] ^ 0x80); + code = ((Py_UNICODE)(ubuf[0] & 0x0f) << 12) + | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 6) + | (Py_UNICODE)(ubuf[2] ^ 0x80); ubuf += 3; } -#if Py_UNICODE_SIZE == 2 - else /* XXX: put surrogate characters here! 
*/ - goto ilseq; -#else else if (*ubuf < 0xf8) { if (uleft < 4 || !((ubuf[1] ^ 0x80) < 0x40 && (ubuf[2] ^ 0x80) < 0x40 && (ubuf[3] ^ 0x80) < 0x40 && (ubuf[0] >= 0xf1 || ubuf[1] >= 0x90))) goto ilseq; - *buf->outbuf++ = ((Py_UNICODE)(ubuf[0] & 0x07) << 18) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 12) - | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[3] ^ 0x80); + code = ((Py_UNICODE)(ubuf[0] & 0x07) << 18) + | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 12) + | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 6) + | (Py_UNICODE)(ubuf[3] ^ 0x80); ubuf += 4; } else if (*ubuf < 0xfc) { if (uleft < 5 || !((ubuf[1] ^ 0x80) < 0x40 && @@ -1044,11 +1056,11 @@ (ubuf[4] ^ 0x80) < 0x40 && (ubuf[0] >= 0xf9 || ubuf[1] >= 0x88))) goto ilseq; - *buf->outbuf++ = ((Py_UNICODE)(ubuf[0] & 0x03) << 24) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 18) - | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 12) - | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[4] ^ 0x80); + code = ((Py_UNICODE)(ubuf[0] & 0x03) << 24) + | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 18) + | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 12) + | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 6) + | (Py_UNICODE)(ubuf[4] ^ 0x80); ubuf += 5; } else if (*ubuf < 0xff) { if (uleft < 6 || !((ubuf[1] ^ 0x80) < 0x40 && @@ -1056,16 +1068,34 @@ (ubuf[4] ^ 0x80) < 0x40 && (ubuf[5] ^ 0x80) < 0x40 && (ubuf[0] >= 0xfd || ubuf[1] >= 0x84))) goto ilseq; - *buf->outbuf++ = ((Py_UNICODE)(ubuf[0] & 0x01) << 30) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 24) - | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 18) - | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 12) - | ((Py_UNICODE)(ubuf[4] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[5] ^ 0x80); + code = ((Py_UNICODE)(ubuf[0] & 0x01) << 30) + | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 24) + | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 18) + | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 12) + | ((Py_UNICODE)(ubuf[4] ^ 0x80) << 6) + | (Py_UNICODE)(ubuf[5] ^ 0x80); ubuf += 6; } else goto ilseq; + +#if Py_UNICODE_SIZE == 2 + if (code >= 0x10000) { + if (code >= 0x110000) + goto ilseq; + + if (buf->outbuf_end <= 
buf->outbuf + 1) { + RESERVE_DECODEBUFFER(buf, -1) + } + *buf->outbuf++ = 0xd800 + ((code - 0x10000) >> 10); + *buf->outbuf++ = 0xdc00 + ((code - 0x10000) & 0x3ff); + } else #endif + { + if (buf->outbuf_end <= buf->outbuf) { + RESERVE_DECODEBUFFER(buf, -1) + } + *buf->outbuf++ = (Py_UNICODE)code; + } } PyMem_Del(ubuf_top); |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 12:07:02
|
perky 03/06/11 05:07:01 Modified: . _iconv_codec.c Log: Use ucs4_t rather than Py_UNICODE in internal calculations Revision Changes Path 1.9 +21 -21 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- _iconv_codec.c 11 Jun 2003 12:01:57 -0000 1.8 +++ _iconv_codec.c 11 Jun 2003 12:07:01 -0000 1.9 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.8 2003/06/11 12:01:57 perky Exp $ + * $Id: _iconv_codec.c,v 1.9 2003/06/11 12:07:01 perky Exp $ */ #include "Python.h" @@ -1027,17 +1027,17 @@ } else if (*ubuf < 0xe0) { if (uleft < 2 || !((ubuf[1] ^ 0x80) < 0x40)) goto ilseq; - code = ((Py_UNICODE)(ubuf[0] & 0x1f) << 6) - | (Py_UNICODE)(ubuf[1] ^ 0x80); + code = ((ucs4_t)(ubuf[0] & 0x1f) << 6) + | (ucs4_t)(ubuf[1] ^ 0x80); ubuf += 2; } else if (*ubuf < 0xf0) { if (uleft < 3 || !((ubuf[1] ^ 0x80) < 0x40 && (ubuf[2] ^ 0x80) < 0x40 && (ubuf[0] >= 0xe1 || ubuf[1] >= 0xa0))) goto ilseq; - code = ((Py_UNICODE)(ubuf[0] & 0x0f) << 12) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[2] ^ 0x80); + code = ((ucs4_t)(ubuf[0] & 0x0f) << 12) + | ((ucs4_t)(ubuf[1] ^ 0x80) << 6) + | (ucs4_t)(ubuf[2] ^ 0x80); ubuf += 3; } else if (*ubuf < 0xf8) { @@ -1045,10 +1045,10 @@ (ubuf[2] ^ 0x80) < 0x40 && (ubuf[3] ^ 0x80) < 0x40 && (ubuf[0] >= 0xf1 || ubuf[1] >= 0x90))) goto ilseq; - code = ((Py_UNICODE)(ubuf[0] & 0x07) << 18) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 12) - | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[3] ^ 0x80); + code = ((ucs4_t)(ubuf[0] & 0x07) << 18) + | ((ucs4_t)(ubuf[1] ^ 0x80) << 12) + | ((ucs4_t)(ubuf[2] ^ 0x80) << 6) + | (ucs4_t)(ubuf[3] ^ 0x80); ubuf += 4; } else if (*ubuf < 0xfc) { if (uleft < 5 || !((ubuf[1] ^ 0x80) < 0x40 && @@ -1056,11 +1056,11 @@ (ubuf[4] ^ 0x80) < 0x40 && (ubuf[0] 
>= 0xf9 || ubuf[1] >= 0x88))) goto ilseq; - code = ((Py_UNICODE)(ubuf[0] & 0x03) << 24) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 18) - | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 12) - | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[4] ^ 0x80); + code = ((ucs4_t)(ubuf[0] & 0x03) << 24) + | ((ucs4_t)(ubuf[1] ^ 0x80) << 18) + | ((ucs4_t)(ubuf[2] ^ 0x80) << 12) + | ((ucs4_t)(ubuf[3] ^ 0x80) << 6) + | (ucs4_t)(ubuf[4] ^ 0x80); ubuf += 5; } else if (*ubuf < 0xff) { if (uleft < 6 || !((ubuf[1] ^ 0x80) < 0x40 && @@ -1068,12 +1068,12 @@ (ubuf[4] ^ 0x80) < 0x40 && (ubuf[5] ^ 0x80) < 0x40 && (ubuf[0] >= 0xfd || ubuf[1] >= 0x84))) goto ilseq; - code = ((Py_UNICODE)(ubuf[0] & 0x01) << 30) - | ((Py_UNICODE)(ubuf[1] ^ 0x80) << 24) - | ((Py_UNICODE)(ubuf[2] ^ 0x80) << 18) - | ((Py_UNICODE)(ubuf[3] ^ 0x80) << 12) - | ((Py_UNICODE)(ubuf[4] ^ 0x80) << 6) - | (Py_UNICODE)(ubuf[5] ^ 0x80); + code = ((ucs4_t)(ubuf[0] & 0x01) << 30) + | ((ucs4_t)(ubuf[1] ^ 0x80) << 24) + | ((ucs4_t)(ubuf[2] ^ 0x80) << 18) + | ((ucs4_t)(ubuf[3] ^ 0x80) << 12) + | ((ucs4_t)(ubuf[4] ^ 0x80) << 6) + | (ucs4_t)(ubuf[5] ^ 0x80); ubuf += 6; } else goto ilseq; |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 12:30:43
|
perky 03/06/11 05:30:40 Modified: . _iconv_codec.c Log: Use a correct format string on python-ucs4 Revision Changes Path 1.10 +5 -1 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- _iconv_codec.c 11 Jun 2003 12:07:01 -0000 1.9 +++ _iconv_codec.c 11 Jun 2003 12:30:38 -0000 1.10 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.9 2003/06/11 12:07:01 perky Exp $ + * $Id: _iconv_codec.c,v 1.10 2003/06/11 12:30:38 perky Exp $ */ #include "Python.h" @@ -370,7 +370,11 @@ #ifdef LACKS_ERROR_CALLBACKS if (esize == 1) PyErr_Format(PyExc_UnicodeError, +#if Py_UNICODE_SIZE == 2 "'%s' codec can't encode byte '\\u%04x' in position %d: %s", +#else + "'%s' codec can't encode byte '\\u%08lx' in position %d: %s", +#endif self->encoding, *buf->inbuf, start, reason); else PyErr_Format(PyExc_UnicodeError, |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 12:35:31
|
perky 03/06/11 05:35:30 Modified: . _iconv_codec.c Log: Count 0xfe and 0xff as single-byte sequence. Revision Changes Path 1.11 +4 -3 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- _iconv_codec.c 11 Jun 2003 12:30:38 -0000 1.10 +++ _iconv_codec.c 11 Jun 2003 12:35:23 -0000 1.11 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.10 2003/06/11 12:30:38 perky Exp $ + * $Id: _iconv_codec.c,v 1.11 2003/06/11 12:35:23 perky Exp $ */ #include "Python.h" @@ -230,7 +230,8 @@ else if (*(p) < 0xf0) (p) += 3; \ else if (*(p) < 0xf8) (p) += 4; \ else if (*(p) < 0xfc) (p) += 5; \ - else (p) += 6; + else if (*(p) < 0xfe) (p) += 6; \ + else (p)++; static const unsigned char * skipchars_utf8(const unsigned char *st, int n) @@ -1066,7 +1067,7 @@ | ((ucs4_t)(ubuf[3] ^ 0x80) << 6) | (ucs4_t)(ubuf[4] ^ 0x80); ubuf += 5; - } else if (*ubuf < 0xff) { + } else if (*ubuf < 0xfe) { if (uleft < 6 || !((ubuf[1] ^ 0x80) < 0x40 && (ubuf[2] ^ 0x80) < 0x40 && (ubuf[3] ^ 0x80) < 0x40 && (ubuf[4] ^ 0x80) < 0x40 && (ubuf[5] ^ 0x80) < 0x40 && |
From: Hye-Shik C. <pe...@us...> - 2003-06-11 12:40:19
|
perky 03/06/11 05:40:14 Modified: . _iconv_codec.c Log: Remove cruft (unused variable m) Revision Changes Path 1.12 +2 -4 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- _iconv_codec.c 11 Jun 2003 12:35:23 -0000 1.11 +++ _iconv_codec.c 11 Jun 2003 12:40:13 -0000 1.12 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.11 2003/06/11 12:35:23 perky Exp $ + * $Id: _iconv_codec.c,v 1.12 2003/06/11 12:40:13 perky Exp $ */ #include "Python.h" @@ -2073,11 +2073,9 @@ void init_iconv_codec(void) { - PyObject *m; - detect_iconv_endian(); - m = Py_InitModule("_iconv_codec", _iconv_codec_methods); + Py_InitModule("_iconv_codec", _iconv_codec_methods); if (PyErr_Occurred()) Py_FatalError("can't initialize the _iconv_codec module"); |
From: Hye-Shik C. <pe...@us...> - 2003-06-12 05:51:35
|
perky 03/06/11 22:51:33 Modified: . _iconv_codec.c Log: Remove unreachable code. Revision Changes Path 1.14 +1 -3 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- _iconv_codec.c 12 Jun 2003 04:11:19 -0000 1.13 +++ _iconv_codec.c 12 Jun 2003 05:51:33 -0000 1.14 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.13 2003/06/12 04:11:19 perky Exp $ + * $Id: _iconv_codec.c,v 1.14 2003/06/12 05:51:33 perky Exp $ */ #include "Python.h" @@ -775,7 +775,6 @@ "<IconvEncoder from='%s' to='%s' mode='%s'>", self->unicode_encoding, self->encoding, uniinternal_type_names[self->unitype]); - return PyString_FromString("<IconvEncoder>"); } #endif @@ -1279,7 +1278,6 @@ "<IconvDecoder from='%s' to='%s' mode='%s'>", self->encoding, self->unicode_encoding, uniinternal_type_names[self->unitype]); - return PyString_FromString("<IconvDecoder>"); } #endif |
From: Hye-Shik C. <pe...@us...> - 2003-06-16 19:12:46
|
perky 03/06/16 12:12:41 Modified: . _iconv_codec.c Log: Minor code clean ups Revision Changes Path 1.15 +43 -49 iconvcodec/_iconv_codec.c Index: _iconv_codec.c =================================================================== RCS file: /cvsroot/koco/iconvcodec/_iconv_codec.c,v retrieving revision 1.14 retrieving revision 1.15 diff -u -r1.14 -r1.15 --- _iconv_codec.c 12 Jun 2003 05:51:33 -0000 1.14 +++ _iconv_codec.c 16 Jun 2003 19:12:41 -0000 1.15 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: _iconv_codec.c,v 1.14 2003/06/12 05:51:33 perky Exp $ + * $Id: _iconv_codec.c,v 1.15 2003/06/16 19:12:41 perky Exp $ */ #include "Python.h" @@ -127,10 +127,10 @@ const char *unicode_encoding; int unitype; size_t (*iconvwrap)( - iconv_t cd, iconv_arg2_t inbuf, size_t * inbytesleft, - char* *outbuf, size_t * outbytesleft); + iconv_t cd, iconv_arg2_t inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft); size_t (*iconvwrap2)(iconv_t cd, IconvDecoderBuffer *buf, - int inleft, int outleft); + size_t inleft, size_t outleft); /* an alternative wrapper: for utf8 backend */ } IconvDecoderObject; @@ -187,9 +187,6 @@ { PyObject *v, *w; - if (unicode == NULL) - return NULL; - v = PyTuple_New(2); if (v == NULL) { Py_DECREF(unicode); @@ -225,7 +222,7 @@ } #define UTF8NEXTCHAR(p) \ - if (*(p) < 128) (p)++; \ + if (*(p) < 0x80) (p)++; \ else if (*(p) < 0xe0) (p) += 2; \ else if (*(p) < 0xf0) (p) += 3; \ else if (*(p) < 0xf8) (p) += 4; \ @@ -374,7 +371,7 @@ #if Py_UNICODE_SIZE == 2 "'%s' codec can't encode byte '\\u%04x' in position %d: %s", #else - "'%s' codec can't encode byte '\\u%08lx' in position %d: %s", + "'%s' codec can't encode byte '\\U%08lx' in position %d: %s", #endif self->encoding, *buf->inbuf, start, reason); else @@ -391,14 +388,11 @@ start, end, reason); if (buf->excobj == NULL) goto errorexit; - } else { - if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0) - goto errorexit; - if 
(PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0) - goto errorexit; - if (PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) - goto errorexit; - } + } else + if (PyUnicodeEncodeError_SetStart(buf->excobj, start) || + PyUnicodeEncodeError_SetEnd(buf->excobj, end) || + PyUnicodeEncodeError_SetReason(buf->excobj, reason)) + goto errorexit; if (errors == ERROR_STRICT) { PyCodec_StrictErrors(buf->excobj); @@ -739,9 +733,9 @@ iconvencoder_makestream(IconvEncoderObject *self, PyObject *args, PyObject *kwargs) { - static char *stream_kwarglist[] = {"stream", "errors", NULL}; - PyObject *stream; - char *errors = NULL; + static char *stream_kwarglist[] = {"stream", "errors", NULL}; + PyObject *stream; + char *errors = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:makestream", stream_kwarglist, &stream, &errors)) @@ -820,7 +814,7 @@ static int expand_decodebuffer(IconvDecoderBuffer *buf, int esize) { - int orgpos, orgsize; + int orgpos, orgsize; orgpos = (int)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); orgsize = PyUnicode_GET_SIZE(buf->outobj); @@ -906,14 +900,11 @@ (size_t)(buf->inbuf_end - buf->inbuf_top), start, end, reason); if (buf->excobj == NULL) goto errorexit; - } else { - if (PyUnicodeDecodeError_SetStart(buf->excobj, start) != 0) - goto errorexit; - if (PyUnicodeDecodeError_SetEnd(buf->excobj, end) != 0) - goto errorexit; - if (PyUnicodeDecodeError_SetReason(buf->excobj, reason) != 0) - goto errorexit; - } + } else + if (PyUnicodeDecodeError_SetStart(buf->excobj, start) || + PyUnicodeDecodeError_SetEnd(buf->excobj, end) || + PyUnicodeDecodeError_SetReason(buf->excobj, reason)) + goto errorexit; if (errors == ERROR_STRICT) { PyCodec_StrictErrors(buf->excobj); @@ -983,7 +974,8 @@ } static size_t -iconvwrap_utf8(iconv_t ic, IconvDecoderBuffer *buf, int inleft, int outleft) +iconvwrap_utf8(iconv_t ic, IconvDecoderBuffer *buf, + size_t inleft, size_t outleft) { unsigned char *ubuf, *ubuf_top, *ubuf_end; size_t r; @@ -994,7 +986,7 @@ return 
-1; ubuf_top = ubuf; - if (inleft != -1) + if (inleft > 0) r = iconv(ic, (iconv_arg2_t)&(buf->inbuf), &inleft, (char **)&ubuf, &outleft); else @@ -1100,7 +1092,7 @@ iconvdecoder_conv(IconvDecoderObject *self, iconv_t ic, IconvDecoderBuffer *buf, PyObject *errors) { - size_t r, inleft, outleft; + size_t r, inleft, outleft; for (;;) { inleft = (size_t)(buf->inbuf_end - buf->inbuf); @@ -1133,7 +1125,7 @@ iconv_t ic, IconvDecoderBuffer *buf, PyObject *errors) { - size_t r, outleft; + size_t r, outleft; if (buf->inbuf < buf->inbuf_end) /* assumes as left by EINVAL */ if (iconvdecoder_error(self, ic, buf, errors, EINVAL, @@ -1147,7 +1139,7 @@ r = self->iconvwrap(ic, NULL, NULL, (char **)&(buf->outbuf), &outleft); else - r = self->iconvwrap2(ic, buf, -1, outleft); + r = self->iconvwrap2(ic, buf, 0, outleft); if (r == (size_t)-1) { if (errno == E2BIG) { @@ -1242,9 +1234,9 @@ iconvdecoder_makestream(IconvDecoderObject *self, PyObject *args, PyObject *kwargs) { - static char *stream_kwarglist[] = {"stream", "errors", NULL}; - PyObject *stream; - char *errors = NULL; + static char *stream_kwarglist[] = {"stream", "errors", NULL}; + PyObject *stream; + char *errors = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:makestream", stream_kwarglist, &stream, &errors)) @@ -1325,8 +1317,8 @@ const char *method, int sizehint) { IconvDecoderBuffer buf; - PyObject *cres; - int rsize, r, finalsize = 0; + PyObject *cres; + int rsize, r, finalsize = 0; if (sizehint == 0) return PyUnicode_FromUnicode(NULL, 0); @@ -1848,9 +1840,9 @@ iconvcodec_makeencoder(PyObject *spam, PyObject *args) { IconvEncoderObject *self; - iconv_t ic; - char *encoding; - int i; + iconv_t ic; + char *encoding; + int i; if (!PyArg_ParseTuple(args, "s:makeencoder", &encoding)) return NULL; @@ -1898,9 +1890,9 @@ iconvcodec_makedecoder(PyObject *spam, PyObject *args) { IconvDecoderObject *self; - iconv_t ic; - char *encoding; - int i; + iconv_t ic; + char *encoding; + int i; if (!PyArg_ParseTuple(args, 
"s:makedecoder", &encoding)) return NULL; @@ -1930,9 +1922,11 @@ self->unitype = um[i].type; switch (self->unitype) { case UNIINTERNAL_UCS: - self->iconvwrap = iconv; break; + self->iconvwrap = iconv; + break; case UNIINTERNAL_UCS_SWAPPED: - self->iconvwrap = iconvwrap_ucsswapped; break; + self->iconvwrap = iconvwrap_ucsswapped; + break; case UNIINTERNAL_UTF_8: self->iconvwrap = NULL; self->iconvwrap2= iconvwrap_utf8; @@ -1968,8 +1962,8 @@ static void detect_iconv_endian(void) { - iconv_t ic; - int i; + iconv_t ic; + int i; #define um uniinternal_modes for (i = 0; um[i].encoding[0]; i++) { |