[KoCo-CVS] [Commit] cjkcodecs/src _utf_16.c _utf_16be.c _utf_16le.c _utf_7.c codeccommon.h
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-07-12 05:22:24
|
perky 03/07/11 22:22:24 Modified: src _utf_7.c codeccommon.h Added: src _utf_16.c _utf_16be.c _utf_16le.c Log: Add utf-16 codecs. Revision Changes Path 1.2 +2 -2 cjkcodecs/src/_utf_7.c Index: _utf_7.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_utf_7.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- _utf_7.c 12 Jul 2003 03:47:14 -0000 1.1 +++ _utf_7.c 12 Jul 2003 05:22:23 -0000 1.2 @@ -1,5 +1,5 @@ /* - * _utf_7.c: the UTF-8 codec (RFC2152) + * _utf_7.c: the UTF-7 codec (RFC2152) * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _utf_7.c,v 1.1 2003/07/12 03:47:14 perky Exp $ + * $Id: _utf_7.c,v 1.2 2003/07/12 05:22:23 perky Exp $ */ #include "codeccommon.h" 1.25 +2 -2 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.24 retrieving revision 1.25 diff -u -r1.24 -r1.25 --- codeccommon.h 12 Jul 2003 03:47:14 -0000 1.24 +++ codeccommon.h 12 Jul 2003 05:22:23 -0000 1.25 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: codeccommon.h,v 1.24 2003/07/12 03:47:14 perky Exp $ + * $Id: codeccommon.h,v 1.25 2003/07/12 05:22:23 perky Exp $ */ #include "Python.h" @@ -147,7 +147,7 @@ if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \ RESERVE_INBUF(2) \ if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \ - c = 0x10000 + ((c - 0xd800) << 10) + \ + c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \ ((ucs4_t)(IN2) - 0xdc00); \ } \ } 1.1 cjkcodecs/src/_utf_16.c Index: _utf_16.c =================================================================== /* * _utf_16.c: the UTF-16 codec (RFC2781) * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $Id: _utf_16.c,v 1.1 2003/07/12 05:22:23 perky Exp $ */ #include "codeccommon.h" #define HAVE_ENCODER_INIT ENCODER_INIT(utf_16) { state->i = 1; /* indicates "the beginning of stream" */ return 0; } ENCODER(utf_16) { while (inleft > 0) { Py_UNICODE c = IN1; ucs2_t uc; unsigned char *ucdata = (unsigned char *)&uc; if (state->i) { /* BOM */ state->i = 0; uc = 0xfeff; WRITE2(ucdata[0], ucdata[1]) NEXT_OUT(2) } #if Py_UNICODE_SIZE == 4 if (c > 0xffff) { if (c > 0x10ffff) return 1; RESERVE_OUTBUF(4) uc = 0xd800 + (((c) - 0x10000) >> 10); OUT1(ucdata[0]) OUT2(ucdata[1]) uc = 0xdc00 + (((c) - 0x10000) & 0x3ff); OUT3(ucdata[0]) OUT4(ucdata[1]) NEXT(1, 4) continue; } #endif RESERVE_OUTBUF(2) uc = c; OUT1(ucdata[0]) OUT2(ucdata[1]) NEXT(1, 2) } return 0; } #ifdef WORDS_BIGENDIAN # define _NATIVE 1 #else # define _NATIVE -1 #endif #define HAVE_DECODER_INIT DECODER_INIT(utf_16) { state->i = 0; /* -1: little 0: (the beginning of stream) 1: big */ return 0; } DECODER(utf_16) { while (inleft > 0) { ucs2_t uc; unsigned char *c = (unsigned char *)&uc; RESERVE_INBUF(2) if (state->i == 0) { if (IN1 == 0xff && IN2 == 0xfe) { state->i = -1; NEXT_IN(2) continue; } else if (IN1 == 0xfe && IN2 == 0xff) { state->i = 1; NEXT_IN(2) continue; } else { /* set as native byte order and don't interpret any more * BOMs (RFC2781 Page 4) */ state->i = _NATIVE; } } RESERVE_OUTBUF(1) if (state->i == _NATIVE) c[0] = IN1, c[1] = IN2; else c[1] = IN1, c[0] = IN2; #if Py_UNICODE_SIZE == 4 if (uc >> 10 == 0xd800 >> 10) { ucs2_t uc2; unsigned char *c2 = (unsigned char *)&uc2; RESERVE_INBUF(4) if (state->i == _NATIVE) c2[0] = IN3, c2[1] = IN4; else c2[1] = IN3, c2[0] = IN4; if (uc2 >> 10 != 0xdc00 >> 10) return 4; OUT1(0x10000 + ((ucs4_t)(uc - 0xd800) << 10) + (uc2 - 0xdc00)) NEXT(4, 1) continue; } #endif OUT1(uc) NEXT(2, 1) } return 0; } #define CODEC_WITHOUT_MAPS #include "codecentry.h" CODEC_REGISTRY(utf_16) /* * ex: ts=8 sts=4 et */ 1.1 cjkcodecs/src/_utf_16be.c Index: _utf_16be.c =================================================================== /* * _utf_16be.c: the UTF-16BE codec (RFC2781) * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $Id: _utf_16be.c,v 1.1 2003/07/12 05:22:23 perky Exp $ */ #include "codeccommon.h" #ifdef WORDS_BIGENDIAN # define BYTE0 0 # define BYTE1 1 #else # define BYTE0 1 # define BYTE1 0 #endif ENCODER(utf_16be) { while (inleft > 0) { Py_UNICODE c = IN1; ucs2_t uc; unsigned char *ucdata = (unsigned char *)&uc; #if Py_UNICODE_SIZE == 4 if (c > 0xffff) { if (c > 0x10ffff) return 1; RESERVE_OUTBUF(4) uc = 0xd800 + (((c) - 0x10000) >> 10); OUT1(ucdata[BYTE0]) OUT2(ucdata[BYTE1]) uc = 0xdc00 + (((c) - 0x10000) & 0x3ff); OUT3(ucdata[BYTE0]) OUT4(ucdata[BYTE1]) NEXT(1, 4) continue; } #endif RESERVE_OUTBUF(2) uc = c; OUT1(ucdata[BYTE0]) OUT2(ucdata[BYTE1]) NEXT(1, 2) } return 0; } DECODER(utf_16be) { while (inleft > 0) { ucs2_t uc; unsigned char *c = (unsigned char *)&uc; RESERVE_INBUF(2) RESERVE_OUTBUF(1) c[BYTE0] = IN1; c[BYTE1] = IN2; #if Py_UNICODE_SIZE == 4 if (uc >> 10 == 0xd800 >> 10) { ucs2_t uc2; unsigned char *c2 = (unsigned char *)&uc2; RESERVE_INBUF(4) c2[BYTE0] = IN3; c2[BYTE1] = IN4; if (uc2 >> 10 != 0xdc00 >> 10) return 4; OUT1(0x10000 + ((ucs4_t)(uc - 0xd800) << 10) + (uc2 - 0xdc00)) NEXT(4, 1) continue; } #endif OUT1(uc) NEXT(2, 1) } return 0; } #define CODEC_WITHOUT_MAPS #include "codecentry.h" CODEC_REGISTRY(utf_16be) /* * ex: ts=8 sts=4 et */ 1.1 cjkcodecs/src/_utf_16le.c Index: _utf_16le.c =================================================================== /* * _utf_16le.c: the UTF-16LE codec (RFC2781) * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $Id: _utf_16le.c,v 1.1 2003/07/12 05:22:23 perky Exp $ */ #include "codeccommon.h" #ifdef WORDS_BIGENDIAN # define BYTE0 1 # define BYTE1 0 #else # define BYTE0 0 # define BYTE1 1 #endif ENCODER(utf_16le) { while (inleft > 0) { Py_UNICODE c = IN1; ucs2_t uc; unsigned char *ucdata = (unsigned char *)&uc; #if Py_UNICODE_SIZE == 4 if (c > 0xffff) { if (c > 0x10ffff) return 1; RESERVE_OUTBUF(4) uc = 0xd800 + (((c) - 0x10000) >> 10); OUT1(ucdata[BYTE0]) OUT2(ucdata[BYTE1]) uc = 0xdc00 + (((c) - 0x10000) & 0x3ff); OUT3(ucdata[BYTE0]) OUT4(ucdata[BYTE1]) NEXT(1, 4) continue; } #endif RESERVE_OUTBUF(2) uc = c; OUT1(ucdata[BYTE0]) OUT2(ucdata[BYTE1]) NEXT(1, 2) } return 0; } DECODER(utf_16le) { while (inleft > 0) { ucs2_t uc; unsigned char *c = (unsigned char *)&uc; RESERVE_INBUF(2) RESERVE_OUTBUF(1) c[BYTE0] = IN1; c[BYTE1] = IN2; #if Py_UNICODE_SIZE == 4 if (uc >> 10 == 0xd800 >> 10) { ucs2_t uc2; unsigned char *c2 = (unsigned char *)&uc2; RESERVE_INBUF(4) c2[BYTE0] = IN3; c2[BYTE1] = IN4; if (uc2 >> 10 != 0xdc00 >> 10) return 4; OUT1(0x10000 + ((ucs4_t)(uc - 0xd800) << 10) + (uc2 - 0xdc00)) NEXT(4, 1) continue; } #endif OUT1(uc) NEXT(2, 1) } return 0; } #define CODEC_WITHOUT_MAPS #include "codecentry.h" CODEC_REGISTRY(utf_16le) /* * ex: ts=8 sts=4 et */ |