[KoCo-CVS] [Commit] cjkcodecs/src _utf_16.c _utf_16be.c _utf_16le.c _utf_7.c codeccommon.h
Brought to you by:
perky
|
From: Hye-Shik C. <pe...@us...> - 2003-07-12 05:22:24
|
perky 03/07/11 22:22:24
Modified: src _utf_7.c codeccommon.h
Added: src _utf_16.c _utf_16be.c _utf_16le.c
Log:
Add utf-16 codecs.
Revision Changes Path
1.2 +2 -2 cjkcodecs/src/_utf_7.c
Index: _utf_7.c
===================================================================
RCS file: /cvsroot/koco/cjkcodecs/src/_utf_7.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- _utf_7.c 12 Jul 2003 03:47:14 -0000 1.1
+++ _utf_7.c 12 Jul 2003 05:22:23 -0000 1.2
@@ -1,5 +1,5 @@
/*
- * _utf_7.c: the UTF-8 codec (RFC2152)
+ * _utf_7.c: the UTF-7 codec (RFC2152)
*
* Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>.
* All rights reserved.
@@ -26,7 +26,7 @@
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
- * $Id: _utf_7.c,v 1.1 2003/07/12 03:47:14 perky Exp $
+ * $Id: _utf_7.c,v 1.2 2003/07/12 05:22:23 perky Exp $
*/
#include "codeccommon.h"
1.25 +2 -2 cjkcodecs/src/codeccommon.h
Index: codeccommon.h
===================================================================
RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -r1.24 -r1.25
--- codeccommon.h 12 Jul 2003 03:47:14 -0000 1.24
+++ codeccommon.h 12 Jul 2003 05:22:23 -0000 1.25
@@ -26,7 +26,7 @@
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
- * $Id: codeccommon.h,v 1.24 2003/07/12 03:47:14 perky Exp $
+ * $Id: codeccommon.h,v 1.25 2003/07/12 05:22:23 perky Exp $
*/
#include "Python.h"
@@ -147,7 +147,7 @@
if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
RESERVE_INBUF(2) \
if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
- c = 0x10000 + ((c - 0xd800) << 10) + \
+ c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
((ucs4_t)(IN2) - 0xdc00); \
} \
}
1.1 cjkcodecs/src/_utf_16.c
Index: _utf_16.c
===================================================================
/*
* _utf_16.c: the UTF-16 codec (RFC2781)
*
* Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $Id: _utf_16.c,v 1.1 2003/07/12 05:22:23 perky Exp $
*/
#include "codeccommon.h"
/* Marks this codec as supplying its own encoder initializer.
 * NOTE(review): presumably tested by codecentry.h when it builds the
 * codec entry -- confirm there. */
#define HAVE_ENCODER_INIT
/*
 * Encoder initializer for the utf_16 codec.
 *
 * Sets state->i to 1.  The utf_16 encoder writes a byte order mark and
 * clears the flag the first time it runs with state->i non-zero.
 * Always returns 0.
 */
ENCODER_INIT(utf_16)
{
state->i = 1; /* indicates "the beginning of stream" */
return 0;
}
ENCODER(utf_16)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
ucs2_t uc;
unsigned char *ucdata = (unsigned char *)&uc;
if (state->i) { /* BOM */
state->i = 0;
uc = 0xfeff;
WRITE2(ucdata[0], ucdata[1])
NEXT_OUT(2)
}
#if Py_UNICODE_SIZE == 4
if (c > 0xffff) {
if (c > 0x10ffff)
return 1;
RESERVE_OUTBUF(4)
uc = 0xd800 + (((c) - 0x10000) >> 10);
OUT1(ucdata[0])
OUT2(ucdata[1])
uc = 0xdc00 + (((c) - 0x10000) & 0x3ff);
OUT3(ucdata[0])
OUT4(ucdata[1])
NEXT(1, 4)
continue;
}
#endif
RESERVE_OUTBUF(2)
uc = c;
OUT1(ucdata[0])
OUT2(ucdata[1])
NEXT(1, 2)
}
return 0;
}
/*
 * _NATIVE is the state->i value that stands for the host's own byte
 * order, following the utf_16 decoder's convention
 * (1 = big endian, -1 = little endian).
 */
#if defined(WORDS_BIGENDIAN)
# define _NATIVE (1)
#else
# define _NATIVE (-1)
#endif
/* Marks this codec as supplying its own decoder initializer.
 * NOTE(review): presumably tested by codecentry.h when it builds the
 * codec entry -- confirm there. */
#define HAVE_DECODER_INIT
/*
 * Decoder initializer for the utf_16 codec.
 *
 * Resets state->i to 0, the "byte order not yet known" value; the utf_16
 * decoder replaces it with -1 or 1 once it has looked at the first two
 * input bytes.  Always returns 0.
 */
DECODER_INIT(utf_16)
{
state->i = 0; /* -1: little 0: (the beginning of stream) 1: big */
return 0;
}
/*
 * UTF-16 decoder with byte order detection (RFC2781).
 *
 * state->i holds the byte order: 0 while it is still unknown, -1 for
 * little endian, 1 for big endian.  While it is 0 the first two bytes
 * are inspected:
 *   FF FE  -> little endian, the BOM is consumed without output;
 *   FE FF  -> big endian, the BOM is consumed without output;
 *   other  -> the host's byte order (_NATIVE) is assumed and the bytes
 *             are decoded as ordinary data.
 * Once the order is fixed, later BOM-like units are decoded as data.
 *
 * When Py_UNICODE is four bytes wide, a high surrogate followed by a low
 * surrogate is combined into a single code point.  Any other 16-bit unit
 * (including a low surrogate that has no preceding high surrogate) is
 * passed through unchanged.
 *
 * Returns 0 after all input has been consumed, and 2 when a high
 * surrogate is not followed by a low surrogate.  Only the two bytes of
 * the unpaired high surrogate are reported, so the unit after it -- which
 * may be a perfectly valid character -- is decoded on its own rather than
 * being discarded together with the surrogate.
 * NOTE(review): this relies on a positive return value meaning "length in
 * bytes of the illegal sequence" -- confirm against the codec framework.
 * RESERVE_INBUF / RESERVE_OUTBUF may also leave the function early
 * (NOTE(review): presumably on truncated input or a full output buffer).
 */
DECODER(utf_16)
{
    while (inleft > 0) {
        ucs2_t uc;
        unsigned char *c = (unsigned char *)&uc;

        RESERVE_INBUF(2)

        if (state->i == 0) {
            if (IN1 == 0xff && IN2 == 0xfe) {
                state->i = -1;
                NEXT_IN(2)
                continue;
            } else if (IN1 == 0xfe && IN2 == 0xff) {
                state->i = 1;
                NEXT_IN(2)
                continue;
            } else {
                /* set as native byte order and don't interpret any more
                 * BOMs (RFC2781 Page 4) */
                state->i = _NATIVE;
            }
        }

        RESERVE_OUTBUF(1)

        /* assemble the unit in host representation: copy straight through
         * for native-order input, swap the two bytes otherwise */
        if (state->i == _NATIVE)
            c[0] = IN1, c[1] = IN2;
        else
            c[1] = IN1, c[0] = IN2;

#if Py_UNICODE_SIZE == 4
        if (uc >> 10 == 0xd800 >> 10) { /* high surrogate */
            ucs2_t uc2;
            unsigned char *c2 = (unsigned char *)&uc2;

            RESERVE_INBUF(4)

            if (state->i == _NATIVE)
                c2[0] = IN3, c2[1] = IN4;
            else
                c2[1] = IN3, c2[0] = IN4;

            if (uc2 >> 10 != 0xdc00 >> 10)
                return 2; /* only the unpaired high surrogate is illegal */

            OUT1(0x10000 + ((ucs4_t)(uc - 0xd800) << 10) + (uc2 - 0xdc00))
            NEXT(4, 1)
            continue;
        }
#endif

        OUT1(uc)
        NEXT(2, 1)
    }

    return 0;
}
/* CODEC_WITHOUT_MAPS is defined ahead of the inclusion of codecentry.h.
 * NOTE(review): presumably it tells that header this codec has no
 * mapping tables to register -- confirm in codecentry.h. */
#define CODEC_WITHOUT_MAPS
#include "codecentry.h"
/* Registers the utf_16 codec defined in this file. */
CODEC_REGISTRY(utf_16)
/*
* ex: ts=8 sts=4 et
*/
1.1 cjkcodecs/src/_utf_16be.c
Index: _utf_16be.c
===================================================================
/*
* _utf_16be.c: the UTF-16BE codec (RFC2781)
*
* Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $Id: _utf_16be.c,v 1.1 2003/07/12 05:22:23 perky Exp $
*/
#include "codeccommon.h"
/*
 * BYTE0 / BYTE1: positions, inside the in-memory bytes of a ucs2_t, of
 * the first and second byte of a big-endian 16-bit unit.  On a
 * little-endian host the two bytes have to be swapped.
 */
#ifndef WORDS_BIGENDIAN
# define BYTE0 1
# define BYTE1 0
#else
# define BYTE0 0
# define BYTE1 1
#endif
ENCODER(utf_16be)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
ucs2_t uc;
unsigned char *ucdata = (unsigned char *)&uc;
#if Py_UNICODE_SIZE == 4
if (c > 0xffff) {
if (c > 0x10ffff)
return 1;
RESERVE_OUTBUF(4)
uc = 0xd800 + (((c) - 0x10000) >> 10);
OUT1(ucdata[BYTE0])
OUT2(ucdata[BYTE1])
uc = 0xdc00 + (((c) - 0x10000) & 0x3ff);
OUT3(ucdata[BYTE0])
OUT4(ucdata[BYTE1])
NEXT(1, 4)
continue;
}
#endif
RESERVE_OUTBUF(2)
uc = c;
OUT1(ucdata[BYTE0])
OUT2(ucdata[BYTE1])
NEXT(1, 2)
}
return 0;
}
/*
 * UTF-16BE decoder.
 *
 * Reads big-endian 16-bit units; BYTE0 / BYTE1 place the two input bytes
 * at the right positions of a host-order ucs2_t.  No byte order mark
 * handling is done here.
 *
 * When Py_UNICODE is four bytes wide, a high surrogate followed by a low
 * surrogate is combined into a single code point.  Any other 16-bit unit
 * (including a low surrogate that has no preceding high surrogate) is
 * passed through unchanged.
 *
 * Returns 0 after all input has been consumed, and 2 when a high
 * surrogate is not followed by a low surrogate.  Only the two bytes of
 * the unpaired high surrogate are reported, so the unit after it -- which
 * may be a perfectly valid character -- is decoded on its own rather than
 * being discarded together with the surrogate.
 * NOTE(review): this relies on a positive return value meaning "length in
 * bytes of the illegal sequence" -- confirm against the codec framework.
 * RESERVE_INBUF / RESERVE_OUTBUF may also leave the function early
 * (NOTE(review): presumably on truncated input or a full output buffer).
 */
DECODER(utf_16be)
{
    while (inleft > 0) {
        ucs2_t uc;
        unsigned char *c = (unsigned char *)&uc;

        RESERVE_INBUF(2)
        RESERVE_OUTBUF(1)

        c[BYTE0] = IN1;
        c[BYTE1] = IN2;

#if Py_UNICODE_SIZE == 4
        if (uc >> 10 == 0xd800 >> 10) { /* high surrogate */
            ucs2_t uc2;
            unsigned char *c2 = (unsigned char *)&uc2;

            RESERVE_INBUF(4)

            c2[BYTE0] = IN3;
            c2[BYTE1] = IN4;

            if (uc2 >> 10 != 0xdc00 >> 10)
                return 2; /* only the unpaired high surrogate is illegal */

            OUT1(0x10000 + ((ucs4_t)(uc - 0xd800) << 10) + (uc2 - 0xdc00))
            NEXT(4, 1)
            continue;
        }
#endif

        OUT1(uc)
        NEXT(2, 1)
    }

    return 0;
}
/* CODEC_WITHOUT_MAPS is defined ahead of the inclusion of codecentry.h.
 * NOTE(review): presumably it tells that header this codec has no
 * mapping tables to register -- confirm in codecentry.h. */
#define CODEC_WITHOUT_MAPS
#include "codecentry.h"
/* Registers the utf_16be codec defined in this file. */
CODEC_REGISTRY(utf_16be)
/*
* ex: ts=8 sts=4 et
*/
1.1 cjkcodecs/src/_utf_16le.c
Index: _utf_16le.c
===================================================================
/*
* _utf_16le.c: the UTF-16LE codec (RFC2781)
*
* Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $Id: _utf_16le.c,v 1.1 2003/07/12 05:22:23 perky Exp $
*/
#include "codeccommon.h"
/*
 * BYTE0 / BYTE1: positions, inside the in-memory bytes of a ucs2_t, of
 * the first and second byte of a little-endian 16-bit unit.  On a
 * big-endian host the two bytes have to be swapped.
 */
#ifndef WORDS_BIGENDIAN
# define BYTE0 0
# define BYTE1 1
#else
# define BYTE0 1
# define BYTE1 0
#endif
ENCODER(utf_16le)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
ucs2_t uc;
unsigned char *ucdata = (unsigned char *)&uc;
#if Py_UNICODE_SIZE == 4
if (c > 0xffff) {
if (c > 0x10ffff)
return 1;
RESERVE_OUTBUF(4)
uc = 0xd800 + (((c) - 0x10000) >> 10);
OUT1(ucdata[BYTE0])
OUT2(ucdata[BYTE1])
uc = 0xdc00 + (((c) - 0x10000) & 0x3ff);
OUT3(ucdata[BYTE0])
OUT4(ucdata[BYTE1])
NEXT(1, 4)
continue;
}
#endif
RESERVE_OUTBUF(2)
uc = c;
OUT1(ucdata[BYTE0])
OUT2(ucdata[BYTE1])
NEXT(1, 2)
}
return 0;
}
/*
 * UTF-16LE decoder.
 *
 * Reads little-endian 16-bit units; BYTE0 / BYTE1 place the two input
 * bytes at the right positions of a host-order ucs2_t.  No byte order
 * mark handling is done here.
 *
 * When Py_UNICODE is four bytes wide, a high surrogate followed by a low
 * surrogate is combined into a single code point.  Any other 16-bit unit
 * (including a low surrogate that has no preceding high surrogate) is
 * passed through unchanged.
 *
 * Returns 0 after all input has been consumed, and 2 when a high
 * surrogate is not followed by a low surrogate.  Only the two bytes of
 * the unpaired high surrogate are reported, so the unit after it -- which
 * may be a perfectly valid character -- is decoded on its own rather than
 * being discarded together with the surrogate.
 * NOTE(review): this relies on a positive return value meaning "length in
 * bytes of the illegal sequence" -- confirm against the codec framework.
 * RESERVE_INBUF / RESERVE_OUTBUF may also leave the function early
 * (NOTE(review): presumably on truncated input or a full output buffer).
 */
DECODER(utf_16le)
{
    while (inleft > 0) {
        ucs2_t uc;
        unsigned char *c = (unsigned char *)&uc;

        RESERVE_INBUF(2)
        RESERVE_OUTBUF(1)

        c[BYTE0] = IN1;
        c[BYTE1] = IN2;

#if Py_UNICODE_SIZE == 4
        if (uc >> 10 == 0xd800 >> 10) { /* high surrogate */
            ucs2_t uc2;
            unsigned char *c2 = (unsigned char *)&uc2;

            RESERVE_INBUF(4)

            c2[BYTE0] = IN3;
            c2[BYTE1] = IN4;

            if (uc2 >> 10 != 0xdc00 >> 10)
                return 2; /* only the unpaired high surrogate is illegal */

            OUT1(0x10000 + ((ucs4_t)(uc - 0xd800) << 10) + (uc2 - 0xdc00))
            NEXT(4, 1)
            continue;
        }
#endif

        OUT1(uc)
        NEXT(2, 1)
    }

    return 0;
}
/* CODEC_WITHOUT_MAPS is defined ahead of the inclusion of codecentry.h.
 * NOTE(review): presumably it tells that header this codec has no
 * mapping tables to register -- confirm in codecentry.h. */
#define CODEC_WITHOUT_MAPS
#include "codecentry.h"
/* Registers the utf_16le codec defined in this file. */
CODEC_REGISTRY(utf_16le)
/*
* ex: ts=8 sts=4 et
*/
|