koco-cvs Mailing List for Python Korean Codecs (Page 2)
Brought to you by:
perky
You can subscribe to this list here.
| Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2002 | | | | (88) | (5) | | (27) | | | (5) | | |
| 2003 | (77) | (3) | | (22) | (123) | (80) | (83) | | | | | |
From: Hye-Shik C. <pe...@us...> - 2003-07-12 05:22:24
|
perky 03/07/11 22:22:23 Modified: . CHANGES setup.py Log: Add utf-16 codecs. Revision Changes Path 1.16 +1 -1 cjkcodecs/CHANGES Index: CHANGES =================================================================== RCS file: /cvsroot/koco/cjkcodecs/CHANGES,v retrieving revision 1.15 retrieving revision 1.16 diff -u -r1.15 -r1.16 --- CHANGES 12 Jul 2003 03:47:14 -0000 1.15 +++ CHANGES 12 Jul 2003 05:22:23 -0000 1.16 @@ -3,7 +3,7 @@ *) SHIFT-JISX0213, EUC-JISX0213, ISO-2022-JP-2 and ISO-2022-JP-3 codec is added. - *) UTF-7 codec is added. + *) UTF-7, UTF-16, UTF-16BE and UTF-16LE codec is added. *) Changed a few characters of a big5 codepoint mapping to cp950's rather than 0xfffd. (documented on NOTES.big5) 1.30 +2 -2 cjkcodecs/setup.py Index: setup.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/setup.py,v retrieving revision 1.29 retrieving revision 1.30 diff -u -r1.29 -r1.30 --- setup.py 12 Jul 2003 03:47:14 -0000 1.29 +++ setup.py 12 Jul 2003 05:22:23 -0000 1.30 @@ -27,7 +27,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: setup.py,v 1.29 2003/07/12 03:47:14 perky Exp $ +# $Id: setup.py,v 1.30 2003/07/12 05:22:23 perky Exp $ # import sys @@ -43,7 +43,7 @@ 'ko_KR': ['euc_kr', 'cp949', 'johab', 'iso_2022_kr'], 'zh_CN': ['gb2312', 'gbk', 'gb18030', 'hz'], 'zh_TW': ['big5', 'cp950'], -'': ['utf_7', 'utf_8'], +'': ['utf_7', 'utf_8', 'utf_16', 'utf_16be', 'utf_16le'], } locales = encodings.keys() |
From: Hye-Shik C. <pe...@us...> - 2003-07-12 05:08:10
|
perky 03/07/11 22:08:07 Modified: cjkcodecs Makefile aliases.py Added: cjkcodecs utf_16be.py utf_16le.py Log: Add utf-16be and utf-16le codec. Revision Changes Path 1.12 +2 -2 cjkcodecs/cjkcodecs/Makefile Index: Makefile =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/Makefile,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- Makefile 12 Jul 2003 04:16:36 -0000 1.11 +++ Makefile 12 Jul 2003 05:08:07 -0000 1.12 @@ -25,7 +25,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: Makefile,v 1.11 2003/07/12 04:16:36 perky Exp $ +# $Id: Makefile,v 1.12 2003/07/12 05:08:07 perky Exp $ # GENERIC_ENCODINGS= gb2312 gbk gb18030 hz \ @@ -34,7 +34,7 @@ iso_2022_jp iso_2022_jp_1 iso_2022_jp_2 \ iso_2022_jp_3 euc_jisx0213 shift_jisx0213 \ cp949 euc_kr johab iso_2022_kr \ - utf_7 utf_8 utf_16 + utf_7 utf_8 utf_16 utf_16be utf_16le all: for cset in ${GENERIC_ENCODINGS}; do \ 1.13 +12 -4 cjkcodecs/cjkcodecs/aliases.py Index: aliases.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/aliases.py,v retrieving revision 1.12 retrieving revision 1.13 diff -u -r1.12 -r1.13 --- aliases.py 12 Jul 2003 04:16:36 -0000 1.12 +++ aliases.py 12 Jul 2003 05:08:07 -0000 1.13 @@ -26,7 +26,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# -# $Id: aliases.py,v 1.12 2003/07/12 04:16:36 perky Exp $ +# $Id: aliases.py,v 1.13 2003/07/12 05:08:07 perky Exp $ # from encodings.aliases import aliases @@ -145,15 +145,23 @@ 's_jisx0213' : 'cjkcodecs.shift_jisx0213', # utf_16 codec - #'utf16' : 'cjkcodecs.utf16', + #'utf16' : 'cjkcodecs.utf_16', #'utf_16' : 'cjkcodecs.utf_16', + # utf_16be codec + #'utf16be' : 'cjkcodecs.utf_16be', + #'utf_16be' : 'cjkcodecs.utf_16be', + + # utf_16le codec + #'utf16le' : 'cjkcodecs.utf_16le', + #'utf_16le' : 'cjkcodecs.utf_16le', + # utf_7 codec - #'utf7' : 'cjkcodecs.utf7', + #'utf7' : 'cjkcodecs.utf_7', #'utf_7' : 'cjkcodecs.utf_7', # utf_8 codec - #'utf8' : 'cjkcodecs.utf8', + #'utf8' : 'cjkcodecs.utf_8', #'utf_8' : 'cjkcodecs.utf_8', }) del aliases 1.1 cjkcodecs/cjkcodecs/utf_16be.py Index: utf_16be.py =================================================================== # ACHTUNG: This file is generated automatically. Please do not edit. # # utf_16be.py: Python Unicode Codec for UTF_16BE # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # $Id: utf_16be.py,v 1.1 2003/07/12 05:08:07 perky Exp $ # from cjkcodecs._utf_16be import codec import codecs class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): def __init__(self, stream, errors='strict'): codecs.StreamWriter.__init__(self, stream, errors) __codec = codec.StreamWriter(stream, errors) self.write = __codec.write self.writelines = __codec.writelines self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) # ex: ts=8 sts=4 et 1.1 cjkcodecs/cjkcodecs/utf_16le.py Index: utf_16le.py =================================================================== # ACHTUNG: This file is generated automatically. Please do not edit. # # utf_16le.py: Python Unicode Codec for UTF_16LE # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. 
Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # $Id: utf_16le.py,v 1.1 2003/07/12 05:08:07 perky Exp $ # from cjkcodecs._utf_16le import codec import codecs class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): def __init__(self, stream, errors='strict'): codecs.StreamWriter.__init__(self, stream, errors) __codec = codec.StreamWriter(stream, errors) self.write = __codec.write self.writelines = __codec.writelines self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) # ex: ts=8 sts=4 et |
From: Hye-Shik C. <pe...@us...> - 2003-07-12 04:17:00
|
perky 03/07/11 21:16:36 Modified: cjkcodecs Makefile aliases.py Added: cjkcodecs utf_16.py Log: Add utf-16 codec Revision Changes Path 1.11 +2 -2 cjkcodecs/cjkcodecs/Makefile Index: Makefile =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/Makefile,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- Makefile 10 Jul 2003 13:30:26 -0000 1.10 +++ Makefile 12 Jul 2003 04:16:36 -0000 1.11 @@ -25,7 +25,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: Makefile,v 1.10 2003/07/10 13:30:26 perky Exp $ +# $Id: Makefile,v 1.11 2003/07/12 04:16:36 perky Exp $ # GENERIC_ENCODINGS= gb2312 gbk gb18030 hz \ @@ -34,7 +34,7 @@ iso_2022_jp iso_2022_jp_1 iso_2022_jp_2 \ iso_2022_jp_3 euc_jisx0213 shift_jisx0213 \ cp949 euc_kr johab iso_2022_kr \ - utf_7 utf_8 + utf_7 utf_8 utf_16 all: for cset in ${GENERIC_ENCODINGS}; do \ 1.12 +5 -1 cjkcodecs/cjkcodecs/aliases.py Index: aliases.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/aliases.py,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- aliases.py 10 Jul 2003 22:40:03 -0000 1.11 +++ aliases.py 12 Jul 2003 04:16:36 -0000 1.12 @@ -26,7 +26,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# -# $Id: aliases.py,v 1.11 2003/07/10 22:40:03 perky Exp $ +# $Id: aliases.py,v 1.12 2003/07/12 04:16:36 perky Exp $ # from encodings.aliases import aliases @@ -143,6 +143,10 @@ 'shift_jisx0213' : 'cjkcodecs.shift_jisx0213', 'sjisx0213' : 'cjkcodecs.shift_jisx0213', 's_jisx0213' : 'cjkcodecs.shift_jisx0213', + + # utf_16 codec + #'utf16' : 'cjkcodecs.utf16', + #'utf_16' : 'cjkcodecs.utf_16', # utf_7 codec #'utf7' : 'cjkcodecs.utf7', 1.1 cjkcodecs/cjkcodecs/utf_16.py Index: utf_16.py =================================================================== # ACHTUNG: This file is generated automatically. Please do not edit. # # utf_16.py: Python Unicode Codec for UTF_16 # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# # $Id: utf_16.py,v 1.1 2003/07/12 04:16:36 perky Exp $ # from cjkcodecs._utf_16 import codec import codecs class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): def __init__(self, stream, errors='strict'): codecs.StreamWriter.__init__(self, stream, errors) __codec = codec.StreamWriter(stream, errors) self.write = __codec.write self.writelines = __codec.writelines self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) # ex: ts=8 sts=4 et |
From: Hye-Shik C. <pe...@us...> - 2003-07-12 03:56:05
|
perky 03/07/11 20:48:24 Modified: src multibytecodec.h Log: Revive int type of state union for hz codec. Revision Changes Path 1.11 +2 -1 cjkcodecs/src/multibytecodec.h Index: multibytecodec.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.h,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- multibytecodec.h 12 Jul 2003 03:47:15 -0000 1.10 +++ multibytecodec.h 12 Jul 2003 03:48:24 -0000 1.11 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: multibytecodec.h,v 1.10 2003/07/12 03:47:15 perky Exp $ + * $Id: multibytecodec.h,v 1.11 2003/07/12 03:48:24 perky Exp $ */ #ifndef _PYTHON_MULTIBYTECODEC_H_ @@ -39,6 +39,7 @@ typedef union { void *p; + int i; unsigned char c[8]; ucs2_t u2[4]; ucs4_t u4[2]; |
From: Hye-Shik C. <pe...@us...> - 2003-07-12 03:55:53
|
perky 03/07/11 20:55:43 Modified: . ROADMAP Log: Reorder to alphabetical order. Remove planned update for KS X 1001:2002. Revision Changes Path 1.5 +6 -6 cjkcodecs/ROADMAP Index: ROADMAP =================================================================== RCS file: /cvsroot/koco/cjkcodecs/ROADMAP,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- ROADMAP 10 Jun 2003 11:31:35 -0000 1.4 +++ ROADMAP 12 Jul 2003 03:55:42 -0000 1.5 @@ -15,13 +15,13 @@ iso-2022-jp shift-jisx0213 iso-2022-jp-1 -Korea (ROK) euc-kr (ksx1001:2002) - cp949(uhc) mac_korean - johab unijohab - iso-2022-kr - Korea (DPRK) euc-kp +Korea (ROK) euc-kr mac_korean + cp949(uhc) unijohab + johab + iso-2022-kr + Taiwan big5 iso-2022-cn cp950 iso-2022-cn-ext euc-tw @@ -30,5 +30,5 @@ utf-16 -# $Id: ROADMAP,v 1.4 2003/06/10 11:31:35 perky Exp $ +# $Id: ROADMAP,v 1.5 2003/07/12 03:55:42 perky Exp $ # ex: ts=8 sts=4 et |
From: Hye-Shik C. <pe...@us...> - 2003-07-12 03:47:17
|
perky 03/07/11 20:47:14 Modified: . CHANGES setup.py Log: Add utf-7 codec. Revision Changes Path 1.15 +2 -0 cjkcodecs/CHANGES Index: CHANGES =================================================================== RCS file: /cvsroot/koco/cjkcodecs/CHANGES,v retrieving revision 1.14 retrieving revision 1.15 diff -u -r1.14 -r1.15 --- CHANGES 9 Jul 2003 21:00:47 -0000 1.14 +++ CHANGES 12 Jul 2003 03:47:14 -0000 1.15 @@ -3,6 +3,8 @@ *) SHIFT-JISX0213, EUC-JISX0213, ISO-2022-JP-2 and ISO-2022-JP-3 codec is added. + *) UTF-7 codec is added. + *) Changed a few characters of a big5 codepoint mapping to cp950's rather than 0xfffd. (documented on NOTES.big5) 1.29 +2 -2 cjkcodecs/setup.py Index: setup.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/setup.py,v retrieving revision 1.28 retrieving revision 1.29 diff -u -r1.28 -r1.29 --- setup.py 9 Jul 2003 19:30:01 -0000 1.28 +++ setup.py 12 Jul 2003 03:47:14 -0000 1.29 @@ -27,7 +27,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: setup.py,v 1.28 2003/07/09 19:30:01 perky Exp $ +# $Id: setup.py,v 1.29 2003/07/12 03:47:14 perky Exp $ # import sys @@ -43,7 +43,7 @@ 'ko_KR': ['euc_kr', 'cp949', 'johab', 'iso_2022_kr'], 'zh_CN': ['gb2312', 'gbk', 'gb18030', 'hz'], 'zh_TW': ['big5', 'cp950'], -'': ['utf_8'], +'': ['utf_7', 'utf_8'], } locales = encodings.keys() |
From: Hye-Shik C. <pe...@us...> - 2003-07-12 03:47:17
|
perky 03/07/11 20:47:15 Modified: src _utf_8.c codeccommon.h codecentry.h multibytecodec.h Added: src _utf_7.c Log: Add utf-7 codec. Revision Changes Path 1.10 +2 -4 cjkcodecs/src/_utf_8.c Index: _utf_8.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_utf_8.c,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- _utf_8.c 5 Jul 2003 19:49:02 -0000 1.9 +++ _utf_8.c 12 Jul 2003 03:47:14 -0000 1.10 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _utf_8.c,v 1.9 2003/07/05 19:49:02 perky Exp $ + * $Id: _utf_8.c,v 1.10 2003/07/12 03:47:14 perky Exp $ */ #include "codeccommon.h" @@ -195,9 +195,7 @@ #define CODEC_WITHOUT_MAPS #include "codecentry.h" -BEGIN_CODEC_REGISTRY(utf_8) -/* no maps */ -END_CODEC_REGISTRY(utf_8) +CODEC_REGISTRY(utf_8) /* * ex: ts=8 sts=4 et 1.24 +1 -2 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.23 retrieving revision 1.24 diff -u -r1.23 -r1.24 --- codeccommon.h 9 Jul 2003 21:35:19 -0000 1.23 +++ codeccommon.h 12 Jul 2003 03:47:14 -0000 1.24 @@ -26,13 +26,12 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: codeccommon.h,v 1.23 2003/07/09 21:35:19 perky Exp $ + * $Id: codeccommon.h,v 1.24 2003/07/12 03:47:14 perky Exp $ */ #include "Python.h" #include "multibytecodec.h" #include "multibytecodec_compat.h" -#include "cjkcommon.h" #define ENCMAP(encoding) \ const static encode_map *encoding##encmap; 1.4 +5 -1 cjkcodecs/src/codecentry.h Index: codecentry.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codecentry.h,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- codecentry.h 7 Jul 2003 08:37:10 -0000 1.3 +++ codecentry.h 12 Jul 2003 03:47:15 -0000 1.4 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: codecentry.h,v 1.3 2003/07/07 08:37:10 perky Exp $ + * $Id: codecentry.h,v 1.4 2003/07/12 03:47:15 perky Exp $ */ #ifdef HAVE_ENCODER_INIT @@ -117,6 +117,10 @@ Py_XDECREF(mod); \ Py_XDECREF(o); \ } + +#define CODEC_REGISTRY(encoding) \ + BEGIN_CODEC_REGISTRY(encoding) \ + END_CODEC_REGISTRY(encoding) #ifdef USING_BINARY_PAIR_SEARCH static DBCHAR 1.10 +6 -4 cjkcodecs/src/multibytecodec.h Index: multibytecodec.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/multibytecodec.h,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- multibytecodec.h 5 Jul 2003 19:49:02 -0000 1.9 +++ multibytecodec.h 12 Jul 2003 03:47:15 -0000 1.10 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: multibytecodec.h,v 1.9 2003/07/05 19:49:02 perky Exp $ + * $Id: multibytecodec.h,v 1.10 2003/07/12 03:47:15 perky Exp $ */ #ifndef _PYTHON_MULTIBYTECODEC_H_ @@ -35,11 +35,13 @@ extern "C" { #endif -#define MAXCHARSTATE 8 +#include "cjkcommon.h" + typedef union { - unsigned long i; void *p; - unsigned char c[MAXCHARSTATE]; + unsigned char c[8]; + ucs2_t u2[4]; + ucs4_t u4[2]; } MultibyteCodec_State; typedef int (*mbencode_func)(MultibyteCodec_State *state, 1.1 cjkcodecs/src/_utf_7.c Index: _utf_7.c =================================================================== /* * _utf_7.c: the UTF-8 codec (RFC2152) * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * $Id: _utf_7.c,v 1.1 2003/07/12 03:47:14 perky Exp $ */ #include "codeccommon.h" #define SET_DIRECT 1 #define SET_OPTIONAL 2 #define SET_WHITESPACE 3 #define D SET_DIRECT #define O SET_OPTIONAL #define W SET_WHITESPACE static const char utf7_sets[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, W, W, 0, 0, W, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, W, O, O, O, O, O, O, D, D, D, O, 0, D, D, D, 0, D, D, D, D, D, D, D, D, D, D, D, O, O, O, O, D, O, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, O, 0, O, O, O, O, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, O, O, O, 0, 0, }; #undef W #undef O #undef D #define B64(n) ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" \ "0123456789+/"[(n) & 0x3f]) #define B64CHAR(c) (((c) >= 'A' && (c) <= 'Z') || \ ((c) >= 'a' && (c) <= 'z') || \ ((c) >= '0' && (c) <= '9') || \ (c) == '+' || (c) == '/') #define UB64(c) ((c) == '+' ? 62 : (c) == '/' ? 63 : (c) >= 'a' ? \ (c) - 71 : (c) >= 'A' ? 
(c) - 65 : (c) + 4) #define UTF7_DENCODABLE_COMPATIBLE(c) (utf7_sets[c] != 0) #define UTF7_DENCODABLE_STRICT(c) (utf7_sets[c] == SET_DIRECT || \ utf7_sets[c] == SET_WHITESPACE) #define ESTATE_INITIALIZE(state) ESTATE_SETSTAGE(state, 0) \ ESTATE_CLEARSHIFTED(state) #define ESTATE_SETPENDING(state, v) (state)->c[0] = (v); #define ESTATE_GETPENDING(state) (state)->c[0] #define ESTATE_SETSHIFTED(state) (state)->c[2] = 1; #define ESTATE_ISSHIFTED(state) ((state)->c[2]) #define ESTATE_CLEARSHIFTED(state) (state)->c[2] = 0; #define ESTATE_SETSTAGE(state, v) (state)->c[3] = (v); #define ESTATE_GETSTAGE(state) ((state)->c[3]) #define HAVE_ENCODER_INIT ENCODER_INIT(utf_7) { ESTATE_INITIALIZE(state) return 0; } #define HAVE_ENCODER_RESET ENCODER_RESET(utf_7) { if (ESTATE_ISSHIFTED(state)) { if (ESTATE_GETSTAGE(state) != 0) { unsigned char oc; oc = B64(ESTATE_GETPENDING(state)); WRITE2(oc, '-') NEXT_OUT(2) } else { WRITE1('-') NEXT_OUT(1) } ESTATE_CLEARSHIFTED(state) } return 0; } ENCODER(utf_7) { while (inleft > 0) { Py_UNICODE c1 = IN1, c2 = 0; size_t insize = 1; #if Py_UNICODE_SIZE == 2 if (c1 >> 10 == 0xd800 >> 10) { /* high surrogate */ RESERVE_INBUF(2) if (IN2 >> 10 != 0xdc00 >> 10) /* low surrogate */ return 2; /* invalid surrogate pair */ c2 = IN2; insize = 2; } #else if (c1 > 0x10ffff) /* UTF-16 unencodable */ return 1; else if (c1 > 0xffff) { c2 = 0xdc00 | ((c1 - 0x10000) & 0x3ff); c1 = 0xd800 | ((c1 - 0x10000) >> 10); } #endif for (;;) { unsigned char oc1, oc2, oc3; if (ESTATE_ISSHIFTED(state)) { if (c1 < 128 && UTF7_DENCODABLE_STRICT(c1)) { if (ESTATE_GETSTAGE(state) != 0) { oc1 = B64(ESTATE_GETPENDING(state)); WRITE3(oc1, '-', c1) NEXT_OUT(3) } else { WRITE2('-', c1) NEXT_OUT(2) } ESTATE_CLEARSHIFTED(state) } else { switch (ESTATE_GETSTAGE(state)) { case 0: oc1 = c1 >> 10; oc2 = (c1 >> 4) & 0x3f; WRITE2(B64(oc1), B64(oc2)) ESTATE_SETPENDING(state, (c1 & 0x0f) << 2) ESTATE_SETSTAGE(state, 2) NEXT_OUT(2) break; case 1: oc1 = ESTATE_GETPENDING(state) | (c1 >> 12); 
oc2 = (c1 >> 6) & 0x3f; oc3 = c1 & 0x3f; WRITE3(B64(oc1), B64(oc2), B64(oc3)) ESTATE_SETSTAGE(state, 0) NEXT_OUT(3) break; case 2: oc1 = ESTATE_GETPENDING(state) | (c1 >> 14); oc2 = (c1 >> 8) & 0x3f; oc3 = (c1 >> 2) & 0x3f; WRITE3(B64(oc1), B64(oc2), B64(oc3)) ESTATE_SETPENDING(state, (c1 & 0x03) << 4) ESTATE_SETSTAGE(state, 1) NEXT_OUT(3) break; default: return MBERR_INTERNAL; } } } else { if (c1 < 128 && UTF7_DENCODABLE_STRICT(c1)) { WRITE1(c1) NEXT_OUT(1) } else if (c1 == '+') { WRITE2('+', '-') NEXT_OUT(2) } else { oc1 = c1 >> 10; oc2 = (c1 >> 4) & 0x3f; WRITE3('+', B64(oc1), B64(oc2)) ESTATE_SETPENDING(state, (c1 & 0x0f) << 2) ESTATE_SETSTAGE(state, 2) ESTATE_SETSHIFTED(state) NEXT_OUT(3) } } if (c2 != 0) { c1 = c2; c2 = 0; } else break; } NEXT_IN(insize) } return 0; } #define DSTATE_INITIALIZE(state) DSTATE_SETBSTAGE(state, 0) \ DSTATE_CLEARSHIFTED(state) \ DSTATE_SETULENGTH(state, 0) \ DSTATE_SETUPENDING1(state, 0) \ DSTATE_SETUPENDING2(state, 0) /* XXX: Type-mixed usage of a state union may be not so portable. * If you see any problem with this on your platfom. Please let * me know. */ #define DSTATE_SETSHIFTED(state) (state)->c[0] = 1; #define DSTATE_ISSHIFTED(state) ((state)->c[0]) #define DSTATE_CLEARSHIFTED(state) (state)->c[0] = 0; #define DSTATE_SETBSTAGE(state, v) (state)->c[1] = (v); #define DSTATE_GETBSTAGE(state) ((state)->c[1]) #define DSTATE_SETBPENDING(state, v) (state)->c[2] = (v); #define DSTATE_GETBPENDING(state) ((state)->c[2]) #define DSTATE_SETULENGTH(state, v) (state)->c[3] = (v); #define DSTATE_GETULENGTH(state) ((state)->c[3]) #define DSTATE_SETUPENDING1(state, v) (state)->u2[2] = (v); #define DSTATE_GETUPENDING1(state) (state)->u2[2] #define DSTATE_SETUPENDING2(state, v) (state)->u2[3] = (v); #define DSTATE_GETUPENDING2(state) (state)->u2[3] #define DSTATE_UAPPEND(state, v) \ (state)->u2[(state)->c[3] > 1 ? 3 : 2] |= \ ((state)->c[3] & 1) ? 
(v) : ((ucs2_t)(v)) << 8; \ (state)->c[3]++; #define HAVE_DECODER_INIT DECODER_INIT(utf_7) { DSTATE_INITIALIZE(state) return 0; } static int utf_7_flush(MultibyteCodec_State *state, Py_UNICODE **outbuf, size_t *outleft) { switch (DSTATE_GETULENGTH(state)) { case 2: { ucs2_t uc; uc = DSTATE_GETUPENDING1(state); #if Py_UNICODE_SIZE == 4 if (uc >> 10 == 0xd800 >> 10) return MBERR_TOOFEW; #endif OUT1(uc) (*outbuf)++; (*outleft)--; DSTATE_SETULENGTH(state, 0) DSTATE_SETUPENDING1(state, 0) break; } #if Py_UNICODE_SIZE == 4 case 4: if (DSTATE_GETUPENDING2(state) >> 10 == 0xdc00 >> 10) return 1; OUT1(0x10000 + (((ucs4_t)DSTATE_GETUPENDING1(state) - 0xd800) << 10) + (DSTATE_GETUPENDING2(state) - 0xdc00)) (*outbuf)++; (*outleft)--; DSTATE_SETULENGTH(state, 0) DSTATE_SETUPENDING1(state, 0) DSTATE_SETUPENDING2(state, 0) break; #endif case 0: /* FALLTHROUGH */ case 1: /* FALLTHROUGH */ case 3: return MBERR_TOOFEW; default: return MBERR_INTERNAL; } return 0; } #define HAVE_DECODER_RESET DECODER_RESET(utf_7) { DSTATE_INITIALIZE(state) return 0; } DECODER(utf_7) { while (inleft > 0) { unsigned char c = IN1; int r; if (!DSTATE_ISSHIFTED(state)) { if (c == '+') { RESERVE_INBUF(2) if (inleft >= 2 && IN2 == '-') { WRITE1('+') NEXT(2, 1) } else { DSTATE_SETSHIFTED(state) NEXT_IN(1) } } else if (c < 128 && UTF7_DENCODABLE_COMPATIBLE(c)) { WRITE1(c) NEXT(1, 1) } else return 1; } else if (B64CHAR(c)) { unsigned char tb; RESERVE_OUTBUF(1) c = UB64(c); assert(DSTATE_GETULENGTH(state) < 4); switch (DSTATE_GETBSTAGE(state)) { case 0: DSTATE_SETBPENDING(state, c << 2) DSTATE_SETBSTAGE(state, 1) break; case 1: tb = DSTATE_GETBPENDING(state) | (c >> 4); DSTATE_SETBPENDING(state, c << 4) DSTATE_SETBSTAGE(state, 2) DSTATE_UAPPEND(state, tb) break; case 2: tb = DSTATE_GETBPENDING(state) | (c >> 2); DSTATE_SETBPENDING(state, c << 6) DSTATE_SETBSTAGE(state, 3) DSTATE_UAPPEND(state, tb) break; case 3: tb = DSTATE_GETBPENDING(state) | c; DSTATE_SETBSTAGE(state, 0) DSTATE_UAPPEND(state, tb) break; } r = 
utf_7_flush(state, outbuf, &outleft); if (r != 0 && r != MBERR_TOOFEW) return r; NEXT_IN(1) } else if (c == '-' || UTF7_DENCODABLE_COMPATIBLE(c)) { if (DSTATE_GETBSTAGE(state) != 0) { DSTATE_UAPPEND(state, DSTATE_GETBSTAGE(state)) DSTATE_SETBSTAGE(state, 0) } r = utf_7_flush(state, outbuf, &outleft); if (r != 0 && r != MBERR_TOOFEW) return r; DSTATE_CLEARSHIFTED(state) if (c != '-') { WRITE1(c) NEXT_OUT(1) } NEXT_IN(1) } else return 1; } return 0; } #define CODEC_WITHOUT_MAPS #include "codecentry.h" CODEC_REGISTRY(utf_7) /* * ex: ts=8 sts=4 et */ |
From: Hye-Shik C. <pe...@us...> - 2003-07-10 22:40:06
|
perky 03/07/10 15:40:03 Modified: cjkcodecs aliases.py Log: Add aliases for utf-7 codec. Revision Changes Path 1.11 +5 -1 cjkcodecs/cjkcodecs/aliases.py Index: aliases.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/aliases.py,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- aliases.py 8 Jul 2003 06:14:13 -0000 1.10 +++ aliases.py 10 Jul 2003 22:40:03 -0000 1.11 @@ -26,7 +26,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: aliases.py,v 1.10 2003/07/08 06:14:13 perky Exp $ +# $Id: aliases.py,v 1.11 2003/07/10 22:40:03 perky Exp $ # from encodings.aliases import aliases @@ -143,6 +143,10 @@ 'shift_jisx0213' : 'cjkcodecs.shift_jisx0213', 'sjisx0213' : 'cjkcodecs.shift_jisx0213', 's_jisx0213' : 'cjkcodecs.shift_jisx0213', + + # utf_7 codec + #'utf7' : 'cjkcodecs.utf7', + #'utf_7' : 'cjkcodecs.utf_7', # utf_8 codec #'utf8' : 'cjkcodecs.utf8', |
From: Hye-Shik C. <pe...@us...> - 2003-07-10 13:30:30
|
perky 03/07/10 06:30:26 Modified: cjkcodecs Makefile Added: cjkcodecs utf_7.py Log: add utf-7 codec. Revision Changes Path 1.10 +4 -2 cjkcodecs/cjkcodecs/Makefile Index: Makefile =================================================================== RCS file: /cvsroot/koco/cjkcodecs/cjkcodecs/Makefile,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- Makefile 7 Jul 2003 08:17:36 -0000 1.9 +++ Makefile 10 Jul 2003 13:30:26 -0000 1.10 @@ -25,7 +25,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: Makefile,v 1.9 2003/07/07 08:17:36 perky Exp $ +# $Id: Makefile,v 1.10 2003/07/10 13:30:26 perky Exp $ # GENERIC_ENCODINGS= gb2312 gbk gb18030 hz \ @@ -34,14 +34,16 @@ iso_2022_jp iso_2022_jp_1 iso_2022_jp_2 \ iso_2022_jp_3 euc_jisx0213 shift_jisx0213 \ cp949 euc_kr johab iso_2022_kr \ - utf_8 + utf_7 utf_8 all: for cset in ${GENERIC_ENCODINGS}; do \ CSET=`echo $$cset|tr "[:lower:]" "[:upper:]"`; \ + if [ ! -f $$cset.py ]; then \ sed -e "s/%%ENCODING%%/$$CSET/g" \ -e "s/%%encoding%%/$$cset/g" \ -e "s/%%__%%/ACHTUNG: This file is generated automatically.\ Please do not edit./g" xxcodec.py.in \ > $$cset.py; \ + fi \ done 1.1 cjkcodecs/cjkcodecs/utf_7.py Index: utf_7.py =================================================================== # ACHTUNG: This file is generated automatically. Please do not edit. # # utf_7.py: Python Unicode Codec for UTF_7 # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. 
Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # $Id: utf_7.py,v 1.1 2003/07/10 13:30:26 perky Exp $ # from cjkcodecs._utf_7 import codec import codecs class Codec(codecs.Codec): encode = codec.encode decode = codec.decode class StreamReader(Codec, codecs.StreamReader): def __init__(self, stream, errors='strict'): codecs.StreamReader.__init__(self, stream, errors) __codec = codec.StreamReader(stream, errors) self.read = __codec.read self.readline = __codec.readline self.readlines = __codec.readlines self.reset = __codec.reset class StreamWriter(Codec, codecs.StreamWriter): def __init__(self, stream, errors='strict'): codecs.StreamWriter.__init__(self, stream, errors) __codec = codec.StreamWriter(stream, errors) self.write = __codec.write self.writelines = __codec.writelines self.reset = __codec.reset def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) # ex: ts=8 sts=4 et |
From: Hye-Shik C. <pe...@us...> - 2003-07-09 21:50:18
|
perky 03/07/09 14:50:16 Modified: src iso2022common.h Log: - Add ISO8859-1 and ISO8859-7 charset constant - Don't bypass SP and DEL as undecoded itself. Revision Changes Path 1.4 +5 -8 cjkcodecs/src/iso2022common.h Index: iso2022common.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/iso2022common.h,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- iso2022common.h 9 Jul 2003 18:47:47 -0000 1.3 +++ iso2022common.h 9 Jul 2003 21:50:16 -0000 1.4 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: iso2022common.h,v 1.3 2003/07/09 18:47:47 perky Exp $ + * $Id: iso2022common.h,v 1.4 2003/07/09 21:50:16 perky Exp $ */ /* This ISO-2022 implementation is intended to comply ECMA-43 Level 1 @@ -35,8 +35,6 @@ #define ESC 0x1b #define SO 0x0e #define SI 0x0f -#define SP 0x20 -#define DEL 0x7f #define MAX_ESCSEQLEN 16 @@ -66,6 +64,9 @@ #define CHARSET_ASCII 'B' +#define CHARSET_ISO8859_1 'A' +#define CHARSET_ISO8859_7 'F' + #define CHARSET_KSX1001 ('C'|CHARSET_DOUBLEBYTE) #define CHARSET_JISX0201_R 'J' @@ -135,11 +136,7 @@ break; \ case '\n': \ STATE_CLEARFLAG(state, F_SHIFTED) \ - /* FALLTHROUGH */ \ - case SP: /* FALLTHROUGH */ \ - case DEL: \ - RESERVE_OUTBUF(1) \ - OUT1(c1) \ + WRITE1('\n') \ NEXT(1, 1) \ break; |
From: Hye-Shik C. <pe...@us...> - 2003-07-09 21:35:20
|
perky 03/07/09 14:35:19 Modified: src _big5.c _cp932.c _cp949.c _cp950.c _euc_jisx0213.c _euc_jp.c _euc_kr.c _gb18030.c _gb2312.c _gbk.c _iso_2022_jp.c _johab.c _shift_jis.c _shift_jisx0213.c codeccommon.h Log: Utilize WRITE* and OUT* macro. Revision Changes Path 1.7 +4 -4 cjkcodecs/src/_big5.c Index: _big5.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_big5.c,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- _big5.c 9 Jun 2003 10:25:36 -0000 1.6 +++ _big5.c 9 Jul 2003 21:35:18 -0000 1.7 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _big5.c,v 1.6 2003/06/09 10:25:36 perky Exp $ + * $Id: _big5.c,v 1.7 2003/07/09 21:35:18 perky Exp $ */ #include "codeccommon.h" @@ -64,18 +64,18 @@ DECODER(big5) { while (inleft > 0) { - unsigned char c = **inbuf; + unsigned char c = IN1; RESERVE_OUTBUF(1) if (c < 0x80) { - **outbuf = c; + OUT1(c) NEXT(1, 1) continue; } RESERVE_INBUF(2) - TRYMAP_DEC(big5, **outbuf, c, (*inbuf)[1]) { + TRYMAP_DEC(big5, **outbuf, c, IN2) { NEXT(2, 1) } else return 2; } 1.5 +20 -22 cjkcodecs/src/_cp932.c Index: _cp932.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_cp932.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- _cp932.c 20 Jun 2003 09:04:52 -0000 1.4 +++ _cp932.c 9 Jul 2003 21:35:19 -0000 1.5 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _cp932.c,v 1.4 2003/06/20 09:04:52 perky Exp $ + * $Id: _cp932.c,v 1.5 2003/07/09 21:35:19 perky Exp $ */ #include "codeccommon.h" @@ -39,27 +39,25 @@ ENCODER(cp932) { while (inleft > 0) { - Py_UNICODE c = **inbuf; + Py_UNICODE c = IN1; DBCHAR code; unsigned char c1, c2; if (c <= 0x80) { - RESERVE_OUTBUF(1) - **outbuf = (unsigned char)c; + WRITE1(c) NEXT(1, 1) continue; } else if (c >= 0xff61 && c <= 0xff9f) { - RESERVE_OUTBUF(1) - **outbuf = (unsigned char)(c - 0xfec0); + WRITE1(c - 0xfec0) NEXT(1, 1) continue; } else if (c >= 0xf8f0 && c <= 0xf8f3) { /* Windows compatability */ RESERVE_OUTBUF(1) if (c == 0xf8f0) - **outbuf = 0xa0; + OUT1(0xa0) else - **outbuf = (unsigned char)(c - 0xfef1 + 0xfd); + OUT1(c - 0xfef1 + 0xfd) NEXT(1, 1) continue; } @@ -68,8 +66,8 @@ RESERVE_OUTBUF(2) TRYMAP_ENC(cp932ext, code, c) { - (*outbuf)[0] = code >> 8; - (*outbuf)[1] = code & 0xff; + OUT1(code >> 8) + OUT2(code & 0xff) } else TRYMAP_ENC(jisxcommon, code, c) { if (code & 0x8000) /* MSB set: JIS X 0212 */ return 1; @@ -79,14 +77,14 @@ c2 = code & 0xff; c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); c1 = (c1 - 0x21) >> 1; - (*outbuf)[0] = c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1; - (*outbuf)[1] = c2 < 0x3f ? c2 + 0x40 : c2 + 0x41; + OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) + OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) } else if (c >= 0xe000 && c < 0xe758) { /* User-defined area */ c1 = (Py_UNICODE)(c - 0xe000) / 188; c2 = (Py_UNICODE)(c - 0xe000) % 188; - (*outbuf)[0] = c1 + 0xf0; - (*outbuf)[1] = (c2 < 0x3f ? c2 + 0x40 : c2 + 0x41); + OUT1(c1 + 0xf0) + OUT2(c2 < 0x3f ? 
c2 + 0x40 : c2 + 0x41) } else return 1; @@ -99,29 +97,29 @@ DECODER(cp932) { while (inleft > 0) { - unsigned char c = **inbuf, c2; + unsigned char c = IN1, c2; RESERVE_OUTBUF(1) if (c <= 0x80) { - **outbuf = c; + OUT1(c) NEXT(1, 1) continue; } else if (c >= 0xa0 && c <= 0xdf) { if (c == 0xa0) - **outbuf = 0xf8f0; /* half-width katakana */ + OUT1(0xf8f0) /* half-width katakana */ else - **outbuf = 0xfec0 + c; + OUT1(0xfec0 + c) NEXT(1, 1) continue; } else if (c >= 0xfd/* && c <= 0xff*/) { /* Windows compatibility */ - **outbuf = 0xf8f1 - 0xfd + c; + OUT1(0xf8f1 - 0xfd + c) NEXT(1, 1) continue; } RESERVE_INBUF(2) - c2 = (*inbuf)[1]; + c2 = IN2; TRYMAP_DEC(cp932ext, **outbuf, c, c2); else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)) { @@ -137,8 +135,8 @@ else return 2; } else if (c >= 0xf0 && c <= 0xf9) { if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc)) - **outbuf = 0xe000 + 188 * (c - 0xf0) + - (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); + OUT1(0xe000 + 188 * (c - 0xf0) + + (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)) else return 2; } else 1.11 +10 -11 cjkcodecs/src/_cp949.c Index: _cp949.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_cp949.c,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- _cp949.c 9 Jun 2003 10:25:36 -0000 1.10 +++ _cp949.c 9 Jul 2003 21:35:19 -0000 1.11 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _cp949.c,v 1.10 2003/06/09 10:25:36 perky Exp $ + * $Id: _cp949.c,v 1.11 2003/07/09 21:35:19 perky Exp $ */ #include "codeccommon.h" @@ -38,12 +38,11 @@ ENCODER(cp949) { while (inleft > 0) { - Py_UNICODE c = **inbuf; + Py_UNICODE c = IN1; DBCHAR code; if (c < 0x80) { - RESERVE_OUTBUF(1) - **outbuf = c; + WRITE1(c) NEXT(1, 1) continue; } @@ -53,11 +52,11 @@ TRYMAP_ENC(cp949, code, c); else return 1; - (*outbuf)[0] = (code >> 8) | 0x80; + OUT1((code >> 8) | 0x80) if (code & 0x8000) - (*outbuf)[1] = (code & 0xFF); /* MSB set: CP949 */ + OUT2(code & 0xFF) /* MSB set: CP949 */ else - (*outbuf)[1] = (code & 0xFF) | 0x80; /* MSB unset: ks x 1001 */ + OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */ NEXT(1, 2) } @@ -67,19 +66,19 @@ DECODER(cp949) { while (inleft > 0) { - unsigned char c = **inbuf; + unsigned char c = IN1; RESERVE_OUTBUF(1) if (c < 0x80) { - **outbuf = c; + OUT1(c) NEXT(1, 1) continue; } RESERVE_INBUF(2) - TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, (*inbuf)[1] ^ 0x80); - else TRYMAP_DEC(cp949ext, **outbuf, c, (*inbuf)[1]); + TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80); + else TRYMAP_DEC(cp949ext, **outbuf, c, IN2); else return 2; NEXT(2, 1) 1.7 +9 -10 cjkcodecs/src/_cp950.c Index: _cp950.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_cp950.c,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- _cp950.c 9 Jun 2003 10:25:36 -0000 1.6 +++ _cp950.c 9 Jul 2003 21:35:19 -0000 1.7 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _cp950.c,v 1.6 2003/06/09 10:25:36 perky Exp $ + * $Id: _cp950.c,v 1.7 2003/07/09 21:35:19 perky Exp $ */ #include "codeccommon.h" @@ -39,12 +39,11 @@ ENCODER(cp950) { while (inleft > 0) { - Py_UNICODE c = **inbuf; + Py_UNICODE c = IN1; DBCHAR code; if (c < 0x80) { - RESERVE_OUTBUF(1) - **outbuf = c; + WRITE1(c) NEXT(1, 1) continue; } @@ -55,8 +54,8 @@ else TRYMAP_ENC(big5, code, c); else return 1; - (*outbuf)[0] = code >> 8; - (*outbuf)[1] = code & 0xFF; + OUT1(code >> 8) + OUT2(code & 0xFF) NEXT(1, 2) } @@ -66,20 +65,20 @@ DECODER(cp950) { while (inleft > 0) { - unsigned char c = **inbuf; + unsigned char c = IN1; RESERVE_OUTBUF(1) if (c < 0x80) { - **outbuf = c; + OUT1(c) NEXT(1, 1) continue; } RESERVE_INBUF(2) - TRYMAP_DEC(cp950ext, **outbuf, c, (*inbuf)[1]); - else TRYMAP_DEC(big5, **outbuf, c, (*inbuf)[1]); + TRYMAP_DEC(cp950ext, **outbuf, c, IN2); + else TRYMAP_DEC(big5, **outbuf, c, IN2); else return 2; NEXT(2, 1) 1.6 +12 -20 cjkcodecs/src/_euc_jisx0213.c Index: _euc_jisx0213.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_euc_jisx0213.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- _euc_jisx0213.c 9 Jul 2003 20:46:56 -0000 1.5 +++ _euc_jisx0213.c 9 Jul 2003 21:35:19 -0000 1.6 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _euc_jisx0213.c,v 1.5 2003/07/09 20:46:56 perky Exp $ + * $Id: _euc_jisx0213.c,v 1.6 2003/07/09 21:35:19 perky Exp $ */ #define USING_BINARY_PAIR_SEARCH @@ -53,8 +53,7 @@ int insize; if (c < 0x80) { - RESERVE_OUTBUF(1) - **outbuf = c; + WRITE1(c) NEXT(1, 1) continue; } @@ -89,9 +88,7 @@ } else TRYMAP_ENC(jisxcommon, code, c); else if (c >= 0xff61 && c <= 0xff9f) { /* JIS X 0201 half-width katakana */ - RESERVE_OUTBUF(2) - (*outbuf)[0] = 0x8e; - (*outbuf)[1] = (unsigned char)(c - 0xfec0); + WRITE2(0x8e, c - 0xfec0) NEXT(1, 2) continue; } else if (c == 0xff3c) @@ -107,16 +104,11 @@ if (code & 0x8000) { /* Codeset 2 */ - RESERVE_OUTBUF(3) - (*outbuf)[0] = 0x8f; - (*outbuf)[1] = code >> 8; - (*outbuf)[2] = (code & 0xFF) | 0x80; + WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80) NEXT(insize, 3) } else { /* Codeset 1 */ - RESERVE_OUTBUF(2) - (*outbuf)[0] = (code >> 8) | 0x80; - (*outbuf)[1] = (code & 0xFF) | 0x80; + WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) NEXT(insize, 2) } } @@ -127,13 +119,13 @@ DECODER(euc_jisx0213) { while (inleft > 0) { - unsigned char c = **inbuf; + unsigned char c = IN1; ucs4_t code; RESERVE_OUTBUF(1) if (c < 0x80) { - **outbuf = c; + OUT1(c) NEXT(1, 1) continue; } @@ -143,9 +135,9 @@ unsigned char c2; RESERVE_INBUF(2) - c2 = (*inbuf)[1]; + c2 = IN2; if (c2 >= 0xa1 && c2 <= 0xdf) { - **outbuf = 0xfec0 + c2; + OUT1(0xfec0 + c2) NEXT(2, 1) } else return 2; @@ -153,8 +145,8 @@ unsigned char c2, c3; RESERVE_INBUF(3) - c2 = (*inbuf)[1] ^ 0x80; - c3 = (*inbuf)[2] ^ 0x80; + c2 = IN2 ^ 0x80; + c3 = IN3 ^ 0x80; /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES.euc-jisx0213) */ TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ; @@ -170,7 +162,7 @@ RESERVE_INBUF(2) c ^= 0x80; - c2 = (*inbuf)[1] ^ 0x80; + c2 = IN2 ^ 0x80; /* JIS X 0213 Plane 1 */ if (c == 0xa1 && c2 == 0xc0) **outbuf = 0xff3c; 1.5 +19 -30 cjkcodecs/src/_euc_jp.c Index: _euc_jp.c =================================================================== RCS file: 
/cvsroot/koco/cjkcodecs/src/_euc_jp.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- _euc_jp.c 6 Jul 2003 10:30:26 -0000 1.4 +++ _euc_jp.c 9 Jul 2003 21:35:19 -0000 1.5 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _euc_jp.c,v 1.4 2003/07/06 10:30:26 perky Exp $ + * $Id: _euc_jp.c,v 1.5 2003/07/09 21:35:19 perky Exp $ */ #include "codeccommon.h" @@ -38,12 +38,11 @@ ENCODER(euc_jp) { while (inleft > 0) { - Py_UNICODE c = **inbuf; + Py_UNICODE c = IN1; DBCHAR code; if (c < 0x80) { - RESERVE_OUTBUF(1) - **outbuf = c; + WRITE1(c) NEXT(1, 1) continue; } @@ -53,24 +52,19 @@ TRYMAP_ENC(jisxcommon, code, c); else if (c >= 0xff61 && c <= 0xff9f) { /* JIS X 0201 half-width katakana */ - RESERVE_OUTBUF(2) - (*outbuf)[0] = 0x8e; - (*outbuf)[1] = (unsigned char)(c - 0xfec0); + WRITE2(0x8e, c - 0xfec0) NEXT(1, 2) continue; } else if (c >= 0xe000 && c < 0xe3ac) { /* User-defined area 1 */ - RESERVE_OUTBUF(2) - (*outbuf)[0] = (Py_UNICODE)(c - 0xe000) / 94 + 0xf5; - (*outbuf)[1] = (Py_UNICODE)(c - 0xe000) % 94 + 0xa1; + WRITE2((Py_UNICODE)(c - 0xe000) / 94 + 0xf5, + (Py_UNICODE)(c - 0xe000) % 94 + 0xa1) NEXT(1, 2) continue; } else if (c >= 0xe3ac && c < 0xe758) { /* User-defined area 2 */ - RESERVE_OUTBUF(3) - (*outbuf)[0] = 0x8f; - (*outbuf)[1] = (Py_UNICODE)(c - 0xe3ac) / 94 + 0xf5; - (*outbuf)[2] = (Py_UNICODE)(c - 0xe3ac) % 94 + 0xa1; + WRITE3(0x8f, (Py_UNICODE)(c - 0xe3ac) / 94 + 0xf5, + (Py_UNICODE)(c - 0xe3ac) % 94 + 0xa1) NEXT(1, 3) continue; } else if (c == 0xff3c) /* F/W REVERSE SOLIDUS (see NOTES.euc-jp) */ @@ -80,16 +74,11 @@ if (code & 0x8000) { /* JIS X 0212 */ - RESERVE_OUTBUF(3) - (*outbuf)[0] = 0x8f; - (*outbuf)[1] = code >> 8; - (*outbuf)[2] = (code & 0xFF) | 0x80; + WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80) NEXT(1, 3) } else { /* JIS X 0208 */ - RESERVE_OUTBUF(2) - (*outbuf)[0] = (code >> 8) | 0x80; - (*outbuf)[1] = (code & 0xFF) | 0x80; + WRITE2((code 
>> 8) | 0x80, (code & 0xFF) | 0x80) NEXT(1, 2) } } @@ -100,12 +89,12 @@ DECODER(euc_jp) { while (inleft > 0) { - unsigned char c = **inbuf; + unsigned char c = IN1; RESERVE_OUTBUF(1) if (c < 0x80) { - **outbuf = c; + OUT1(c) NEXT(1, 1) continue; } @@ -115,9 +104,9 @@ unsigned char c2; RESERVE_INBUF(2) - c2 = (*inbuf)[1]; + c2 = IN2; if (c2 >= 0xa1 && c2 <= 0xdf) { - **outbuf = 0xfec0 + c2; + OUT1(0xfec0 + c2) NEXT(2, 1) } else return 2; @@ -125,8 +114,8 @@ unsigned char c2, c3; RESERVE_INBUF(3) - c2 = (*inbuf)[1]; - c3 = (*inbuf)[2]; + c2 = IN2; + c3 = IN3; if (c2 < 0xf5) { /* JIS X 0212 */ TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80); @@ -135,14 +124,14 @@ /* User-defined area 2 */ if (c2 == 0xff || c3 < 0xa1 || c3 == 0xff) return 3; - **outbuf = 0xe3ac + 94 * (c2 - 0xf5) + (c3 - 0xa1); + OUT1(0xe3ac + 94 * (c2 - 0xf5) + (c3 - 0xa1)) } NEXT(3, 1) } else { unsigned char c2; RESERVE_INBUF(2) - c2 = (*inbuf)[1]; + c2 = IN2; if (c < 0xf5) { /* JIS X 0208 */ if (c == 0xa1 && c2 == 0xc0) **outbuf = 0xff3c; @@ -152,7 +141,7 @@ /* User-defined area 1 */ if (c2 < 0xa1 || c2 == 0xff) return 2; - **outbuf = 0xe000 + 94 * (c - 0xf5) + (c2 - 0xa1); + OUT1(0xe000 + 94 * (c - 0xf5) + (c2 - 0xa1)) } NEXT(2, 1) } 1.14 +8 -9 cjkcodecs/src/_euc_kr.c Index: _euc_kr.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_euc_kr.c,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- _euc_kr.c 9 Jun 2003 10:25:36 -0000 1.13 +++ _euc_kr.c 9 Jul 2003 21:35:19 -0000 1.14 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _euc_kr.c,v 1.13 2003/06/09 10:25:36 perky Exp $ + * $Id: _euc_kr.c,v 1.14 2003/07/09 21:35:19 perky Exp $ */ #include "codeccommon.h" @@ -37,12 +37,11 @@ ENCODER(euc_kr) { while (inleft > 0) { - Py_UNICODE c = **inbuf; + Py_UNICODE c = IN1; DBCHAR code; if (c < 0x80) { - RESERVE_OUTBUF(1) - **outbuf = c; + WRITE1(c) NEXT(1, 1) continue; } @@ -55,8 +54,8 @@ if (code & 0x8000) /* MSB set: CP949 */ return 1; - (*outbuf)[0] = (code >> 8) | 0x80; - (*outbuf)[1] = (code & 0xFF) | 0x80; + OUT1((code >> 8) | 0x80) + OUT2((code & 0xFF) | 0x80) NEXT(1, 2) } @@ -66,19 +65,19 @@ DECODER(euc_kr) { while (inleft > 0) { - unsigned char c = **inbuf; + unsigned char c = IN1; RESERVE_OUTBUF(1) if (c < 0x80) { - **outbuf = c; + OUT1(c) NEXT(1, 1) continue; } RESERVE_INBUF(2) - TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, (*inbuf)[1] ^ 0x80) { + TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) { NEXT(2, 1) } else return 2; 1.11 +20 -21 cjkcodecs/src/_gb18030.c Index: _gb18030.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_gb18030.c,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- _gb18030.c 9 Jul 2003 20:46:56 -0000 1.10 +++ _gb18030.c 9 Jul 2003 21:35:19 -0000 1.11 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _gb18030.c,v 1.10 2003/07/09 20:46:56 perky Exp $ + * $Id: _gb18030.c,v 1.11 2003/07/09 21:35:19 perky Exp $ */ #include "codeccommon.h" @@ -42,12 +42,11 @@ ENCODER(gb18030) { while (inleft > 0) { - ucs4_t c = **inbuf; + ucs4_t c = IN1; DBCHAR code; if (c < 0x80) { - RESERVE_OUTBUF(1) - **outbuf = c; + WRITE1(c) NEXT(1, 1) continue; } @@ -64,13 +63,13 @@ RESERVE_OUTBUF(4) - (*outbuf)[3] = (unsigned char)(tc % 10) + 0x30; + OUT4((unsigned char)(tc % 10) + 0x30) tc /= 10; - (*outbuf)[2] = (unsigned char)(tc % 126) + 0x81; + OUT3((unsigned char)(tc % 126) + 0x81) tc /= 126; - (*outbuf)[1] = (unsigned char)(tc % 10) + 0x30; + OUT2((unsigned char)(tc % 10) + 0x30) tc /= 10; - (*outbuf)[0] = (unsigned char)(tc + 0x90); + OUT1((unsigned char)(tc + 0x90)) #if Py_UNICODE_SIZE == 2 NEXT(2, 4) /* surrogates pair */ @@ -98,13 +97,13 @@ tc = c - utrrange->first + utrrange->base; - (*outbuf)[3] = (unsigned char)(tc % 10) + 0x30; + OUT4((unsigned char)(tc % 10) + 0x30) tc /= 10; - (*outbuf)[2] = (unsigned char)(tc % 126) + 0x81; + OUT3((unsigned char)(tc % 126) + 0x81) tc /= 126; - (*outbuf)[1] = (unsigned char)(tc % 10) + 0x30; + OUT2((unsigned char)(tc % 10) + 0x30) tc /= 10; - (*outbuf)[0] = (unsigned char)tc + 0x81; + OUT1((unsigned char)tc + 0x81) NEXT(1, 4) break; @@ -118,11 +117,11 @@ continue; } - (*outbuf)[0] = (code >> 8) | 0x80; + OUT1((code >> 8) | 0x80) if (code & 0x8000) - (*outbuf)[1] = (code & 0xFF); /* MSB set: GBK or GB18030ext */ + OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */ else - (*outbuf)[1] = (code & 0xFF) | 0x80; /* MSB unset: GB2312 */ + OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ NEXT(1, 2) } @@ -133,27 +132,27 @@ DECODER(gb18030) { while (inleft > 0) { - unsigned char c = **inbuf, c2; + unsigned char c = IN1, c2; RESERVE_OUTBUF(1) if (c < 0x80) { - **outbuf = c; + OUT1(c) NEXT(1, 1) continue; } RESERVE_INBUF(2) - c2 = (*inbuf)[1]; + c2 = IN2; if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */ const struct 
_gb18030_to_unibmp_ranges *utr; unsigned char c3, c4; ucs4_t lseq; RESERVE_INBUF(4) - c3 = (*inbuf)[2]; - c4 = (*inbuf)[3]; + c3 = IN3; + c4 = IN4; if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) return 4; c -= 0x81; c2 -= 0x30; @@ -166,7 +165,7 @@ for (utr = gb18030_to_unibmp_ranges; lseq >= (utr + 1)->base; utr++) ; - **outbuf = utr->first - utr->base + lseq; + OUT1(utr->first - utr->base + lseq) NEXT(4, 1) continue; } 1.9 +7 -8 cjkcodecs/src/_gb2312.c Index: _gb2312.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_gb2312.c,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- _gb2312.c 9 Jun 2003 10:25:36 -0000 1.8 +++ _gb2312.c 9 Jul 2003 21:35:19 -0000 1.9 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _gb2312.c,v 1.8 2003/06/09 10:25:36 perky Exp $ + * $Id: _gb2312.c,v 1.9 2003/07/09 21:35:19 perky Exp $ */ #include "codeccommon.h" @@ -37,12 +37,11 @@ ENCODER(gb2312) { while (inleft > 0) { - Py_UNICODE c = **inbuf; + Py_UNICODE c = IN1; DBCHAR code; if (c < 0x80) { - RESERVE_OUTBUF(1) - **outbuf = c; + WRITE1(c) NEXT(1, 1) continue; } @@ -55,8 +54,8 @@ if (code & 0x8000) /* MSB set: GBK */ return 1; - (*outbuf)[0] = (code >> 8) | 0x80; - (*outbuf)[1] = (code & 0xFF) | 0x80; + OUT1((code >> 8) | 0x80) + OUT2((code & 0xFF) | 0x80) NEXT(1, 2) } @@ -71,13 +70,13 @@ RESERVE_OUTBUF(1) if (c < 0x80) { - **outbuf = c; + OUT1(c) NEXT(1, 1) continue; } RESERVE_INBUF(2) - TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, (*inbuf)[1] ^ 0x80) { + TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) { NEXT(2, 1) } else return 2; } 1.8 +11 -12 cjkcodecs/src/_gbk.c Index: _gbk.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_gbk.c,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- _gbk.c 9 Jun 2003 10:25:36 -0000 1.7 +++ _gbk.c 9 Jul 2003 
21:35:19 -0000 1.8 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _gbk.c,v 1.7 2003/06/09 10:25:36 perky Exp $ + * $Id: _gbk.c,v 1.8 2003/07/09 21:35:19 perky Exp $ */ #include "codeccommon.h" @@ -39,12 +39,11 @@ ENCODER(gbk) { while (inleft > 0) { - Py_UNICODE c = **inbuf; + Py_UNICODE c = IN1; DBCHAR code; if (c < 0x80) { - RESERVE_OUTBUF(1) - **outbuf = c; + WRITE1(c) NEXT(1, 1) continue; } @@ -56,11 +55,11 @@ else TRYMAP_ENC(gbcommon, code, c); else return 1; - (*outbuf)[0] = (code >> 8) | 0x80; + OUT1((code >> 8) | 0x80) if (code & 0x8000) - (*outbuf)[1] = (code & 0xFF); /* MSB set: GBK */ + OUT2((code & 0xFF)) /* MSB set: GBK */ else - (*outbuf)[1] = (code & 0xFF) | 0x80; /* MSB unset: GB2312 */ + OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ NEXT(1, 2) } @@ -70,21 +69,21 @@ DECODER(gbk) { while (inleft > 0) { - unsigned char c = **inbuf; + unsigned char c = IN1; RESERVE_OUTBUF(1) if (c < 0x80) { - **outbuf = c; + OUT1(c) NEXT(1, 1) continue; } RESERVE_INBUF(2) - GBK_PREDECODE(c, (*inbuf)[1], **outbuf) - else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, (*inbuf)[1] ^ 0x80); - else TRYMAP_DEC(gbkext, **outbuf, c, (*inbuf)[1]); + GBK_PREDECODE(c, IN2, **outbuf) + else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80); + else TRYMAP_DEC(gbkext, **outbuf, c, IN2); else return 2; NEXT(2, 1) 1.8 +2 -2 cjkcodecs/src/_iso_2022_jp.c Index: _iso_2022_jp.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_jp.c,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- _iso_2022_jp.c 9 Jul 2003 18:47:47 -0000 1.7 +++ _iso_2022_jp.c 9 Jul 2003 21:35:19 -0000 1.8 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _iso_2022_jp.c,v 1.7 2003/07/09 18:47:47 perky Exp $ + * $Id: _iso_2022_jp.c,v 1.8 2003/07/09 21:35:19 perky Exp $ */ #define ISO2022_DESIGNATIONS \ @@ -65,7 +65,7 @@ ENCODER(iso_2022_jp) { while (inleft > 0) { - Py_UNICODE c = **inbuf; + Py_UNICODE c = IN1; DBCHAR code; if (c < 0x80) { 1.7 +18 -19 cjkcodecs/src/_johab.c Index: _johab.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_johab.c,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- _johab.c 9 Jul 2003 20:30:15 -0000 1.6 +++ _johab.c 9 Jul 2003 21:35:19 -0000 1.7 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _johab.c,v 1.6 2003/07/09 20:30:15 perky Exp $ + * $Id: _johab.c,v 1.7 2003/07/09 21:35:19 perky Exp $ */ #include "codeccommon.h" @@ -64,12 +64,11 @@ ENCODER(johab) { while (inleft > 0) { - Py_UNICODE c = **inbuf; + Py_UNICODE c = IN1; DBCHAR code; if (c < 0x80) { - RESERVE_OUTBUF(1) - **outbuf = c; + WRITE1(c) NEXT(1, 1) continue; } @@ -96,8 +95,8 @@ && (c2 >= 0x21 && c2 <= 0x7e)) { t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) : (c1 - 0x21 + 0x197)); t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21); - (*outbuf)[0] = t1 >> 1; - (*outbuf)[1] = (t2 < 0x4e ? t2 + 0x31 : t2 + 0x43); + OUT1(t1 >> 1) + OUT2(t2 < 0x4e ? 
t2 + 0x31 : t2 + 0x43) NEXT(1, 2) continue; } else @@ -105,8 +104,8 @@ } else return 1; - (*outbuf)[0] = code >> 8; - (*outbuf)[1] = code & 0xFF; + OUT1(code >> 8) + OUT2(code & 0xff) NEXT(1, 2) } @@ -157,18 +156,18 @@ DECODER(johab) { while (inleft > 0) { - unsigned char c = **inbuf, c2; + unsigned char c = IN1, c2; RESERVE_OUTBUF(1) if (c < 0x80) { - **outbuf = c; + OUT1(c) NEXT(1, 1) continue; } RESERVE_INBUF(2) - c2 = (*inbuf)[1]; + c2 = IN2; if (c < 0xd8) { /* johab hangul */ @@ -190,26 +189,26 @@ if (i_cho == FILL) { if (i_jung == FILL) { if (i_jong == FILL) - **outbuf = 0x3000; + OUT1(0x3000) else - **outbuf = 0x3100 | johabjamo_jongseong[c_jong]; + OUT1(0x3100 | johabjamo_jongseong[c_jong]) } else { if (i_jong == FILL) - **outbuf = 0x3100 | johabjamo_jungseong[c_jung]; + OUT1(0x3100 | johabjamo_jungseong[c_jung]) else return 2; } } else { if (i_jung == FILL) { if (i_jong == FILL) - **outbuf = 0x3100 | johabjamo_choseong[c_cho]; + OUT1(0x3100 | johabjamo_choseong[c_cho]) else return 2; } else - **outbuf = 0xac00 + - i_cho * 588 + - i_jung * 28 + - (i_jong == FILL ? 0 : i_jong); + OUT1(0xac00 + + i_cho * 588 + + i_jung * 28 + + (i_jong == FILL ? 0 : i_jong)) } NEXT(2, 1) } else { 1.6 +12 -12 cjkcodecs/src/_shift_jis.c Index: _shift_jis.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_shift_jis.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- _shift_jis.c 31 May 2003 11:50:19 -0000 1.5 +++ _shift_jis.c 9 Jul 2003 21:35:19 -0000 1.6 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _shift_jis.c,v 1.5 2003/05/31 11:50:19 perky Exp $ + * $Id: _shift_jis.c,v 1.6 2003/07/09 21:35:19 perky Exp $ */ #include "codeccommon.h" @@ -38,7 +38,7 @@ ENCODER(shift_jis) { while (inleft > 0) { - Py_UNICODE c = **inbuf; + Py_UNICODE c = IN1; DBCHAR code; unsigned char c1, c2; @@ -49,7 +49,7 @@ if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { RESERVE_OUTBUF(1) - **outbuf = (unsigned char)code; + OUT1(code) NEXT(1, 1) continue; } @@ -62,8 +62,8 @@ /* user-defined area */ c1 = (Py_UNICODE)(c - 0xe000) / 188; c2 = (Py_UNICODE)(c - 0xe000) % 188; - (*outbuf)[0] = c1 + 0xf0; - (*outbuf)[1] = (c2 < 0x3f ? c2 + 0x40 : c2 + 0x41); + OUT1(c1 + 0xf0) + OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) NEXT(1, 2) continue; } else @@ -77,8 +77,8 @@ c2 = code & 0xff; c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); c1 = (c1 - 0x21) >> 1; - (*outbuf)[0] = c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1; - (*outbuf)[1] = c2 < 0x3f ? c2 + 0x40 : c2 + 0x41; + OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) + OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) NEXT(1, 2) } @@ -88,7 +88,7 @@ DECODER(shift_jis) { while (inleft > 0) { - unsigned char c = **inbuf; + unsigned char c = IN1; RESERVE_OUTBUF(1) JISX0201_DECODE(c, **outbuf) @@ -96,7 +96,7 @@ unsigned char c1, c2; RESERVE_INBUF(2) - c2 = (*inbuf)[1]; + c2 = IN2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) return 2; @@ -114,10 +114,10 @@ unsigned char c2; RESERVE_INBUF(2) - c2 = (*inbuf)[1]; + c2 = IN2; if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc)) { - **outbuf = 0xe000 + 188 * (c - 0xf0) + - (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); + OUT1(0xe000 + 188 * (c - 0xf0) + + (c2 < 0x80 ? 
c2 - 0x40 : c2 - 0x41)) NEXT(2, 1) continue; } else 1.4 +2 -2 cjkcodecs/src/_shift_jisx0213.c Index: _shift_jisx0213.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_shift_jisx0213.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- _shift_jisx0213.c 9 Jul 2003 20:46:57 -0000 1.3 +++ _shift_jisx0213.c 9 Jul 2003 21:35:19 -0000 1.4 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _shift_jisx0213.c,v 1.3 2003/07/09 20:46:57 perky Exp $ + * $Id: _shift_jisx0213.c,v 1.4 2003/07/09 21:35:19 perky Exp $ */ #define USING_BINARY_PAIR_SEARCH @@ -48,7 +48,7 @@ ENCODER(shift_jisx0213) { while (inleft > 0) { - ucs4_t c = **inbuf; + ucs4_t c = IN1; DBCHAR code = NOCHAR; int c1, c2; size_t insize; 1.23 +5 -5 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.22 retrieving revision 1.23 diff -u -r1.22 -r1.23 --- codeccommon.h 9 Jul 2003 20:46:57 -0000 1.22 +++ codeccommon.h 9 Jul 2003 21:35:19 -0000 1.23 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: codeccommon.h,v 1.22 2003/07/09 20:46:57 perky Exp $ + * $Id: codeccommon.h,v 1.23 2003/07/09 21:35:19 perky Exp $ */ #include "Python.h" @@ -93,10 +93,10 @@ #define IN3 ((*inbuf)[2]) #define IN4 ((*inbuf)[3]) -#define OUT1(c) ((*outbuf)[0]) = (unsigned char)(c); -#define OUT2(c) ((*outbuf)[1]) = (unsigned char)(c); -#define OUT3(c) ((*outbuf)[2]) = (unsigned char)(c); -#define OUT4(c) ((*outbuf)[3]) = (unsigned char)(c); +#define OUT1(c) ((*outbuf)[0]) = (c); +#define OUT2(c) ((*outbuf)[1]) = (c); +#define OUT3(c) ((*outbuf)[2]) = (c); +#define OUT4(c) ((*outbuf)[3]) = (c); #define WRITE1(c1) \ RESERVE_OUTBUF(1) \ |
From: Hye-Shik C. <pe...@us...> - 2003-07-09 21:00:49
|
perky 03/07/09 14:00:48 Modified: src _hz.c Log: Fix hz codec's bug that doesn't initialize the encoding mode to ASCII. Revision Changes Path 1.3 +44 -24 cjkcodecs/src/_hz.c Index: _hz.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_hz.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- _hz.c 31 May 2003 11:50:19 -0000 1.2 +++ _hz.c 9 Jul 2003 21:00:48 -0000 1.3 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _hz.c,v 1.2 2003/05/31 11:50:19 perky Exp $ + * $Id: _hz.c,v 1.3 2003/07/09 21:00:48 perky Exp $ */ #include "codeccommon.h" @@ -34,22 +34,36 @@ ENCMAP(gbcommon) DECMAP(gb2312) +#define HAVE_ENCODER_INIT +ENCODER_INIT(hz) +{ + state->i = 0; + return 0; +} + +#define HAVE_ENCODER_RESET +ENCODER_RESET(hz) +{ + if (state->i != 0) { + WRITE2('~', '}') + state->i = 0; + NEXT_OUT(2) + } + return 0; +} + ENCODER(hz) { while (inleft > 0) { - Py_UNICODE c = **inbuf; + Py_UNICODE c = IN1; DBCHAR code; if (c < 0x80) { if (state->i == 0) { - RESERVE_OUTBUF(1) - **outbuf = c; + WRITE1(c) NEXT(1, 1) } else { - RESERVE_OUTBUF(3) - (*outbuf)[0] = '~'; - (*outbuf)[1] = '}'; - (*outbuf)[2] = (unsigned char)c; + WRITE3('~', '}', c) NEXT(1, 3) state->i = 0; } @@ -65,17 +79,11 @@ return 1; if (state->i == 0) { - RESERVE_OUTBUF(4) - (*outbuf)[0] = '~'; - (*outbuf)[1] = '{'; - (*outbuf)[2] = code >> 8; - (*outbuf)[3] = code & 0xFF; + WRITE4('~', '{', code >> 8, code & 0xff) NEXT(1, 4) state->i = 1; } else { - RESERVE_OUTBUF(2) - (*outbuf)[0] = code >> 8; - (*outbuf)[1] = code & 0xFF; + WRITE2(code >> 8, code & 0xff) NEXT(1, 2) } } @@ -83,18 +91,31 @@ return 0; } +#define HAVE_DECODER_INIT +DECODER_INIT(hz) +{ + state->i = 0; + return 0; +} + +#define HAVE_DECODER_RESET +DECODER_RESET(hz) +{ + state->i = 0; + return 0; +} + DECODER(hz) { while (inleft > 0) { - unsigned char c = **inbuf; + unsigned char c = IN1; if (c == 
'~') { - unsigned char c2 = (*inbuf)[1]; + unsigned char c2 = IN2; RESERVE_INBUF(2) if (c2 == '~') { - RESERVE_OUTBUF(1) - **outbuf = '~'; + WRITE1('~') NEXT(2, 1) continue; } else if (c2 == '{' && state->i == 0) @@ -113,13 +134,12 @@ return 1; if (state->i == 0) { /* ASCII mode */ - RESERVE_OUTBUF(1) - **outbuf = c; + WRITE1(c) NEXT(1, 1) } else { /* GB mode */ RESERVE_INBUF(2) - TRYMAP_DEC(gb2312, **outbuf, c, (*inbuf)[1]) { - RESERVE_OUTBUF(1) + RESERVE_OUTBUF(1) + TRYMAP_DEC(gb2312, **outbuf, c, IN2) { NEXT(2, 1) } else return 2; |
From: Hye-Shik C. <pe...@us...> - 2003-07-09 21:00:49
|
perky 03/07/09 14:00:47 Modified: . CHANGES Log: Fix hz codec's bug that doesn't initialize the encoding mode to ASCII. Revision Changes Path 1.14 +3 -0 cjkcodecs/CHANGES Index: CHANGES =================================================================== RCS file: /cvsroot/koco/cjkcodecs/CHANGES,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- CHANGES 9 Jul 2003 19:30:01 -0000 1.13 +++ CHANGES 9 Jul 2003 21:00:47 -0000 1.14 @@ -36,3 +36,6 @@ U+FF3C because EUC-JP 0x5C is also a REVERSE SOLIDUS and 0xA1C0 is FULLWIDTH REVERSE SOLIDUS on japanese environments. + *) Fixed hz codec's bug that doesn't initialize the encoding mode to + ASCII. + |
From: Hye-Shik C. <pe...@us...> - 2003-07-09 20:30:18
|
perky 03/07/09 13:30:16 Modified: src _johab.c Log: Fix typo on comment Revision Changes Path 1.6 +2 -2 cjkcodecs/src/_johab.c Index: _johab.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_johab.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- _johab.c 6 Jun 2003 06:23:05 -0000 1.5 +++ _johab.c 9 Jul 2003 20:30:15 -0000 1.6 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _johab.c,v 1.5 2003/06/06 06:23:05 perky Exp $ + * $Id: _johab.c,v 1.6 2003/07/09 20:30:15 perky Exp $ */ #include "codeccommon.h" @@ -186,7 +186,7 @@ if (i_cho == NONE || i_jung == NONE || i_jong == NONE) return 2; - /* we don't use U+1100 hangul jomo yet. */ + /* we don't use U+1100 hangul jamo yet. */ if (i_cho == FILL) { if (i_jung == FILL) { if (i_jong == FILL) |
From: Hye-Shik C. <pe...@us...> - 2003-07-09 19:30:04
|
perky 03/07/09 12:30:03 Added: src _iso_2022_jp_2.c Log: Add ISO-2022-JP-2 codec w/o iso-8859-1 and iso-8859-7 support. Revision Changes Path 1.1 cjkcodecs/src/_iso_2022_jp_2.c Index: _iso_2022_jp_2.c =================================================================== /* * _iso_2022_jp_2.c: the ISO-2022-JP-2 codec (RFC1554) * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * $Id: _iso_2022_jp_2.c,v 1.1 2003/07/09 19:30:03 perky Exp $ */ #define ISO2022_DESIGNATIONS \ CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, \ CHARSET_JISX0208_O, CHARSET_JISX0212, CHARSET_GB2312, \ CHARSET_KSX1001, CHARSET_JISX0212 /* XXX: NotImplmented CHARSET_ISO8859_1, CHARSET_ISO8859_7 */ #include "codeccommon.h" #include "iso2022common.h" #include "maps/alg_jisx0201.h" ENCMAP(jisxcommon) DECMAP(jisx0208) DECMAP(jisx0212) ENCMAP(cp949) DECMAP(ksx1001) ENCMAP(gbcommon) DECMAP(gb2312) #define HAVE_ENCODER_INIT ENCODER_INIT(iso_2022_jp_2) { STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) STATE_SETG2(state, CHARSET_ASCII) return 0; } #define HAVE_ENCODER_RESET ENCODER_RESET(iso_2022_jp_2) { if (STATE_GETG0(state) != CHARSET_ASCII) { WRITE3(ESC, '(', 'B') STATE_SETG0(state, CHARSET_ASCII) NEXT_OUT(3) } return 0; } ENCODER(iso_2022_jp_2) { while (inleft > 0) { Py_UNICODE c = IN1; DBCHAR code; if (c < 0x80) { switch (STATE_GETG0(state)) { case CHARSET_ASCII: WRITE1(c) NEXT(1, 1) break; case CHARSET_JISX0201_R: JISX0201_R_ENCODE(c, code) else { /* FALLTHROUGH (yay!) 
*/ default: WRITE3(ESC, '(', 'B') NEXT_OUT(3) STATE_SETG0(state, CHARSET_ASCII) code = c; } WRITE1(code) NEXT(1, 1) break; } if (c == '\n') STATE_CLEARFLAG(state, F_SHIFTED) } else UCS4INVALID(c) else { unsigned char charset; charset = STATE_GETG0(state); if (charset == CHARSET_JISX0201_R) { code = DBCINV; JISX0201_R_ENCODE(c, code) if (code != DBCINV) { WRITE1(code) NEXT(1, 1) continue; } } TRYMAP_ENC(jisxcommon, code, c) { if (code & 0x8000) { /* MSB set: JIS X 0212 */ if (charset != CHARSET_JISX0212) { WRITE4(ESC, '$', '(', 'D') STATE_SETG0(state, CHARSET_JISX0212) NEXT_OUT(4) } WRITE2((code >> 8) & 0x7f, code & 0x7f) } else { /* MSB unset: JIS X 0208 */ if (charset != CHARSET_JISX0208) { WRITE3(ESC, '$', 'B') STATE_SETG0(state, CHARSET_JISX0208) NEXT_OUT(3) } WRITE2(code >> 8, code & 0xff) } NEXT(1, 2) } else TRYMAP_ENC(cp949, code, c) { if (code & 0x8000) /* MSB set: CP949 */ return 2; if (charset != CHARSET_KSX1001) { WRITE4(ESC, '$', '(', 'C') STATE_SETG0(state, CHARSET_KSX1001) NEXT_OUT(4) } WRITE2(code >> 8, code & 0xff) NEXT(1, 2) } else TRYMAP_ENC(gbcommon, code, c) { if (code & 0x8000) /* MSB set: GBK */ return 2; if (charset != CHARSET_GB2312) { WRITE4(ESC, '$', '(', 'A') STATE_SETG0(state, CHARSET_GB2312) NEXT_OUT(4) } WRITE2(code >> 8, code & 0xff) NEXT(1, 2) } else { JISX0201_R_ENCODE(c, code) else return 1; /* if (charset == CHARSET_JISX0201_R) : already checked */ WRITE4(ESC, '(', 'J', code) STATE_SETG0(state, CHARSET_JISX0201_R) NEXT(1, 4) } } } return 0; } #define HAVE_DECODER_INIT DECODER_INIT(iso_2022_jp_2) { STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) STATE_SETG2(state, CHARSET_ASCII) return 0; } #define HAVE_DECODER_RESET DECODER_RESET(iso_2022_jp_2) { STATE_CLEARFLAG(state, F_SHIFTED) return 0; } DECODER(iso_2022_jp_2) { ISO2022_LOOP_BEGIN unsigned char charset, c2; ISO2022_GETCHARSET(charset, c, c2) if (charset & CHARSET_DOUBLEBYTE) { RESERVE_INBUF(2) RESERVE_OUTBUF(1) c2 &= IN2; if (charset 
== CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) { TRYMAP_DEC(jisx0208, **outbuf, c, c2); else return 2; } else if (charset == CHARSET_JISX0212) { TRYMAP_DEC(jisx0212, **outbuf, c, c2); else return 2; } else if (charset == CHARSET_KSX1001) { TRYMAP_DEC(ksx1001, **outbuf, c, c2); else return 2; } else if (charset == CHARSET_GB2312) { TRYMAP_DEC(gb2312, **outbuf, c, c2); else return 2; } else return MBERR_INTERNAL; NEXT(2, 1) } else if (charset == CHARSET_ASCII) { RESERVE_OUTBUF(1) OUT1(c) NEXT(1, 1) } else if (charset == CHARSET_JISX0201_R) { RESERVE_OUTBUF(1) JISX0201_R_DECODE(c, **outbuf) else return 1; NEXT(1, 1) } else return MBERR_INTERNAL; ISO2022_LOOP_END return 0; } #include "codecentry.h" BEGIN_CODEC_REGISTRY(iso_2022_jp_2) MAPOPEN(ja_JP) IMPORTMAP_DEC(jisx0208) IMPORTMAP_DEC(jisx0212) IMPORTMAP_ENC(jisxcommon) MAPCLOSE() MAPOPEN(ko_KR) IMPORTMAP_ENC(cp949) IMPORTMAP_DEC(ksx1001) MAPCLOSE() MAPOPEN(zh_CN) IMPORTMAP_ENC(gbcommon) IMPORTMAP_DEC(gb2312) MAPCLOSE() END_CODEC_REGISTRY(iso_2022_jp_2) /* * ex: ts=8 sts=4 et */ |
From: Hye-Shik C. <pe...@us...> - 2003-07-09 19:30:03
|
perky 03/07/09 12:30:02 Modified: . CHANGES setup.py Log: Add ISO-2022-JP-2 codec w/o iso-8859-1 and iso-8859-7 support. Revision Changes Path 1.13 +2 -1 cjkcodecs/CHANGES Index: CHANGES =================================================================== RCS file: /cvsroot/koco/cjkcodecs/CHANGES,v retrieving revision 1.12 retrieving revision 1.13 diff -u -r1.12 -r1.13 --- CHANGES 8 Jul 2003 09:23:20 -0000 1.12 +++ CHANGES 9 Jul 2003 19:30:01 -0000 1.13 @@ -1,6 +1,7 @@ Changes with CJKCodecs 1.0 - *) SHIFT-JISX0213, EUC-JISX0213 and ISO-2022-JP-3 codec is added. + *) SHIFT-JISX0213, EUC-JISX0213, ISO-2022-JP-2 and ISO-2022-JP-3 + codec is added. *) Changed a few characters of a big5 codepoint mapping to cp950's rather than 0xfffd. (documented on NOTES.big5) 1.28 +3 -2 cjkcodecs/setup.py Index: setup.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/setup.py,v retrieving revision 1.27 retrieving revision 1.28 diff -u -r1.27 -r1.28 --- setup.py 8 Jul 2003 09:23:20 -0000 1.27 +++ setup.py 9 Jul 2003 19:30:01 -0000 1.28 @@ -27,7 +27,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: setup.py,v 1.27 2003/07/08 09:23:20 perky Exp $ +# $Id: setup.py,v 1.28 2003/07/09 19:30:01 perky Exp $ # import sys @@ -38,7 +38,8 @@ extensions = [] encodings = { 'ja_JP': ['shift_jis', 'cp932', 'euc_jp', 'iso_2022_jp', 'iso_2022_jp_1', - 'shift_jisx0213', 'euc_jisx0213', 'iso_2022_jp_3'], + 'iso_2022_jp_2', 'iso_2022_jp_3', 'shift_jisx0213', + 'euc_jisx0213'], 'ko_KR': ['euc_kr', 'cp949', 'johab', 'iso_2022_kr'], 'zh_CN': ['gb2312', 'gbk', 'gb18030', 'hz'], 'zh_TW': ['big5', 'cp950'], |
From: Hye-Shik C. <pe...@us...> - 2003-07-09 18:47:49
|
perky 03/07/09 11:47:47 Modified: src _iso_2022_jp.c _iso_2022_jp_1.c _iso_2022_jp_3.c _iso_2022_kr.c iso2022common.h Log: Revamp iso-2022 decoder implementations. Revision Changes Path 1.7 +33 -112 cjkcodecs/src/_iso_2022_jp.c Index: _iso_2022_jp.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_jp.c,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- _iso_2022_jp.c 7 Jul 2003 08:26:19 -0000 1.6 +++ _iso_2022_jp.c 9 Jul 2003 18:47:47 -0000 1.7 @@ -26,9 +26,12 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _iso_2022_jp.c,v 1.6 2003/07/07 08:26:19 perky Exp $ + * $Id: _iso_2022_jp.c,v 1.7 2003/07/09 18:47:47 perky Exp $ */ +#define ISO2022_DESIGNATIONS \ + CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, CHARSET_JISX0208_O + #include "codeccommon.h" #include "iso2022common.h" #include "maps/alg_jisx0201.h" @@ -39,7 +42,7 @@ #define HAVE_ENCODER_INIT ENCODER_INIT(iso_2022_jp) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -129,7 +132,7 @@ #define HAVE_DECODER_INIT DECODER_INIT(iso_2022_jp) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -144,118 +147,36 @@ DECODER(iso_2022_jp) { - while (inleft > 0) { - unsigned char c = **inbuf; - - if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { - /* ESC throughout mode: for non-iso2022 escape sequences */ - RESERVE_OUTBUF(1) - **outbuf = c; /* assume as ISO-8859-1 */ - NEXT(1, 1) - if (IS_ESCEND(c)) { - STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) - } - continue; - } + ISO2022_LOOP_BEGIN + unsigned char charset, c2; - switch (c) { - case ESC: - RESERVE_INBUF(2) - if (IS_ISO2022ESC((*inbuf)[1])) { - int eslen; - - eslen = iso2022esclen(*inbuf, inleft); - if (eslen < 0) - return eslen == MBERR_INTERNAL ? 
1 : eslen; - - if (eslen == 3) { - unsigned char charset; - - if ((*inbuf)[1] == '$') { - if ((*inbuf)[2] == '@' || (*inbuf)[2] == 'B') { - charset = (*inbuf)[2] | CHARSET_DOUBLEBYTE; - STATE_SETG0(state, charset); - } else - return 3; - } else { - if ((*inbuf)[2] == 'B' || (*inbuf)[2] == 'J') - charset = (*inbuf)[2]; - else - return 3; - - if ((*inbuf)[1] == '(') { - STATE_SETG0(state, charset) - } else if ((*inbuf)[1] == ')') { - STATE_SETG1(state, charset) - } else - return 3; - } - } else - return eslen; - NEXT_IN(eslen) - } else { - STATE_SETFLAG(state, F_ESCTHROUGHOUT) - **outbuf = ESC; - NEXT(1, 1) - } - break; - case SI: - STATE_CLEARFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case SO: - STATE_SETFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case '\n': - STATE_CLEARFLAG(state, F_SHIFTED) - /* FALLTHROUGH */ - case SP: /* FALLTHROUGH */ - case DEL: - RESERVE_OUTBUF(1) - **outbuf = c; - NEXT(1, 1) - break; - default: - if ((c & 0x7f) < 0x20) { /* C0 and C1 */ - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else { - unsigned char charset; + ISO2022_GETCHARSET(charset, c, c2) - if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ - charset = STATE_GETG0(state); - else /* G1 */ - charset = STATE_GETG1(state); - - if (charset & CHARSET_DOUBLEBYTE) { - /* all double byte character sets are in JIS X 0208 here. - * this means that we don't distinguish :1978 from :1983. */ - RESERVE_INBUF(2) - RESERVE_OUTBUF(1) - TRYMAP_DEC(jisx0208, **outbuf, c & 0x7f, - (*inbuf)[1] & 0x7f) { - NEXT(2, 1) - } else - return 2; - } else if (charset == CHARSET_ASCII) { - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else if (charset == CHARSET_JISX0201_R) { - RESERVE_OUTBUF(1) - JISX0201_R_DECODE(c & 0x7f, **outbuf) - else - return 1; - NEXT(1, 1) - } else - return MBERR_INTERNAL; - } - } - } + if (charset & CHARSET_DOUBLEBYTE) { + /* all double byte character sets are in JIS X 0208 here. + * this means that we don't distinguish :1978 from :1983. 
*/ + RESERVE_INBUF(2) + RESERVE_OUTBUF(1) + c2 &= IN2; + TRYMAP_DEC(jisx0208, **outbuf, c, c2) { + NEXT(2, 1) + } else + return 2; + } else if (charset == CHARSET_ASCII) { + RESERVE_OUTBUF(1) + OUT1(c) + NEXT(1, 1) + } else if (charset == CHARSET_JISX0201_R) { + RESERVE_OUTBUF(1) + JISX0201_R_DECODE(c, **outbuf) + else + return 1; + NEXT(1, 1) + } else + return MBERR_INTERNAL; + ISO2022_LOOP_END - return 0; + return 0; } #include "codecentry.h" 1.6 +37 -127 cjkcodecs/src/_iso_2022_jp_1.c Index: _iso_2022_jp_1.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_jp_1.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- _iso_2022_jp_1.c 7 Jul 2003 08:26:19 -0000 1.5 +++ _iso_2022_jp_1.c 9 Jul 2003 18:47:47 -0000 1.6 @@ -26,9 +26,13 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _iso_2022_jp_1.c,v 1.5 2003/07/07 08:26:19 perky Exp $ + * $Id: _iso_2022_jp_1.c,v 1.6 2003/07/09 18:47:47 perky Exp $ */ +#define ISO2022_DESIGNATIONS \ + CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, \ + CHARSET_JISX0208_O, CHARSET_JISX0212 + #include "codeccommon.h" #include "iso2022common.h" #include "maps/alg_jisx0201.h" @@ -40,7 +44,7 @@ #define HAVE_ENCODER_INIT ENCODER_INIT(iso_2022_jp_1) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -137,7 +141,7 @@ #define HAVE_DECODER_INIT DECODER_INIT(iso_2022_jp_1) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -152,133 +156,39 @@ DECODER(iso_2022_jp_1) { - while (inleft > 0) { - unsigned char c = **inbuf; + ISO2022_LOOP_BEGIN + unsigned char charset, c2; - if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { - /* ESC throughout mode: for non-iso2022 escape sequences */ - RESERVE_OUTBUF(1) - **outbuf = c; /* assume as ISO-8859-1 */ - NEXT(1, 
1) - if (IS_ESCEND(c)) { - STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) - } - continue; - } + ISO2022_GETCHARSET(charset, c, c2) - switch (c) { - case ESC: - RESERVE_INBUF(2) - if (IS_ISO2022ESC((*inbuf)[1])) { - int eslen; - - eslen = iso2022esclen(*inbuf, inleft); - if (eslen < 0) - return eslen == MBERR_INTERNAL ? 1 : eslen; - - if (eslen == 3) { - unsigned char charset; - - if ((*inbuf)[1] == '$') { - if ((*inbuf)[2] == '@' || (*inbuf)[2] == 'B') { - charset = (*inbuf)[2] | CHARSET_DOUBLEBYTE; - STATE_SETG0(state, charset); - } else - return 3; - } else { - if ((*inbuf)[2] == 'B' || (*inbuf)[2] == 'J') - charset = (*inbuf)[2]; - else - return 3; - - if ((*inbuf)[1] == '(') { - STATE_SETG0(state, charset) - } else if ((*inbuf)[1] == ')') { - STATE_SETG1(state, charset) - } else - return 3; - } - } else if (eslen == 4) { - if ((*inbuf)[1] == '$' && (*inbuf)[3] == 'D') { - if ((*inbuf)[2] == '(') { - STATE_SETG0(state, CHARSET_JISX0212) - } else if ((*inbuf)[2] == ')') { - STATE_SETG1(state, CHARSET_JISX0212) - } else - return 4; - } else - return 4; - } else - return eslen; - NEXT_IN(eslen) - } else { - STATE_SETFLAG(state, F_ESCTHROUGHOUT) - **outbuf = ESC; - NEXT(1, 1) - } - break; - case SI: - STATE_CLEARFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case SO: - STATE_SETFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case '\n': - STATE_CLEARFLAG(state, F_SHIFTED) - /* FALLTHROUGH */ - case SP: /* FALLTHROUGH */ - case DEL: - RESERVE_OUTBUF(1) - **outbuf = c; - NEXT(1, 1) - break; - default: - if ((c & 0x7f) < 0x20) { /* C0 and C1 */ - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else { - unsigned char charset; - - if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ - charset = STATE_GETG0(state); - else /* G1 */ - charset = STATE_GETG1(state); - - if (charset & CHARSET_DOUBLEBYTE) { - RESERVE_INBUF(2) - RESERVE_OUTBUF(1) - if (charset == CHARSET_JISX0208 || - charset == CHARSET_JISX0208_O) { - TRYMAP_DEC(jisx0208, **outbuf, c & 0x7f, - 
(*inbuf)[1] & 0x7f); - else return 2; - } else if (charset == CHARSET_JISX0212) { - TRYMAP_DEC(jisx0212, **outbuf, c & 0x7f, - (*inbuf)[1] & 0x7f); - else return 2; - } else - return MBERR_INTERNAL; - NEXT(2, 1) - } else if (charset == CHARSET_ASCII) { - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else if (charset == CHARSET_JISX0201_R) { - RESERVE_OUTBUF(1) - JISX0201_R_DECODE(c & 0x7f, **outbuf) - else - return 1; - NEXT(1, 1) - } else - return MBERR_INTERNAL; - } - } - } + if (charset & CHARSET_DOUBLEBYTE) { + RESERVE_INBUF(2) + RESERVE_OUTBUF(1) + c2 &= IN2; + if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) { + TRYMAP_DEC(jisx0208, **outbuf, c, c2); + else return 2; + } else if (charset == CHARSET_JISX0212) { + TRYMAP_DEC(jisx0212, **outbuf, c, c2); + else return 2; + } else + return MBERR_INTERNAL; + NEXT(2, 1) + } else if (charset == CHARSET_ASCII) { + RESERVE_OUTBUF(1) + OUT1(c) + NEXT(1, 1) + } else if (charset == CHARSET_JISX0201_R) { + RESERVE_OUTBUF(1) + JISX0201_R_DECODE(c, **outbuf) + else + return 1; + NEXT(1, 1) + } else + return MBERR_INTERNAL; + ISO2022_LOOP_END - return 0; + return 0; } #include "codecentry.h" 1.5 +39 -132 cjkcodecs/src/_iso_2022_jp_3.c Index: _iso_2022_jp_3.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_jp_3.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- _iso_2022_jp_3.c 8 Jul 2003 08:47:02 -0000 1.4 +++ _iso_2022_jp_3.c 9 Jul 2003 18:47:47 -0000 1.5 @@ -26,10 +26,12 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _iso_2022_jp_3.c,v 1.4 2003/07/08 08:47:02 perky Exp $ + * $Id: _iso_2022_jp_3.c,v 1.5 2003/07/09 18:47:47 perky Exp $ */ #define USING_BINARY_PAIR_SEARCH +#define ISO2022_DESIGNATIONS \ + CHARSET_ASCII, CHARSET_JISX0208, CHARSET_JISX0213_1, CHARSET_JISX0213_2 #include "codeccommon.h" #include "iso2022common.h" #include "maps/map_jisx0213_pairs.h" @@ -49,7 +51,7 @@ #define HAVE_ENCODER_INIT ENCODER_INIT(iso_2022_jp_3) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -163,7 +165,7 @@ #define HAVE_DECODER_INIT DECODER_INIT(iso_2022_jp_3) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -178,141 +180,46 @@ DECODER(iso_2022_jp_3) { - while (inleft > 0) { - unsigned char c = IN1; + ISO2022_LOOP_BEGIN + unsigned char charset, c2; + ucs4_t code; - if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { - /* ESC throughout mode: for non-iso2022 escape sequences */ - RESERVE_OUTBUF(1) - OUT1(c) /* assume as ISO-8859-1 */ - NEXT(1, 1) - if (IS_ESCEND(c)) { - STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) - } - continue; - } + ISO2022_GETCHARSET(charset, c, c2) - switch (c) { - case ESC: + if (charset & CHARSET_DOUBLEBYTE) { RESERVE_INBUF(2) - if (IS_ISO2022ESC(IN2)) { - int eslen; - unsigned char charset; - - eslen = iso2022esclen(*inbuf, inleft); - if (eslen < 0) - return eslen == MBERR_INTERNAL ? 
1 : eslen; - - if (eslen == 3) { - if (IN2 == '$') { - if (IN3 == 'B') { - charset = IN3 | CHARSET_DOUBLEBYTE; - STATE_SETG0(state, charset); - } else - return 3; - } else { - if (IN3 == 'B') - charset = IN3; - else - return 3; - - if (IN2 == '(') { - STATE_SETG0(state, charset) - } else if (IN2 == ')') { - STATE_SETG1(state, charset) - } else - return 3; - } - } else if (eslen == 4) { - if (IN2 == '$' && (IN4 == 'O' || IN4 == 'P')) { - charset = IN4 | CHARSET_DOUBLEBYTE; - if (IN3 == '(') { - STATE_SETG0(state, charset) - } else if (IN3 == ')') { - STATE_SETG1(state, charset) - } else - return 4; - } else - return 4; - } else - return eslen; - NEXT_IN(eslen) - } else { - STATE_SETFLAG(state, F_ESCTHROUGHOUT) - OUT1(ESC) - NEXT(1, 1) - } - break; - case SI: - STATE_CLEARFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case SO: - STATE_SETFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case '\n': - STATE_CLEARFLAG(state, F_SHIFTED) - /* FALLTHROUGH */ - case SP: /* FALLTHROUGH */ - case DEL: + RESERVE_OUTBUF(1) + c2 &= IN2; + if (charset == CHARSET_JISX0213_1) { + if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c; + else TRYMAP_DEC(jisx0208, **outbuf, c, c2); + else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2); + else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) { + PUTUCS4(EMPBASE | code) + NEXT_IN(2) + continue; + } else TRYMAP_DEC(jisx0213_pair, code, c, c2) { + WRITE2(code >> 16, code & 0xffff) + NEXT(2, 2) + continue; + } else return 2; + } else if (charset == CHARSET_JISX0213_2) { + TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c, c2); + else TRYMAP_DEC(jisx0213_2_emp, code, c, c2) { + PUTUCS4(EMPBASE | code) + NEXT_IN(2) + continue; + } else return 2; + } else + return MBERR_INTERNAL; + NEXT(2, 1) + } else if (charset == CHARSET_ASCII) { RESERVE_OUTBUF(1) OUT1(c) NEXT(1, 1) - break; - default: - if ((c & 0x7f) < 0x20) { /* C0 and C1 */ - RESERVE_OUTBUF(1) - OUT1(c & 0x7f) - NEXT(1, 1) - } else { - unsigned char charset, c2; - ucs4_t code; - - if (!STATE_GETFLAG(state, F_SHIFTED) 
&& c < 0x80) { /* G0 */ - charset = STATE_GETG0(state); - c2 = IN2; - } else { /* G1 */ - charset = STATE_GETG1(state); - c &= 0x7f; - c2 = IN2 & 0x7f; - } - - if (charset & CHARSET_DOUBLEBYTE) { - RESERVE_INBUF(2) - RESERVE_OUTBUF(1) - if (charset == CHARSET_JISX0213_1) { - if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c; - else TRYMAP_DEC(jisx0208, **outbuf, c, c2); - else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2); - else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) { - PUTUCS4(EMPBASE | code) - NEXT_IN(2) - continue; - } else TRYMAP_DEC(jisx0213_pair, code, c, c2) { - WRITE2(code >> 16, code & 0xffff) - NEXT(2, 2) - continue; - } else return 2; - } else if (charset == CHARSET_JISX0213_2) { - TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c, c2); - else TRYMAP_DEC(jisx0213_2_emp, code, c, c2) { - PUTUCS4(EMPBASE | code) - NEXT_IN(2) - continue; - } else return 2; - } else - return MBERR_INTERNAL; - NEXT(2, 1) - } else if (charset == CHARSET_ASCII) { - RESERVE_OUTBUF(1) - OUT1(c) - NEXT(1, 1) - } else - return MBERR_INTERNAL; - } - } - } + } else + return MBERR_INTERNAL; + ISO2022_LOOP_END return 0; } 1.8 +25 -103 cjkcodecs/src/_iso_2022_kr.c Index: _iso_2022_kr.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_kr.c,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- _iso_2022_kr.c 7 Jul 2003 08:26:19 -0000 1.7 +++ _iso_2022_kr.c 9 Jul 2003 18:47:47 -0000 1.8 @@ -26,9 +26,12 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: _iso_2022_kr.c,v 1.7 2003/07/07 08:26:19 perky Exp $ + * $Id: _iso_2022_kr.c,v 1.8 2003/07/09 18:47:47 perky Exp $ */ +#define ISO2022_DESIGNATIONS \ + CHARSET_ASCII, CHARSET_KSX1001 + #include "codeccommon.h" #include "iso2022common.h" @@ -38,7 +41,7 @@ #define HAVE_ENCODER_INIT ENCODER_INIT(iso_2022_kr) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -49,7 +52,7 @@ { if (STATE_GETFLAG(state, F_SHIFTED)) { RESERVE_OUTBUF(1) - **outbuf = SI; + OUT1(SI) NEXT_OUT(1) STATE_CLEARFLAG(state, F_SHIFTED) } @@ -103,7 +106,7 @@ #define HAVE_DECODER_INIT DECODER_INIT(iso_2022_kr) { - state->i = 0; + STATE_CLEARFLAGS(state) STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; @@ -118,108 +121,27 @@ DECODER(iso_2022_kr) { - while (inleft > 0) { - unsigned char c = **inbuf; - - if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { - /* ESC throughout mode: for non-iso2022 escape sequences */ - RESERVE_OUTBUF(1) - **outbuf = c; /* assume as ISO-8859-1 */ - NEXT(1, 1) - if (IS_ESCEND(c)) { - STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) - } - continue; - } + ISO2022_LOOP_BEGIN + unsigned char charset, c2; - switch (c) { - case ESC: - RESERVE_INBUF(2) - if (IS_ISO2022ESC((*inbuf)[1])) { - int eslen; - - eslen = iso2022esclen(*inbuf, inleft); - if (eslen < 0) - return eslen == MBERR_INTERNAL ? 
1 : eslen; - - if (eslen == 3) { - if ((*inbuf)[2] == 'B') { /* ASCII */ - if ((*inbuf)[1] == '(') { - STATE_SETG0(state, CHARSET_ASCII) - } else if ((*inbuf)[1] == ')') { - STATE_SETG1(state, CHARSET_ASCII) - } else - return 3; - } else - return 3; - } else if (eslen == 4) { - if ((*inbuf)[1] == '$' && (*inbuf)[3] == 'C') { - /* KS X 1001 */ - if ((*inbuf)[2] == '(') { - STATE_SETG0(state, CHARSET_KSX1001) - } else if ((*inbuf)[2] == ')') { - STATE_SETG1(state, CHARSET_KSX1001) - } else - return 4; - } else - return 4; - } else - return eslen; - NEXT_IN(eslen) - } else { - STATE_SETFLAG(state, F_ESCTHROUGHOUT) - **outbuf = ESC; - NEXT(1, 1) - } - break; - case SI: - STATE_CLEARFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case SO: - STATE_SETFLAG(state, F_SHIFTED) - NEXT_IN(1) - break; - case '\n': - STATE_CLEARFLAG(state, F_SHIFTED) - /* FALLTHROUGH */ - case SP: /* FALLTHROUGH */ - case DEL: - RESERVE_OUTBUF(1) - **outbuf = c; - NEXT(1, 1) - break; - default: - if ((c & 0x7f) < 0x20) { /* C0 and C1 */ - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } else { - unsigned char charset; + ISO2022_GETCHARSET(charset, c, c2) - if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ - charset = STATE_GETG0(state); - else /* G1 */ - charset = STATE_GETG1(state); - - if (charset & CHARSET_DOUBLEBYTE) { - /* all double byte character sets are in KS X 1001 here */ - RESERVE_INBUF(2) - RESERVE_OUTBUF(1) - TRYMAP_DEC(ksx1001, **outbuf, c & 0x7f, (*inbuf)[1] & 0x7f){ - NEXT(2, 1) - } else - return 2; - } else { - RESERVE_OUTBUF(1) - **outbuf = c & 0x7f; - NEXT(1, 1) - } - } - } + if (charset & CHARSET_DOUBLEBYTE) { + /* all double byte character sets are in KS X 1001 here */ + RESERVE_INBUF(2) + RESERVE_OUTBUF(1) + c2 &= IN2; + TRYMAP_DEC(ksx1001, **outbuf, c, c2) { + NEXT(2, 1) + } else + return 2; + } else { + RESERVE_OUTBUF(1) + OUT1(c); + NEXT(1, 1) } - - return 0; + ISO2022_LOOP_END + return 0; } #include "codecentry.h" 1.3 +134 -17 
cjkcodecs/src/iso2022common.h Index: iso2022common.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/iso2022common.h,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- iso2022common.h 6 Jul 2003 17:32:25 -0000 1.2 +++ iso2022common.h 9 Jul 2003 18:47:47 -0000 1.3 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: iso2022common.h,v 1.2 2003/07/06 17:32:25 perky Exp $ + * $Id: iso2022common.h,v 1.3 2003/07/09 18:47:47 perky Exp $ */ /* This ISO-2022 implementation is intended to comply ECMA-43 Level 1 @@ -85,35 +85,152 @@ #define F_SHIFTED 0x01 #define F_ESCTHROUGHOUT 0x02 -#define STATE_SETG0(s, v) ((s)->c[0]) = (v); -#define STATE_GETG0(s) ((s)->c[0]) +#define STATE_SETG(dn, s, v) ((s)->c[dn]) = (v); +#define STATE_GETG(dn, s) ((s)->c[dn]) -#define STATE_SETG1(s, v) ((s)->c[1]) = (v); -#define STATE_GETG1(s) ((s)->c[1]) - -#define STATE_SETG2(s, v) ((s)->c[2]) = (v); -#define STATE_GETG2(s) ((s)->c[2]) - -#define STATE_SETG3(s, v) ((s)->c[3]) = (v); -#define STATE_GETG3(s) ((s)->c[3]) +#define STATE_SETG0(s, v) STATE_SETG(0, s, v) +#define STATE_GETG0(s) STATE_GETG(0, s) +#define STATE_SETG1(s, v) STATE_SETG(1, s, v) +#define STATE_GETG1(s) STATE_GETG(1, s) +#define STATE_SETG2(s, v) STATE_SETG(2, s, v) +#define STATE_GETG2(s) STATE_GETG(2, s) +#define STATE_SETG3(s, v) STATE_SETG(3, s, v) +#define STATE_GETG3(s) STATE_GETG(3, s) #define STATE_SETFLAG(s, f) ((s)->c[4]) |= (f); #define STATE_GETFLAG(s, f) ((s)->c[4] & (f)) #define STATE_CLEARFLAG(s, f) ((s)->c[4]) &= ~(f); +#define STATE_CLEARFLAGS(s) ((s)->c[4]) = 0; + +#define ISO2022_GETCHARSET(charset, c1, c2mask) \ + if (STATE_GETFLAG(state, F_SHIFTED) || (c) >= 0x80) { /* G1 */ \ + (charset) = STATE_GETG1(state); \ + (c) &= 0x7f; (c2mask) = 0x7f; \ + } else { /* G1 */ \ + (charset) = STATE_GETG0(state); \ + (c2mask) = 0xff; \ + } + +#define 
ISO2022_BASECASES(c1) \ + case ESC: \ + RESERVE_INBUF(2) \ + if (IS_ISO2022ESC(IN2)) { \ + int err; \ + err = iso2022processesc(state, inbuf, &inleft); \ + if (err != 0) \ + return err; \ + } else { \ + STATE_SETFLAG(state, F_ESCTHROUGHOUT) \ + OUT1(ESC) \ + NEXT(1, 1) \ + } \ + break; \ + case SI: \ + STATE_CLEARFLAG(state, F_SHIFTED) \ + NEXT_IN(1) \ + break; \ + case SO: \ + STATE_SETFLAG(state, F_SHIFTED) \ + NEXT_IN(1) \ + break; \ + case '\n': \ + STATE_CLEARFLAG(state, F_SHIFTED) \ + /* FALLTHROUGH */ \ + case SP: /* FALLTHROUGH */ \ + case DEL: \ + RESERVE_OUTBUF(1) \ + OUT1(c1) \ + NEXT(1, 1) \ + break; + +#define ISO2022_ESCTHROUGHOUT(c) \ + if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { \ + /* ESC throughout mode: for non-iso2022 escape sequences */ \ + RESERVE_OUTBUF(1) \ + OUT1(c) /* assume as ISO-8859-1 */ \ + NEXT(1, 1) \ + if (IS_ESCEND(c)) { \ + STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) \ + } \ + continue; \ + } + +#define ISO2022_LOOP_BEGIN \ + while (inleft > 0) { \ + unsigned char c = IN1; \ + ISO2022_ESCTHROUGHOUT(c) \ + switch(c) { \ + ISO2022_BASECASES(c) \ + default: \ + if ((c & 0x7f) < 0x20) { /* C0 and C1 */ \ + RESERVE_OUTBUF(1) \ + OUT1(c & 0x7f) \ + NEXT(1, 1) \ + } else { +#define ISO2022_LOOP_END \ + } \ + } \ + } static int -iso2022esclen(const unsigned char *s, size_t len) +iso2022processesc(MultibyteCodec_State *state, + const unsigned char **inbuf, size_t *inleft) { - int i; + unsigned char charset, designation; + int i, esclen; for (i = 1;i < MAX_ESCSEQLEN;i++) { - if (i >= len) + if (i >= *inleft) return MBERR_TOOFEW; - if (IS_ESCEND(s[i])) - return i + 1; + if (IS_ESCEND((*inbuf)[i])) { + esclen = i + 1; + break; + } + } + + if (i >= MAX_ESCSEQLEN) + return 1; /* unterminated escape sequence */ + + switch (esclen) { + case 3: + if (IN2 == '$') { + charset = IN3 | CHARSET_DOUBLEBYTE; + designation = 0; + } else { + charset = IN3; + if (IN2 == '(') designation = 0; + else if (IN2 == ')') designation = 1; + else return 3; + } + 
break; + case 4: + if (IN2 != '$') + return 4; + + charset = IN4 | CHARSET_DOUBLEBYTE; + if (IN3 == '(') designation = 0; + else if (IN3 == ')') designation = 1; + else return 4; + break; + default: + return esclen; + } + + { /* raise error when the charset is not designated for this encoding */ + const unsigned char dsgs[] = {ISO2022_DESIGNATIONS, '\x00'}; + + for (i = 0; dsgs[i] != '\x00'; i++) + if (dsgs[i] == charset) + break; + + if (dsgs[i] == '\x00') + return esclen; } - return MBERR_INTERNAL; /* unterminated escape sequence */ + STATE_SETG(designation, state, charset) + *inleft -= esclen; + (*inbuf) += esclen; + return 0; } /* |
From: Hye-Shik C. <pe...@us...> - 2003-07-08 17:14:27
|
perky 03/07/08 10:14:25 Modified: src _shift_jisx0213.c Log: Fix Shift-JISX0213 decoder to the real one. (was Shift-JIS's :) Revision Changes Path 1.2 +35 -24 cjkcodecs/src/_shift_jisx0213.c Index: _shift_jisx0213.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_shift_jisx0213.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- _shift_jisx0213.c 8 Jul 2003 09:23:20 -0000 1.1 +++ _shift_jisx0213.c 8 Jul 2003 17:14:25 -0000 1.2 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _shift_jisx0213.c,v 1.1 2003/07/08 09:23:20 perky Exp $ + * $Id: _shift_jisx0213.c,v 1.2 2003/07/08 17:14:25 perky Exp $ */ #define USING_BINARY_PAIR_SEARCH @@ -109,7 +109,7 @@ } c1 = code >> 8; - c2 = code & 0xff - 0x21; + c2 = (code & 0xff) - 0x21; if (c1 & 0x80) { /* Plane 2 */ if (c1 >= 0xee) c1 -= 0x87; @@ -132,40 +132,51 @@ DECODER(shift_jisx0213) { while (inleft > 0) { - unsigned char c = **inbuf; + unsigned char c = IN1; RESERVE_OUTBUF(1) JISX0201_DECODE(c, **outbuf) - else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)) { - unsigned char c1, c2; + else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)) { + unsigned char c1, c2 = IN2; + ucs4_t code; RESERVE_INBUF(2) - c2 = (*inbuf)[1]; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) return 2; c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); - c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21); + c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1)); c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; - TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { - NEXT(2, 1) - continue; - } else - return 2; - } else if (c >= 0xf0 && c <= 0xf9) { - unsigned char c2; - - RESERVE_INBUF(2) - c2 = (*inbuf)[1]; - if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc)) { - **outbuf = 0xe000 + 188 * (c - 0xf0) + - (c2 < 0x80 ? 
c2 - 0x40 : c2 - 0x41); - NEXT(2, 1) - continue; - } else - return 2; + if (c1 < 0x5e) { /* Plane 1 */ + c1 += 0x21; + TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { + NEXT_OUT(1) + } else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c1, c2) { + NEXT_OUT(1) + } else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) { + PUTUCS4(EMPBASE | code) + } else TRYMAP_DEC(jisx0213_pair, code, c1, c2) { + WRITE2(code >> 16, code & 0xffff) + NEXT_OUT(2) + } else + return 2; + NEXT_IN(2) + } else { /* Plane 2 */ + if (c1 >= 0x67) c1 += 0x07; + else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37; + else c1 -= 0x3d; + + TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c1, c2) { + NEXT_OUT(1) + } else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) { + PUTUCS4(EMPBASE | code) + } else + return 2; + NEXT_IN(2) + } + continue; } else return 2; |
From: Hye-Shik C. <pe...@us...> - 2003-07-08 09:23:22
|
perky 03/07/08 02:23:20 Modified: . CHANGES setup.py Log: Add Shift-JISX0213 codec. Revision Changes Path 1.12 +1 -1 cjkcodecs/CHANGES Index: CHANGES =================================================================== RCS file: /cvsroot/koco/cjkcodecs/CHANGES,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- CHANGES 8 Jul 2003 07:02:03 -0000 1.11 +++ CHANGES 8 Jul 2003 09:23:20 -0000 1.12 @@ -1,6 +1,6 @@ Changes with CJKCodecs 1.0 - *) EUC-JISX0213 and ISO-2022-JP-3 codec is added. + *) SHIFT-JISX0213, EUC-JISX0213 and ISO-2022-JP-3 codec is added. *) Changed a few characters of a big5 codepoint mapping to cp950's rather than 0xfffd. (documented on NOTES.big5) 1.27 +2 -2 cjkcodecs/setup.py Index: setup.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/setup.py,v retrieving revision 1.26 retrieving revision 1.27 diff -u -r1.26 -r1.27 --- setup.py 8 Jul 2003 07:02:03 -0000 1.26 +++ setup.py 8 Jul 2003 09:23:20 -0000 1.27 @@ -27,7 +27,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: setup.py,v 1.26 2003/07/08 07:02:03 perky Exp $ +# $Id: setup.py,v 1.27 2003/07/08 09:23:20 perky Exp $ # import sys @@ -38,7 +38,7 @@ extensions = [] encodings = { 'ja_JP': ['shift_jis', 'cp932', 'euc_jp', 'iso_2022_jp', 'iso_2022_jp_1', - 'euc_jisx0213', 'iso_2022_jp_3'], + 'shift_jisx0213', 'euc_jisx0213', 'iso_2022_jp_3'], 'ko_KR': ['euc_kr', 'cp949', 'johab', 'iso_2022_kr'], 'zh_CN': ['gb2312', 'gbk', 'gb18030', 'hz'], 'zh_TW': ['big5', 'cp950'], |
From: Hye-Shik C. <pe...@us...> - 2003-07-08 09:23:22
|
perky 03/07/08 02:23:20 Added: src _shift_jisx0213.c Log: Add Shift-JISX0213 codec. Revision Changes Path 1.1 cjkcodecs/src/_shift_jisx0213.c Index: _shift_jisx0213.c =================================================================== /* * _shift_jisx0213.c: the SHIFT-JISX0213 codec * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * $Id: _shift_jisx0213.c,v 1.1 2003/07/08 09:23:20 perky Exp $ */ #define USING_BINARY_PAIR_SEARCH #include "codeccommon.h" #include "maps/alg_jisx0201.h" #include "maps/map_jisx0213_pairs.h" ENCMAP(jisxcommon) DECMAP(jisx0208) ENCMAP(jisx0213_bmp) DECMAP(jisx0213_1_bmp) DECMAP(jisx0213_2_bmp) ENCMAP(jisx0213_emp) DECMAP(jisx0213_1_emp) DECMAP(jisx0213_2_emp) #define EMPBASE 0x20000 ENCODER(shift_jisx0213) { while (inleft > 0) { ucs4_t c = **inbuf; DBCHAR code = NOCHAR; int c1, c2; size_t insize = 1; JISX0201_ENCODE(c, code) #if Py_UNICODE_SIZE == 2 else if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ RESERVE_INBUF(2) if ((*inbuf)[1] >> 10 == 0xdc00 >> 10) { /* low surrogate */ c = 0x10000 + ((c - 0xd800) << 10) + ((ucs4_t)((*inbuf)[1]) - 0xdc00); insize = 2; } } #endif if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { WRITE1(code) NEXT(1, 1) continue; } RESERVE_OUTBUF(2) if (code == NOCHAR) { if (c <= 0xffff) { TRYMAP_ENC(jisx0213_bmp, code, c) { if (code == MULTIC) { if (inleft < 2) { if (flags & MBENC_FLUSH) { code = find_pairencmap(c, 0, jisx0213_pairencmap, JISX0213_ENCPAIRS); if (code == DBCINV) return 1; } else return MBERR_TOOFEW; } else { code = find_pairencmap(c, IN2, jisx0213_pairencmap, JISX0213_ENCPAIRS); if (code == DBCINV) { code = find_pairencmap(c, 0, jisx0213_pairencmap, JISX0213_ENCPAIRS); if (code == DBCINV) return 1; } else insize = 2; } } } else TRYMAP_ENC(jisxcommon, code, c) { if (code & 0x8000) return 1; /* abandon JIS X 0212 codes */ } else return 1; } else if (c >> 16 == EMPBASE >> 16) { TRYMAP_ENC(jisx0213_emp, code, c & 0xffff); else return insize; } else return insize; } c1 = code >> 8; c2 = code & 0xff - 0x21; if (c1 & 0x80) { /* Plane 2 */ if (c1 >= 0xee) c1 -= 0x87; else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49; else c1 -= 0x43; } else /* Plane 1 */ c1 -= 0x21; if (c1 & 1) c2 += 0x5e; c1 >>= 1; OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1)) OUT2(c2 + (c2 < 0x3f ? 
0x40 : 0x41)) NEXT(insize, 2) } return 0; } DECODER(shift_jisx0213) { while (inleft > 0) { unsigned char c = **inbuf; RESERVE_OUTBUF(1) JISX0201_DECODE(c, **outbuf) else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)) { unsigned char c1, c2; RESERVE_INBUF(2) c2 = (*inbuf)[1]; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) return 2; c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21); c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; TRYMAP_DEC(jisx0208, **outbuf, c1, c2) { NEXT(2, 1) continue; } else return 2; } else if (c >= 0xf0 && c <= 0xf9) { unsigned char c2; RESERVE_INBUF(2) c2 = (*inbuf)[1]; if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc)) { **outbuf = 0xe000 + 188 * (c - 0xf0) + (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); NEXT(2, 1) continue; } else return 2; } else return 2; NEXT(1, 1) /* JIS X 0201 */ } return 0; } #include "codecentry.h" BEGIN_CODEC_REGISTRY(shift_jisx0213) MAPOPEN(ja_JP) IMPORTMAP_DEC(jisx0208) IMPORTMAP_ENC(jisxcommon) IMPORTMAP_ENC(jisx0213_bmp) IMPORTMAP_DEC(jisx0213_1_bmp) IMPORTMAP_DEC(jisx0213_2_bmp) IMPORTMAP_ENC(jisx0213_emp) IMPORTMAP_DEC(jisx0213_1_emp) IMPORTMAP_DEC(jisx0213_2_emp) MAPCLOSE() END_CODEC_REGISTRY(shift_jisx0213) /* * ex: ts=8 sts=4 et */ |
From: Hye-Shik C. <pe...@us...> - 2003-07-08 08:47:04
|
perky 03/07/08 01:47:02 Modified: src _iso_2022_jp_3.c codeccommon.h Log: Find JIS X 0208 table as a subset of JIS X 0213 Plane 1. Revision Changes Path 1.4 +22 -13 cjkcodecs/src/_iso_2022_jp_3.c Index: _iso_2022_jp_3.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_jp_3.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- _iso_2022_jp_3.c 8 Jul 2003 07:22:35 -0000 1.3 +++ _iso_2022_jp_3.c 8 Jul 2003 08:47:02 -0000 1.4 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _iso_2022_jp_3.c,v 1.3 2003/07/08 07:22:35 perky Exp $ + * $Id: _iso_2022_jp_3.c,v 1.4 2003/07/08 08:47:02 perky Exp $ */ #define USING_BINARY_PAIR_SEARCH @@ -125,6 +125,9 @@ insize = 2; } } + } else TRYMAP_ENC(jisxcommon, code, c) { + if (code & 0x8000) + return 1; /* avoid JIS X 0212 codes */ } else if (c == 0xff3c) /* F/W REVERSE SOLIDUS */ code = 0x2140; else @@ -263,31 +266,37 @@ OUT1(c & 0x7f) NEXT(1, 1) } else { - unsigned char charset; + unsigned char charset, c2; ucs4_t code; - if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ + if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) { /* G0 */ charset = STATE_GETG0(state); - else /* G1 */ + c2 = IN2; + } else { /* G1 */ charset = STATE_GETG1(state); + c &= 0x7f; + c2 = IN2 & 0x7f; + } if (charset & CHARSET_DOUBLEBYTE) { RESERVE_INBUF(2) RESERVE_OUTBUF(1) if (charset == CHARSET_JISX0213_1) { - if (c == 0x21 && IN2 == 0x40) **outbuf = 0xff3c; - else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, - c & 0x7f, IN2 & 0x7f); - else TRYMAP_DEC(jisx0213_1_emp, code, c & 0x7f, - IN2 & 0x7f) { + if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c; + else TRYMAP_DEC(jisx0208, **outbuf, c, c2); + else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2); + else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) { PUTUCS4(EMPBASE | code) NEXT_IN(2) continue; + } else TRYMAP_DEC(jisx0213_pair, code, c, c2) { + 
WRITE2(code >> 16, code & 0xffff) + NEXT(2, 2) + continue; } else return 2; } else if (charset == CHARSET_JISX0213_2) { - TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c & 0x7f, IN2 & 0x7f); - else TRYMAP_DEC(jisx0213_2_emp, code, c & 0x7f, - IN2 & 0x7f) { + TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c, c2); + else TRYMAP_DEC(jisx0213_2_emp, code, c, c2) { PUTUCS4(EMPBASE | code) NEXT_IN(2) continue; @@ -297,7 +306,7 @@ NEXT(2, 1) } else if (charset == CHARSET_ASCII) { RESERVE_OUTBUF(1) - OUT1(c & 0x7f) + OUT1(c) NEXT(1, 1) } else return MBERR_INTERNAL; 1.21 +5 -5 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.20 retrieving revision 1.21 diff -u -r1.20 -r1.21 --- codeccommon.h 8 Jul 2003 07:02:03 -0000 1.20 +++ codeccommon.h 8 Jul 2003 08:47:02 -0000 1.21 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: codeccommon.h,v 1.20 2003/07/08 07:02:03 perky Exp $ + * $Id: codeccommon.h,v 1.21 2003/07/08 08:47:02 perky Exp $ */ #include "Python.h" @@ -93,10 +93,10 @@ #define IN3 ((*inbuf)[2]) #define IN4 ((*inbuf)[3]) -#define OUT1(c) ((*outbuf)[0]) = (c); -#define OUT2(c) ((*outbuf)[1]) = (c); -#define OUT3(c) ((*outbuf)[2]) = (c); -#define OUT4(c) ((*outbuf)[3]) = (c); +#define OUT1(c) ((*outbuf)[0]) = (unsigned char)(c); +#define OUT2(c) ((*outbuf)[1]) = (unsigned char)(c); +#define OUT3(c) ((*outbuf)[2]) = (unsigned char)(c); +#define OUT4(c) ((*outbuf)[3]) = (unsigned char)(c); #define WRITE1(c1) \ RESERVE_OUTBUF(1) \ |
From: Hye-Shik C. <pe...@us...> - 2003-07-08 07:22:37
|
perky 03/07/08 00:22:36 Modified: src _iso_2022_jp_3.c Log: ISO-2022-JP-3 doesn't use JIS X 0201 Revision Changes Path 1.3 +1 -2 cjkcodecs/src/_iso_2022_jp_3.c Index: _iso_2022_jp_3.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_jp_3.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- _iso_2022_jp_3.c 8 Jul 2003 07:11:21 -0000 1.2 +++ _iso_2022_jp_3.c 8 Jul 2003 07:22:35 -0000 1.3 @@ -26,14 +26,13 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _iso_2022_jp_3.c,v 1.2 2003/07/08 07:11:21 perky Exp $ + * $Id: _iso_2022_jp_3.c,v 1.3 2003/07/08 07:22:35 perky Exp $ */ #define USING_BINARY_PAIR_SEARCH #include "codeccommon.h" #include "iso2022common.h" #include "maps/map_jisx0213_pairs.h" -#include "maps/alg_jisx0201.h" ENCMAP(jisxcommon) DECMAP(jisx0208) |
From: Hye-Shik C. <pe...@us...> - 2003-07-08 07:11:24
|
perky 03/07/08 00:11:21 Modified: src _iso_2022_jp_3.c Log: Correct decoding FULL-WIDTH REVERSE SOLIDUS Revision Changes Path 1.2 +4 -2 cjkcodecs/src/_iso_2022_jp_3.c Index: _iso_2022_jp_3.c =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/_iso_2022_jp_3.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- _iso_2022_jp_3.c 8 Jul 2003 07:02:03 -0000 1.1 +++ _iso_2022_jp_3.c 8 Jul 2003 07:11:21 -0000 1.2 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: _iso_2022_jp_3.c,v 1.1 2003/07/08 07:02:03 perky Exp $ + * $Id: _iso_2022_jp_3.c,v 1.2 2003/07/08 07:11:21 perky Exp $ */ #define USING_BINARY_PAIR_SEARCH @@ -276,7 +276,9 @@ RESERVE_INBUF(2) RESERVE_OUTBUF(1) if (charset == CHARSET_JISX0213_1) { - TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c & 0x7f, IN2 & 0x7f); + if (c == 0x21 && IN2 == 0x40) **outbuf = 0xff3c; + else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, + c & 0x7f, IN2 & 0x7f); else TRYMAP_DEC(jisx0213_1_emp, code, c & 0x7f, IN2 & 0x7f) { PUTUCS4(EMPBASE | code) |
From: Hye-Shik C. <pe...@us...> - 2003-07-08 07:02:05
|
perky 03/07/08 00:02:04 Modified: src codeccommon.h Added: src _iso_2022_jp_3.c Log: Add the ISO-2022-JP-3 codec. Revision Changes Path 1.20 +11 -1 cjkcodecs/src/codeccommon.h Index: codeccommon.h =================================================================== RCS file: /cvsroot/koco/cjkcodecs/src/codeccommon.h,v retrieving revision 1.19 retrieving revision 1.20 diff -u -r1.19 -r1.20 --- codeccommon.h 8 Jul 2003 04:40:44 -0000 1.19 +++ codeccommon.h 8 Jul 2003 07:02:03 -0000 1.20 @@ -26,7 +26,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * - * $Id: codeccommon.h,v 1.19 2003/07/08 04:40:44 perky Exp $ + * $Id: codeccommon.h,v 1.20 2003/07/08 07:02:03 perky Exp $ */ #include "Python.h" @@ -87,6 +87,16 @@ #define RESERVE_OUTBUF(n) \ if (outleft < (n)) \ return MBERR_TOOSMALL; + +#define IN1 ((*inbuf)[0]) +#define IN2 ((*inbuf)[1]) +#define IN3 ((*inbuf)[2]) +#define IN4 ((*inbuf)[3]) + +#define OUT1(c) ((*outbuf)[0]) = (c); +#define OUT2(c) ((*outbuf)[1]) = (c); +#define OUT3(c) ((*outbuf)[2]) = (c); +#define OUT4(c) ((*outbuf)[3]) = (c); #define WRITE1(c1) \ RESERVE_OUTBUF(1) \ 1.1 cjkcodecs/src/_iso_2022_jp_3.c Index: _iso_2022_jp_3.c =================================================================== /* * _iso_2022_jp_3.c: the ISO-2022-JP-3 codec (JIS X 0213) * * Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $Id: _iso_2022_jp_3.c,v 1.1 2003/07/08 07:02:03 perky Exp $ */ #define USING_BINARY_PAIR_SEARCH #include "codeccommon.h" #include "iso2022common.h" #include "maps/map_jisx0213_pairs.h" #include "maps/alg_jisx0201.h" ENCMAP(jisxcommon) DECMAP(jisx0208) DECMAP(jisx0212) ENCMAP(jisx0213_bmp) DECMAP(jisx0213_1_bmp) DECMAP(jisx0213_2_bmp) ENCMAP(jisx0213_emp) DECMAP(jisx0213_1_emp) DECMAP(jisx0213_2_emp) #define EMPBASE 0x20000 #define HAVE_ENCODER_INIT ENCODER_INIT(iso_2022_jp_3) { state->i = 0; STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; } #define HAVE_ENCODER_RESET ENCODER_RESET(iso_2022_jp_3) { if (STATE_GETG0(state) != CHARSET_ASCII) { WRITE3(ESC, '(', 'B') STATE_SETG0(state, CHARSET_ASCII) NEXT_OUT(3) } return 0; } ENCODER(iso_2022_jp_3) { while (inleft > 0) { unsigned char charset; ucs4_t c = IN1; DBCHAR code; size_t insize = 1; if (c < 0x80) { switch (STATE_GETG0(state)) { case CHARSET_ASCII: WRITE1(c) NEXT(1, 1) break; default: WRITE4(ESC, '(', 'B', c) STATE_SETG0(state, CHARSET_ASCII) NEXT(1, 4) break; } if (c == '\n') STATE_CLEARFLAG(state, F_SHIFTED) continue; } #if Py_UNICODE_SIZE == 2 if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ RESERVE_INBUF(2) if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ 
c = 0x10000 + ((c - 0xd800) << 10) + ((ucs4_t)IN2 - 0xdc00); insize = 2; } } #endif if (c <= 0xffff) { TRYMAP_ENC(jisx0213_bmp, code, c) { if (code == MULTIC) { if (inleft < 2) { if (flags & MBENC_FLUSH) { code = find_pairencmap(c, 0, jisx0213_pairencmap, JISX0213_ENCPAIRS); if (code == DBCINV) return 1; } else return MBERR_TOOFEW; } else { code = find_pairencmap(c, IN2, jisx0213_pairencmap, JISX0213_ENCPAIRS); if (code == DBCINV) { code = find_pairencmap(c, 0, jisx0213_pairencmap, JISX0213_ENCPAIRS); if (code == DBCINV) return 1; } else insize = 2; } } } else if (c == 0xff3c) /* F/W REVERSE SOLIDUS */ code = 0x2140; else return 1; } else if (c >> 16 == EMPBASE >> 16) { TRYMAP_ENC(jisx0213_emp, code, c & 0xffff); else return insize; } else return insize; charset = STATE_GETG0(state); if (code & 0x8000) { /* MSB set: Plane 2 */ if (charset != CHARSET_JISX0213_2) { WRITE4(ESC, '$', '(', 'P') STATE_SETG0(state, CHARSET_JISX0213_2) NEXT_OUT(4) } WRITE2((code >> 8) & 0x7f, code & 0x7f) } else { /* MSB unset: Plane 1 */ if (charset != CHARSET_JISX0213_1) { WRITE4(ESC, '$', '(', 'O') STATE_SETG0(state, CHARSET_JISX0213_1) NEXT_OUT(4) } WRITE2(code >> 8, code & 0xff) } NEXT(insize, 2) } return 0; } #define HAVE_DECODER_INIT DECODER_INIT(iso_2022_jp_3) { state->i = 0; STATE_SETG0(state, CHARSET_ASCII) STATE_SETG1(state, CHARSET_ASCII) return 0; } #define HAVE_DECODER_RESET DECODER_RESET(iso_2022_jp_3) { STATE_CLEARFLAG(state, F_SHIFTED) return 0; } DECODER(iso_2022_jp_3) { while (inleft > 0) { unsigned char c = IN1; if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { /* ESC throughout mode: for non-iso2022 escape sequences */ RESERVE_OUTBUF(1) OUT1(c) /* assume as ISO-8859-1 */ NEXT(1, 1) if (IS_ESCEND(c)) { STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) } continue; } switch (c) { case ESC: RESERVE_INBUF(2) if (IS_ISO2022ESC(IN2)) { int eslen; unsigned char charset; eslen = iso2022esclen(*inbuf, inleft); if (eslen < 0) return eslen == MBERR_INTERNAL ? 
1 : eslen; if (eslen == 3) { if (IN2 == '$') { if (IN3 == 'B') { charset = IN3 | CHARSET_DOUBLEBYTE; STATE_SETG0(state, charset); } else return 3; } else { if (IN3 == 'B') charset = IN3; else return 3; if (IN2 == '(') { STATE_SETG0(state, charset) } else if (IN2 == ')') { STATE_SETG1(state, charset) } else return 3; } } else if (eslen == 4) { if (IN2 == '$' && (IN4 == 'O' || IN4 == 'P')) { charset = IN4 | CHARSET_DOUBLEBYTE; if (IN3 == '(') { STATE_SETG0(state, charset) } else if (IN3 == ')') { STATE_SETG1(state, charset) } else return 4; } else return 4; } else return eslen; NEXT_IN(eslen) } else { STATE_SETFLAG(state, F_ESCTHROUGHOUT) OUT1(ESC) NEXT(1, 1) } break; case SI: STATE_CLEARFLAG(state, F_SHIFTED) NEXT_IN(1) break; case SO: STATE_SETFLAG(state, F_SHIFTED) NEXT_IN(1) break; case '\n': STATE_CLEARFLAG(state, F_SHIFTED) /* FALLTHROUGH */ case SP: /* FALLTHROUGH */ case DEL: RESERVE_OUTBUF(1) OUT1(c) NEXT(1, 1) break; default: if ((c & 0x7f) < 0x20) { /* C0 and C1 */ RESERVE_OUTBUF(1) OUT1(c & 0x7f) NEXT(1, 1) } else { unsigned char charset; ucs4_t code; if (!STATE_GETFLAG(state, F_SHIFTED) && c < 0x80) /* G0 */ charset = STATE_GETG0(state); else /* G1 */ charset = STATE_GETG1(state); if (charset & CHARSET_DOUBLEBYTE) { RESERVE_INBUF(2) RESERVE_OUTBUF(1) if (charset == CHARSET_JISX0213_1) { TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c & 0x7f, IN2 & 0x7f); else TRYMAP_DEC(jisx0213_1_emp, code, c & 0x7f, IN2 & 0x7f) { PUTUCS4(EMPBASE | code) NEXT_IN(2) continue; } else return 2; } else if (charset == CHARSET_JISX0213_2) { TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c & 0x7f, IN2 & 0x7f); else TRYMAP_DEC(jisx0213_2_emp, code, c & 0x7f, IN2 & 0x7f) { PUTUCS4(EMPBASE | code) NEXT_IN(2) continue; } else return 2; } else return MBERR_INTERNAL; NEXT(2, 1) } else if (charset == CHARSET_ASCII) { RESERVE_OUTBUF(1) OUT1(c & 0x7f) NEXT(1, 1) } else return MBERR_INTERNAL; } } } return 0; } #include "codecentry.h" BEGIN_CODEC_REGISTRY(iso_2022_jp_3) MAPOPEN(ja_JP) 
IMPORTMAP_DEC(jisx0208) IMPORTMAP_DEC(jisx0212) IMPORTMAP_ENC(jisxcommon) IMPORTMAP_ENC(jisx0213_bmp) IMPORTMAP_DEC(jisx0213_1_bmp) IMPORTMAP_DEC(jisx0213_2_bmp) IMPORTMAP_ENC(jisx0213_emp) IMPORTMAP_DEC(jisx0213_1_emp) IMPORTMAP_DEC(jisx0213_2_emp) MAPCLOSE() END_CODEC_REGISTRY(iso_2022_jp_3) /* * ex: ts=8 sts=4 et */ |
From: Hye-Shik C. <pe...@us...> - 2003-07-08 07:02:04
|
perky 03/07/08 00:02:03 Modified: . CHANGES setup.py Log: Add the ISO-2022-JP-3 codec. Revision Changes Path 1.11 +1 -1 cjkcodecs/CHANGES Index: CHANGES =================================================================== RCS file: /cvsroot/koco/cjkcodecs/CHANGES,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- CHANGES 7 Jul 2003 08:17:36 -0000 1.10 +++ CHANGES 8 Jul 2003 07:02:03 -0000 1.11 @@ -1,6 +1,6 @@ Changes with CJKCodecs 1.0 - *) EUC-JISX0213 codec is added. + *) EUC-JISX0213 and ISO-2022-JP-3 codec is added. *) Changed a few characters of a big5 codepoint mapping to cp950's rather than 0xfffd. (documented on NOTES.big5) 1.26 +2 -2 cjkcodecs/setup.py Index: setup.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/setup.py,v retrieving revision 1.25 retrieving revision 1.26 diff -u -r1.25 -r1.26 --- setup.py 7 Jul 2003 08:17:36 -0000 1.25 +++ setup.py 8 Jul 2003 07:02:03 -0000 1.26 @@ -27,7 +27,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: setup.py,v 1.25 2003/07/07 08:17:36 perky Exp $ +# $Id: setup.py,v 1.26 2003/07/08 07:02:03 perky Exp $ # import sys @@ -38,7 +38,7 @@ extensions = [] encodings = { 'ja_JP': ['shift_jis', 'cp932', 'euc_jp', 'iso_2022_jp', 'iso_2022_jp_1', - 'euc_jisx0213'], + 'euc_jisx0213', 'iso_2022_jp_3'], 'ko_KR': ['euc_kr', 'cp949', 'johab', 'iso_2022_kr'], 'zh_CN': ['gb2312', 'gbk', 'gb18030', 'hz'], 'zh_TW': ['big5', 'cp950'], |