[KoCo-CVS] [Commit] KoreanCodecs/korean cp949.py euc_kr.py iso_2022_kr.py johab.py mackorean.py qwerty2bul.py unijohab.py hangul.py

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

perky       03/01/12 14:54:12

  Modified:    korean   cp949.py euc_kr.py iso_2022_kr.py johab.py
                        mackorean.py qwerty2bul.py unijohab.py
  Removed:     korean   hangul.py
  Log:
  Remove the framework that selected between two implementations, 'C' and 'Python'.
  We'll maintain only one implementation from now on.
  Accordingly, the --with[out]-extension options are removed, too.
  
  Revision  Changes    Path
  1.5       +18 -5     KoreanCodecs/korean/cp949.py
  
  Index: cp949.py
  ===================================================================
  RCS file: /cvsroot/koco/KoreanCodecs/korean/cp949.py,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- cp949.py	9 Jan 2003 21:35:48 -0000	1.4
  +++ cp949.py	12 Jan 2003 22:54:12 -0000	1.5
  @@ -17,10 +17,23 @@
   # along with KoreanCodecs; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
  -# $Id: cp949.py,v 1.4 2003/01/09 21:35:48 perky Exp $
  +# $Id: cp949.py,v 1.5 2003/01/12 22:54:12 perky Exp $
   #
   
  -try:
  -    from korean.c.cp949 import *
  -except ImportError:
  -    from korean.python.cp949 import *
  +import codecs
  +import _koco
  +
  +class Codec(codecs.Codec):
  +    encode = _koco.cp949_encode
  +    decode = _koco.cp949_decode
  +
  +class StreamWriter(Codec, codecs.StreamWriter):
  +    pass
  +
  +class StreamReader(Codec, _koco.StreamReader, codecs.StreamReader):
  +    encoding = 'cp949'
  +
  +### encodings module API
  +
  +def getregentry():
  +    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
  
  
  
  1.5       +18 -5     KoreanCodecs/korean/euc_kr.py
  
  Index: euc_kr.py
  ===================================================================
  RCS file: /cvsroot/koco/KoreanCodecs/korean/euc_kr.py,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- euc_kr.py	9 Jan 2003 21:35:48 -0000	1.4
  +++ euc_kr.py	12 Jan 2003 22:54:12 -0000	1.5
  @@ -17,10 +17,23 @@
   # along with KoreanCodecs; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
  -# $Id: euc_kr.py,v 1.4 2003/01/09 21:35:48 perky Exp $
  +# $Id: euc_kr.py,v 1.5 2003/01/12 22:54:12 perky Exp $
   #
   
  -try:
  -    from korean.c.euc_kr import *
  -except ImportError:
  -    from korean.python.euc_kr import *
  +import codecs
  +import _koco
  +
  +class Codec(codecs.Codec):
  +    encode = _koco.euc_kr_encode
  +    decode = _koco.euc_kr_decode
  +
  +class StreamWriter(Codec, codecs.StreamWriter):
  +    pass
  +
  +class StreamReader(Codec, _koco.StreamReader, codecs.StreamReader):
  +    encoding = 'euc-kr'
  +
  +### encodings module API
  +
  +def getregentry():
  +    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
  
  
  
  1.5       +139 -5    KoreanCodecs/korean/iso_2022_kr.py
  
  Index: iso_2022_kr.py
  ===================================================================
  RCS file: /cvsroot/koco/KoreanCodecs/korean/iso_2022_kr.py,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- iso_2022_kr.py	9 Jan 2003 21:35:48 -0000	1.4
  +++ iso_2022_kr.py	12 Jan 2003 22:54:12 -0000	1.5
  @@ -17,10 +17,144 @@
   # along with KoreanCodecs; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
  -# $Id: iso_2022_kr.py,v 1.4 2003/01/09 21:35:48 perky Exp $
  +# $Id: iso_2022_kr.py,v 1.5 2003/01/12 22:54:12 perky Exp $
   #
   
  -try:
  -    from korean.c.iso_2022_kr import *
  -except ImportError:
  -    from korean.python.iso_2022_kr import *
  +import codecs
  +
  +KSC5601_CODEC   = 'korean.euc-kr'
  +
  +US_ASCII        = 1
  +KSC5601_1987    = 2
  +
  +G0, G1 = 0, 1 # iso-2022-kr doesn't handle G2 and G3 area.
  +
  +CHARSETS = {
  +    "\033(B": (G0, US_ASCII),
  +    "\033)B": (G1, US_ASCII),
  +    "\033$(C": (G0, KSC5601_1987),
  +    "\033$)C": (G1, KSC5601_1987),
  +}
  +SI = '\x0f'
  +SO = '\x0e'
  +ESC = '\033'
  +
  +DESIGNATION_MARK = {}
  +for k, v in CHARSETS.items():
  +    DESIGNATION_MARK[v] = k
  +
  +class Codec(codecs.Codec):
  +    # Unicode to character buffer
  +    def encode(self, data, errors='strict'):
  +        if errors not in ('strict', 'ignore', 'replace'):
  +            raise ValueError, "unknown error handling"
  +        buffer = []
  +        designation = [US_ASCII, US_ASCII]
  +        new_designation = designation[:]
  +        new_shiftstate = shiftstate = 0
  +        for c in data:
  +            if c in ('\n', '\r'):
  +                new_shiftstate = 0
  +
  +            if c < u'\u0080':
  +                new_shiftstate = 0
  +                new_designation[0] = US_ASCII
  +                s = c.encode("ascii", errors)
  +            else:
  +                new_shiftstate = 1
  +                new_designation[1] = KSC5601_1987
  +                s = c.encode('korean.euc_kr', errors)
  +
  +            if designation[0] != new_designation[0]:
  +                buffer.append(DESIGNATION_MARK[(G0, new_designation[0])])
  +                designation[0] = new_designation[0]
  +            if designation[1] != new_designation[1]:
  +                buffer.append(DESIGNATION_MARK[(G1, new_designation[1])])
  +                designation[1] = new_designation[1]
  +            if shiftstate != new_shiftstate:
  +                buffer.append([SI, SO][new_shiftstate])
  +                shiftstate = new_shiftstate
  +
  +            if shiftstate:
  +                s = chr(ord(s[0])&0x7F) + chr(ord(s[1])&0x7F)
  +            buffer.append(s)
  +
  +        if shiftstate:
  +            buffer.append(SI)
  +
  +        return (''.join(buffer), len(data))
  +
  +    # character buffer to Unicode
  +    def decode(self, data, errors='strict'):
  +        global decmap_ideo, decmap_misc
  +        
  +        if errors not in ('strict', 'ignore', 'replace'):
  +            raise ValueError, "unknown error handling"
  +        buffer = []
  +        data = str(data) # character buffer compatible object
  +        size = len(data)
  +
  +        designation = [US_ASCII, KSC5601_1987]
  +        shiftstate  = 0
  +        escstart    = -1
  +        p           = 0
  +
  +        while p < size:
  +            if data[p] in ('\n', '\r'):
  +                shiftstate = 0
  +
  +            if escstart >= 0:
  +                if data[p].isalpha():
  +                    escstr = data[escstart:p+1]
  +                    if CHARSETS.has_key(escstr):
  +                        charset = CHARSETS[escstr]
  +                        designation[charset[0]] = charset[1]
  +                    elif errors == 'strict':
  +                        raise UnicodeError, "unsupported charset found: " \
  +                                            + repr(data[escstart:p+1])
  +                    escstart = -1
  +                p += 1
  +            elif data[p] == SO:
  +                shiftstate = 1
  +                p += 1
  +            elif data[p] == SI:
  +                shiftstate = 0
  +                p += 1
  +            elif data[p] == ESC:
  +                escstart = p
  +                p += 1
  +            else:
  +                if (ord(data[p]) | (shiftstate and 0x80 or 0x00)) >= 0x80:
  +                    codearea = G1
  +                else:
  +                    codearea = G0
  +
  +                if designation[codearea] == US_ASCII:
  +                    buffer.append(unicode(data[p], "ascii", errors))
  +                    p += 1
  +                elif ord(data[p]) & 0x7F >= 0x20: # KSC5601_1987
  +                    c = data[p:p+2]
  +                    p += 2
  +                    if len(c) == 2:
  +                        c = chr(ord(c[0])|0x80) + chr(ord(c[1])|0x80)
  +                        buffer.append(unicode(c, KSC5601_CODEC, errors))
  +                else: # control characters
  +                    buffer.append(unichr(ord(data[p]) & 0x7F))
  +                    p += 1
  +
  +        return (u''.join(buffer), len(data))
  +
  +class StreamWriter(Codec, codecs.StreamWriter):
  +    pass
  +
  +class StreamReader(Codec, codecs.StreamReader):
  +    pass
  +    # not implemented.
  +    # (JapaneseCodecs's implementation is so different to adopt.)
  +
  +### encodings module API
  +
  +def getregentry():
  +    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
  +
  +# ex: ts=8 sts=4 et
  
  
  
  1.5       +183 -5    KoreanCodecs/korean/johab.py
  
  Index: johab.py
  ===================================================================
  RCS file: /cvsroot/koco/KoreanCodecs/korean/johab.py,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- johab.py	9 Jan 2003 21:35:48 -0000	1.4
  +++ johab.py	12 Jan 2003 22:54:12 -0000	1.5
  @@ -17,10 +17,188 @@
   # along with KoreanCodecs; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
  -# $Id: johab.py,v 1.4 2003/01/09 21:35:48 perky Exp $
  +# $Id: johab.py,v 1.5 2003/01/12 22:54:12 perky Exp $
   #
   
  -try:
  -    from korean.c.johab import *
  -except ImportError:
  -    from korean.python.johab import *
  +import codecs
  +
  +from korean.hangul import Jaeum, Moeum, ishangul, split, join
  +encmap, decmap = {}, {}
  +
  +johab2uni_chosung = {
  +    1: u'',         2: Jaeum.G,     3: Jaeum.GG,    4: Jaeum.N,
  +    5: Jaeum.D,     6: Jaeum.DD,    7: Jaeum.L,     8: Jaeum.M,
  +    9: Jaeum.B,     10: Jaeum.BB,   11: Jaeum.S,    12: Jaeum.SS,
  +    13: Jaeum.NG,   14: Jaeum.J,    15: Jaeum.JJ,   16: Jaeum.C,
  +    17: Jaeum.K,    18: Jaeum.T,    19: Jaeum.P,    20: Jaeum.H,
  +}
  +johab2uni_jungsung = {
  +    2: u'',         3: Moeum.A,     4: Moeum.AE,    5: Moeum.YA,
  +    6: Moeum.YAE,   7: Moeum.EO,    10: Moeum.E,    11: Moeum.YEO,
  +    12: Moeum.YE,   13: Moeum.O,    14: Moeum.WA,   15: Moeum.WAE,
  +    18: Moeum.OE,   19: Moeum.YO,   20: Moeum.U,    21: Moeum.WEO,
  +    22: Moeum.WE,   23: Moeum.WI,   26: Moeum.YU,   27: Moeum.EU,
  +    28: Moeum.YI,   29: Moeum.I
  +}
  +johab2uni_jongsung = {
  +    1: u'',         2: Jaeum.G,     3: Jaeum.GG,    4: Jaeum.GS,
  +    5: Jaeum.N,     6: Jaeum.NJ,    7: Jaeum.NH,    8: Jaeum.D,
  +    9: Jaeum.L,     10: Jaeum.LG,   11: Jaeum.LM,   12: Jaeum.LB,
  +    13: Jaeum.LS,   14: Jaeum.LT,   15: Jaeum.LP,   16: Jaeum.LH,
  +    17: Jaeum.M,    19: Jaeum.B,    20: Jaeum.BS,   21: Jaeum.S,
  +    22: Jaeum.SS,   23: Jaeum.NG,   24: Jaeum.J,    25: Jaeum.C,
  +    26: Jaeum.K,    27: Jaeum.T,    28: Jaeum.P,    29: Jaeum.H
  +}
  +
  +uni2johab_chosung = {}
  +uni2johab_jungsung = {}
  +uni2johab_jongsung = {}
  +for k, v in johab2uni_chosung.items():
  +    uni2johab_chosung[v] = k
  +for k, v in johab2uni_jungsung.items():
  +    uni2johab_jungsung[v] = k
  +for k, v in johab2uni_jongsung.items():
  +    uni2johab_jongsung[v] = k
  +
  +class Codec(codecs.Codec):
  +
  +    # Unicode to character buffer
  +    def encode(self, data, errors='strict'):
  +        global encmap
  +
  +        if errors not in ('strict', 'ignore', 'replace'):
  +            raise ValueError, "unknown error handling"
  +        buffer = []
  +
  +        for c in data:
  +            if c < u'\u0080':
  +                buffer.append(c.encode("ascii", errors))
  +            elif ishangul(c):
  +                cho, jung, jong = split(c) # all hangul can success
  +                cho, jung, jong = (
  +                    uni2johab_chosung[cho],
  +                    uni2johab_jungsung[jung],
  +                    uni2johab_jongsung[jong]
  +                )
  +                code = 0x8000 | (cho<<10) | (jung<<5) | jong
  +                buffer.append(chr(code>>8) + chr(code&0xFF))
  +            else:
  +                if not encmap:
  +                    from korean.mappings import johab_ideograph
  +                    encmap = johab_ideograph.encoding_map
  +
  +                if encmap.has_key(c):
  +                    buffer.append(encmap[c])
  +                elif errors == 'replace':
  +                    buffer.append('\x84\x41')
  +                elif errors == 'strict':
  +                    raise UnicodeError, "cannot map \\u%04x to JOHAB" % ord(c)
  +
  +        return (''.join(buffer), len(data))
  +
  +    # character buffer to Unicode
  +    def decode(self, data, errors='strict'):
  +        global decmap
  +
  +        if errors not in ('strict', 'ignore', 'replace'):
  +            raise ValueError, "unknown error handling"
  +
  +        buffer = []
  +        data = str(data) # character buffer compatible object
  +        size = len(data)
  +        p = 0
  +        while p < size:
  +            if data[p] < '\x80':
  +                buffer.append(unicode(data[p], "ascii", errors))
  +                p += 1
  +            else:
  +                c = data[p:p+2]
  +                p += 2
  +                if len(c) == 2:
  +                    code = (ord(c[0])<<8) | ord(c[1])
  +                    cho = (code >> 10) & 0x1f
  +                    jung = (code >> 5) & 0x1f
  +                    jong = (code) & 0x1f
  +                    if ( johab2uni_chosung.has_key(cho) and
  +                         johab2uni_jungsung.has_key(jung) and
  +                         johab2uni_jongsung.has_key(jong) ):
  +                        buffer.append( join([
  +                            johab2uni_chosung[cho],
  +                            johab2uni_jungsung[jung],
  +                            johab2uni_jongsung[jong]
  +                        ]) )
  +                        continue
  +                        
  +                    if not decmap:
  +                        from korean.mappings import johab_ideograph
  +                        decmap = johab_ideograph.decoding_map
  +
  +                    if decmap.has_key(c):
  +                        buffer.append(decmap[c])
  +                        continue
  +
  +                if errors == 'replace':
  +                    buffer.append(u'\uFFFD') # REPLACEMENT CHARACTER
  +                elif errors == 'strict':
  +                    raise UnicodeError, "unexpected byte 0x%02x%02x found" % tuple(map(ord, c))
  +
  +        return (u''.join(buffer), size)
  +
  +
  +class StreamWriter(Codec, codecs.StreamWriter):
  +    pass
  +
  +
  +class StreamReader(Codec, codecs.StreamReader):
  +
  +    def __init__(self, stream, errors='strict'):
  +        codecs.StreamReader.__init__(self, stream, errors)
  +        self.data = ''
  +
  +    def _read(self, func, size):
  +        if size == 0:
  +            return u''
  +        if size is None or size < 0:
  +            data = self.data + func()
  +            self.data = ''
  +        else:
  +            data = self.data + func(max(size, 2) - len(self.data))
  +            size = len(data)
  +            p = 0
  +            while p < size:
  +                if data[p] < "\x80":
  +                    p = p + 1
  +                elif p + 2 <= size:
  +                    p = p + 2
  +                else:
  +                    break
  +            data, self.data = data[:p], data[p:]
  +        return self.decode(data)[0]
  +
  +    def read(self, size=-1):
  +        return self._read(self.stream.read, size)
  +
  +    def readline(self, size=-1):
  +        return self._read(self.stream.readline, size)
  +
  +    def readlines(self, size=-1):
  +        data = self._read(self.stream.read, size)
  +        buffer = []
  +        end = 0
  +        while 1:
  +            pos = data.find(u'\n', end)
  +            if pos < 0:
  +                if end < len(data):
  +                    buffer.append(data[end:])
  +                break
  +            buffer.append(data[end:pos+1])
  +            end = pos+1
  +        return buffer
  +    def reset(self):
  +        self.data = ''
  +
  +### encodings module API
  +
  +def getregentry():
  +    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
  +
  
  
  
  1.2       +181 -5    KoreanCodecs/korean/mackorean.py
  
  Index: mackorean.py
  ===================================================================
  RCS file: /cvsroot/koco/KoreanCodecs/korean/mackorean.py,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- mackorean.py	9 Jan 2003 22:40:39 -0000	1.1
  +++ mackorean.py	12 Jan 2003 22:54:12 -0000	1.2
  @@ -17,10 +17,186 @@
   # along with KoreanCodecs; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
  -# $Id: mackorean.py,v 1.1 2003/01/09 22:40:39 perky Exp $
  +# $Id: mackorean.py,v 1.2 2003/01/12 22:54:12 perky Exp $
   #
   
  -try:
  -    from korean.c.mackorean import *
  -except ImportError:
  -    from korean.python.mackorean import *
  +import codecs
  +from korean.mappings import ksc5601_hangul, appleextension
  +encmap_hangul, decmap_hangul = ksc5601_hangul.encoding_map, ksc5601_hangul.decoding_map
  +encmap_apple, decmap_apple = appleextension.encoding_map, appleextension.decoding_map
  +encmap_ideo, decmap_ideo = {}, {}
  +encmap_misc, decmap_misc = {}, {}
  +
  +class Codec(codecs.Codec):
  +
  +    # Unicode to character buffer
  +    def encode(self, data, errors='strict'):
  +        global encmap_ideo, encmap_misc
  +
  +        if errors not in ('strict', 'ignore', 'replace'):
  +            raise ValueError, "unknown error handling"
  +        buffer  = []
  +        p       = 0
  +        size    = len(data)
  +        
  +        while p < size:
  +            aemap = appleextension.multilevel_encmap
  +            relp  = 0
  +            while p + relp < size and aemap.has_key(data[p + relp]):
  +                aemap = aemap[data[p + relp]]
  +                relp += 1
  +            if aemap.has_key(None):
  +                buffer.append(aemap[None])
  +                p += relp
  +                continue
  +
  +            c = data[p]
  +            p += 1
  +
  +            if c < u'\u0080':
  +                buffer.append(c.encode("ascii", errors))
  +            elif encmap_hangul.has_key(c):
  +                buffer.append(encmap_hangul[c])
  +            elif encmap_apple.has_key(c):
  +                buffer.append(encmap_apple[c])
  +            else:
  +                if not encmap_misc:
  +                    from korean.mappings import ksc5601_misc
  +                    encmap_misc = ksc5601_misc.encoding_map
  +                if encmap_misc.has_key(c):
  +                    buffer.append(encmap_misc[c])
  +                    continue
  +
  +                if not encmap_ideo:
  +                    from korean.mappings import ksc5601_ideograph
  +                    encmap_ideo = ksc5601_ideograph.encoding_map
  +                if encmap_ideo.has_key(c):
  +                    buffer.append(encmap_ideo[c])
  +                    continue
  +
  +                if errors == 'replace':
  +                    buffer.append('\xa1\xa1')
  +                elif errors == 'strict':
  +                    raise UnicodeError, ("cannot map "
  +                            "\\u%04x to MacKorean") % ord(c)
  +
  +        return (''.join(buffer), len(data))
  +
  +    # character buffer to Unicode
  +    def decode(self, data, errors='strict'):
  +        global decmap_ideo, decmap_misc
  +
  +        if errors not in ('strict', 'ignore', 'replace'):
  +            raise ValueError, "unknown error handling"
  +
  +        buffer = []
  +        data = str(data) # character buffer compatible object
  +        size = len(data)
  +        p = 0
  +        while p < size:
  +            if data[p] < '\x80':
  +                buffer.append(unicode(data[p], "ascii", errors))
  +                p += 1
  +            elif data[p] <= '\xa0' or data[p] == '\xff':
  +                if decmap_apple.has_key(data[p]):
  +                    buffer.append(decmap_apple[data[p]])
  +                    p += 1
  +                    continue
  +
  +                if errors == 'replace':
  +                    buffer.append(u'\uFFFD') # REPLACEMENT CHARACTER
  +                elif errors == 'strict':
  +                    raise UnicodeError, "unexpected byte %s found" % (
  +                            hex(ord(data[p])))
  +                p += 1
  +            else:
  +                c = data[p:p+2]
  +                p += 2
  +                if len(c) == 2:
  +                    if decmap_hangul.has_key(c):
  +                        buffer.append(decmap_hangul[c])
  +                        continue
  +                    elif decmap_apple.has_key(c):
  +                        buffer.append(decmap_apple[c])
  +                        continue
  +
  +                    if not decmap_misc:
  +                        from korean.mappings import ksc5601_misc
  +                        decmap_misc = ksc5601_misc.decoding_map
  +                    if decmap_misc.has_key(c):
  +                        buffer.append(decmap_misc[c])
  +                        continue
  +    
  +                    if not decmap_ideo:
  +                        from korean.mappings import ksc5601_ideograph
  +                        decmap_ideo = ksc5601_ideograph.decoding_map
  +                    if decmap_ideo.has_key(c):
  +                        buffer.append(decmap_ideo[c])
  +                        continue
  +
  +                if errors == 'replace':
  +                    buffer.append(u'\uFFFD') # REPLACEMENT CHARACTER
  +                elif errors == 'strict':
  +                    raise UnicodeError, "unexpected byte 0x%s found" % (
  +                            ''.join(["%02x"%ord(x) for x in c]) )
  +
  +        return (u''.join(buffer), size)
  +
  +
  +class StreamWriter(Codec, codecs.StreamWriter):
  +    pass
  +
  +
  +class StreamReader(Codec, codecs.StreamReader):
  +
  +    def __init__(self, stream, errors='strict'):
  +        codecs.StreamReader.__init__(self, stream, errors)
  +        self.data = ''
  +
  +    def _read(self, func, size):
  +        if size == 0:
  +            return u''
  +        if size is None or size < 0:
  +            data = self.data + func()
  +            self.data = ''
  +        else:
  +            data = self.data + func(max(size, 2) - len(self.data))
  +            size = len(data)
  +            p = 0
  +            while p < size:
  +                if data[p] < "\xa1" or data[p] == "\xff":
  +                    p = p + 1
  +                elif p + 2 <= size:
  +                    p = p + 2
  +                else:
  +                    break
  +            data, self.data = data[:p], data[p:]
  +        return self.decode(data)[0]
  +
  +    def read(self, size=-1):
  +        return self._read(self.stream.read, size)
  +
  +    def readline(self, size=-1):
  +        return self._read(self.stream.readline, size)
  +
  +    def readlines(self, size=-1):
  +        data = self._read(self.stream.read, size)
  +        buffer = []
  +        end = 0
  +        while 1:
  +            pos = data.find(u'\n', end)
  +            if pos < 0:
  +                if end < len(data):
  +                    buffer.append(data[end:])
  +                break
  +            buffer.append(data[end:pos+1])
  +            end = pos+1
  +        return buffer
  +    def reset(self):
  +        self.data = ''
  +
  +
  +def getregentry():
  +    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
  +
  +# ex: ts=8 sts=4 et
  
  
  
  1.5       +177 -5    KoreanCodecs/korean/qwerty2bul.py
  
  Index: qwerty2bul.py
  ===================================================================
  RCS file: /cvsroot/koco/KoreanCodecs/korean/qwerty2bul.py,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- qwerty2bul.py	9 Jan 2003 21:35:48 -0000	1.4
  +++ qwerty2bul.py	12 Jan 2003 22:54:12 -0000	1.5
  @@ -17,10 +17,182 @@
   # along with KoreanCodecs; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
  -# $Id: qwerty2bul.py,v 1.4 2003/01/09 21:35:48 perky Exp $
  +# $Id: qwerty2bul.py,v 1.5 2003/01/12 22:54:12 perky Exp $
   #
   
  -try:
  -    from korean.c.qwerty2bul import *
  -except ImportError:
  -    from korean.python.qwerty2bul import *
  +import codecs
  +from korean.hangul import Moeum, Jaeum, Chosung, Jungsung, Jongsung
  +from korean.hangul import ishangul, join, split, isJaeum, isMoeum
  +
  +codekeymap = {
  +        Jaeum.G: 'r',        Jaeum.GG: 'R',       Jaeum.GS: 'rt',
  +        Jaeum.N: 's',        Jaeum.NJ:'sw',       Jaeum.NH: 'sg',       Jaeum.D: 'e',
  +        Jaeum.DD:'E',        Jaeum.L: 'f',        Jaeum.LG: 'fr',       Jaeum.LM: 'fa',
  +        Jaeum.LB:'fq',       Jaeum.LS:'ft',       Jaeum.LT: 'fx',       Jaeum.LP: 'fv',
  +        Jaeum.LH:'fg',       Jaeum.M: 'a',        Jaeum.B:  'q',        Jaeum.BB: 'Q',
  +        Jaeum.BS:'qt',       Jaeum.S: 't',        Jaeum.SS: 'T',        Jaeum.NG:  'd',
  +        Jaeum.J: 'w',        Jaeum.JJ:'W',        Jaeum.C:  'c',        Jaeum.K:  'z',
  +        Jaeum.T: 'x',        Jaeum.P: 'v',        Jaeum.H:  'g',
  +
  +        Moeum.A: 'k',        Moeum.AE:'o',        Moeum.YA: 'i',        Moeum.YAE:'O',
  +        Moeum.EO:'j',        Moeum.E: 'p',        Moeum.YEO:'u',        Moeum.YE: 'P',
  +        Moeum.O: 'h',        Moeum.WA:'hk',       Moeum.WAE:'ho',       Moeum.OE: 'hl',
  +        Moeum.YO:'y',        Moeum.U: 'n',        Moeum.WEO:'nj',       Moeum.WE: 'np',
  +        Moeum.WI:'nl',       Moeum.YU:'b',        Moeum.EU: 'm',        Moeum.YI: 'ml',
  +        Moeum.I: 'l',
  +
  +        u'': '',
  +}
  +
  +keycodemap = {}
  +for k, v in codekeymap.items():
  +        keycodemap[v] = k
  +        keycodemap.setdefault(v.upper(), k)
  +keycodes = ''.join(keycodemap.keys())
  +del k, v
  +
  +
  +class Automata_Hangul2:
  +    
  +    # must Unicode in / Unicode out
  +
  +    def __init__(self):
  +        self.clear()
  +
  +    def pushcomp(self):
  +        if self.chosung and not self.jungsung:
  +            self.word_valid = 0
  +        self.word_comp.append(join([self.chosung, self.jungsung, self.jongsung]))
  +        self.clearcomp()
  +
  +    def clearcomp(self):
  +        self.chosung = u''
  +        self.jungsung = u''
  +        self.jongsung = u''
  +
  +    def clear(self):
  +        self.buff = ['']
  +        self.word_raw = []
  +        self.word_comp = []
  +        self.word_valid = 1
  +        self.clearcomp()
  +
  +    def convert(self, s):
  +        self.clear()
  +
  +        map(self.feed, s)
  +        self.finalize()
  +
  +        return u''.join(self.buff)
  +    
  +    def finalize(self):
  +        if self.chosung or self.jungsung or self.jongsung:
  +            self.pushcomp()
  +        if self.word_raw or self.word_comp:
  +            if self.word_valid:
  +                self.buff.append(u''.join(self.word_comp))
  +            else:
  +                self.word_valid = 1
  +                self.buff.append(u''.join(self.word_raw))
  +            
  +            self.word_raw, self.word_comp = [], []
  +
  +    def feed(self, c):
  +        self.word_raw.append(c)
  +        if c in keycodes:
  +            code = keycodemap[c]
  +            if isJaeum(code):
  +                if not self.chosung: # chosung O
  +                    if self.jungsung or self.jongsung:
  +                        self.word_valid = 0
  +                    else:
  +                        self.chosung = code
  +                elif not self.jungsung: # chosung O  jungsung X
  +                    if self.jongsung:
  +                        self.word_valid = 0
  +                    else:
  +                        self.pushcomp()
  +                        self.chosung = code
  +                elif not self.jongsung: # chosung O  jungsung O  jongsung X
  +                    if code not in Jongsung:
  +                        self.pushcomp()
  +                        self.chosung = code
  +                    else:
  +                        self.jongsung = code
  +                else: # full
  +                    trymul = codekeymap[self.jongsung] + c
  +                    if keycodemap.has_key(trymul): # can be multi jongsung
  +                        self.jongsung = keycodemap[trymul]
  +                    else:
  +                        self.pushcomp()
  +                        self.chosung = code
  +            else: # MOEUM...
  +                if not self.jongsung:
  +                    if not self.jungsung: # jungsung X  jongsung X
  +                        self.jungsung = code
  +                    else: # jungsung O  jongsung X
  +                        trymul = codekeymap[self.jungsung] + c
  +                        if keycodemap.has_key(trymul): # can be multi jungsung
  +                            self.jungsung = keycodemap[trymul]
  +                        else:
  +                            self.pushcomp()
  +                            self.jungsung = code
  +                else: # jongsung O
  +                    if len(codekeymap[self.jongsung]) > 1:
  +                        ojong = keycodemap[codekeymap[self.jongsung][:-1]]
  +                        ncho  = keycodemap[codekeymap[self.jongsung][-1]]
  +                        self.jongsung = ojong
  +                        self.pushcomp()
  +                        self.chosung = ncho
  +                        self.jungsung = code
  +                    else:
  +                        njong = self.jongsung
  +                        self.jongsung = u''
  +                        self.pushcomp()
  +                        self.chosung = njong
  +                        self.jungsung = code
  +        else: # non key code
  +            self.finalize()
  +            self.buff.append(c)
  +
  +
  +class Codec(codecs.Codec):
  +
  +    BASECODEC = 'korean.cp949' # fallback codec of decoder
  +
  +    # Unicode to key stroke
  +    def encode(self, data, errors='strict'):
  +        if errors not in ('strict', 'ignore', 'replace'):
  +            raise ValueError, "unknown error handling"
  +
  +        r = []
  +        for c in data:
  +            if c <= u'\u0080':
  +                r.append(c.encode('ascii'))
  +            elif not ishangul(c):
  +                r.append(c.encode(self.BASECODEC, errors=errors))
  +            else:
  +                for k in split(c):
  +                    r.append(codekeymap[k])
  +
  +        r = ''.join(r)
  +        return (r, len(r))
  +
  +    # key stroke to Unicode
  +    def decode(self, data, errors='strict'):
  +        if errors not in ('strict', 'ignore', 'replace'):
  +            raise ValueError, "unknown error handling"
  +
  +        s = unicode(data, self.BASECODEC, errors)
  +        am = Automata_Hangul2()
  +        r = am.convert(s)
  +        return (r, len(r))
  +
  +class StreamWriter(Codec, codecs.StreamWriter):
  +    pass
  +
  +class StreamReader(Codec, codecs.StreamReader):
  +    pass
  +
  +def getregentry():
  +    return (Codec().encode, Codec().decode, StreamReader, StreamWriter)
  
  
  
  1.5       +31 -5     KoreanCodecs/korean/unijohab.py
  
  Index: unijohab.py
  ===================================================================
  RCS file: /cvsroot/koco/KoreanCodecs/korean/unijohab.py,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- unijohab.py	9 Jan 2003 21:35:48 -0000	1.4
  +++ unijohab.py	12 Jan 2003 22:54:12 -0000	1.5
  @@ -17,10 +17,36 @@
   # along with KoreanCodecs; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
  -# $Id: unijohab.py,v 1.4 2003/01/09 21:35:48 perky Exp $
  +# $Id: unijohab.py,v 1.5 2003/01/12 22:54:12 perky Exp $
   #
   
  -try:
  -    from korean.c.unijohab import *
  -except ImportError:
  -    from korean.python.unijohab import *
  +import codecs
  +from korean.hangul import ishangul, disjoint, conjoin
  +
  +class Codec(codecs.Codec):
  +
  +    # Unicode to character buffer
  +    def encode(self, data, errors='strict'):
  +        if errors not in ('strict', 'ignore', 'replace'):
  +            raise ValueError, "unknown error handling"
  +
  +        return disjoint(data).encode('utf-8', errors), len(data)
  +
  +    # character buffer to Unicode
  +    def decode(self, data, errors='strict'):
  +        if errors not in ('strict', 'ignore', 'replace'):
  +            raise ValueError, "unknown error handling"
  +
  +        return conjoin(unicode(data, 'utf-8', errors)), len(data)
  +
  +class StreamWriter(Codec, codecs.StreamWriter):
  +    pass
  +
  +class StreamReader(Codec, codecs.StreamReader):
  +    pass
  +    # XXX: Temporarily None.
  +
  +### encodings module API
  +
  +def getregentry():
  +    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)

[KoCo-CVS] [Commit] KoreanCodecs/korean cp949.py euc_kr.py iso_2022_kr.py johab.py mackorean.py qwer

[KoCo-CVS] [Commit] KoreanCodecs/korean cp949.py euc_kr.py iso_2022_kr.py johab.py mackorean.py qwerty2bul.py unijohab.py hangul.py