[KoCo-CVS] [Commit] KoreanCodecs/korean johab.py

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

perky       03/01/13 00:10:35

  Modified:    korean   johab.py
  Log:
  Add PEP 293 support to johab codec.

  Revision  Changes    Path
  1.6       +30 -22    KoreanCodecs/korean/johab.py

  Index: johab.py
  ===================================================================
  RCS file: /cvsroot/koco/KoreanCodecs/korean/johab.py,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- johab.py	12 Jan 2003 22:54:12 -0000	1.5
  +++ johab.py	13 Jan 2003 08:10:35 -0000	1.6
  @@ -5,7 +5,7 @@
   #
   # KoreanCodecs is free software; you can redistribute it and/or modify
   # it under the terms of the GNU Lesser General Public License as published
  -# by the Free Software Foundation; either version 2 of the License, or
  +# by the Free Software Foundation; either version 2.1 of the License, or
   # (at your option) any later version.
   #
   # KoreanCodecs is distributed in the hope that it will be useful,
  @@ -17,14 +17,16 @@
   # along with KoreanCodecs; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
  -# $Id: johab.py,v 1.5 2003/01/12 22:54:12 perky Exp $
  +# $Id: johab.py,v 1.6 2003/01/13 08:10:35 perky Exp $
   #

   import codecs
  -
  +from korean.error_callback import *
   from korean.hangul import Jaeum, Moeum, ishangul, split, join
  -encmap, decmap = {}, {}

  +ENCODING = 'korean.johab'
  +
  +encmap, decmap = {}, {}
   johab2uni_chosung = {
       1: u'',         2: Jaeum.G,     3: Jaeum.GG,    4: Jaeum.N,
       5: Jaeum.D,     6: Jaeum.DD,    7: Jaeum.L,     8: Jaeum.M,
  @@ -66,13 +68,16 @@
       def encode(self, data, errors='strict'):
           global encmap

  -        if errors not in ('strict', 'ignore', 'replace'):
  -            raise ValueError, "unknown error handling"
  -        buffer = []
  +        errcb   = lookup_error(errors)
  +        buffer  = []
  +        pos     = 0
  +        size    = len(data)
  +
  +        while pos < size: 
  +            c = data[pos]

  -        for c in data:
               if c < u'\u0080':
  -                buffer.append(c.encode("ascii", errors))
  +                buffer.append(chr(ord(c)))
               elif ishangul(c):
                   cho, jung, jong = split(c) # all hangul can success
                   cho, jung, jong = (
  @@ -89,10 +94,14 @@

                   if encmap.has_key(c):
                       buffer.append(encmap[c])
  -                elif errors == 'replace':
  -                    buffer.append('\x84\x41')
  -                elif errors == 'strict':
  -                    raise UnicodeError, "cannot map \\u%04x to JOHAB" % ord(c)
  +                else:
  +                    exc = UnicodeEncodeError(ENCODING, data, pos, pos+1,
  +                            "cannot map \\u%04x to JOHAB" % ord(c))
  +                    repl, pos = errcb(exc)
  +                    buffer.append(repl.encode(ENCODING)) # must be 'strict'.
  +                    continue
  +
  +            pos += 1

           return (''.join(buffer), len(data))

  @@ -100,8 +109,7 @@
       def decode(self, data, errors='strict'):
           global decmap

  -        if errors not in ('strict', 'ignore', 'replace'):
  -            raise ValueError, "unknown error handling"
  +        errcb = lookup_error(errors)

           buffer = []
           data = str(data) # character buffer compatible object
  @@ -109,7 +117,7 @@
           p = 0
           while p < size:
               if data[p] < '\x80':
  -                buffer.append(unicode(data[p], "ascii", errors))
  +                buffer.append(unichr(ord(data[p])))
                   p += 1
               else:
                   c = data[p:p+2]
  @@ -137,10 +145,11 @@
                           buffer.append(decmap[c])
                           continue

  -                if errors == 'replace':
  -                    buffer.append(u'\uFFFD') # REPLACEMENT CHARACTER
  -                elif errors == 'strict':
  -                    raise UnicodeError, "unexpected byte 0x%02x%02x found" % tuple(map(ord, c))
  +                exc = UnicodeDecodeError(ENCODING, data, p-2, p,
  +                        "unexpected byte 0x%02x%02x found" % (
  +                            ord(c[0]), ord(c[1])))
  +                repl, p = errcb(exc)
  +                buffer.append(repl)

           return (u''.join(buffer), size)

  @@ -197,8 +206,7 @@
       def reset(self):
           self.data = ''

  -### encodings module API
  -
   def getregentry():
       return (Codec().encode,Codec().decode,StreamReader,StreamWriter)

  +# ex: ts=8 sts=4 et