[KoCo-CVS] [Commit] KoreanCodecs/korean johab.py
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-01-13 08:10:37
|
perky 03/01/13 00:10:35

Modified: korean johab.py
Log:
Add PEP293 support to johab codec.

Revision  Changes    Path
1.6       +30 -22    KoreanCodecs/korean/johab.py

Index: johab.py
===================================================================
RCS file: /cvsroot/koco/KoreanCodecs/korean/johab.py,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- johab.py 12 Jan 2003 22:54:12 -0000 1.5
+++ johab.py 13 Jan 2003 08:10:35 -0000 1.6
@@ -5,7 +5,7 @@
 #
 # KoreanCodecs is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published
-# by the Free Software Foundation; either version 2 of the License, or
+# by the Free Software Foundation; either version 2.1 of the License, or
 # (at your option) any later version.
 #
 # KoreanCodecs is distributed in the hope that it will be useful,
@@ -17,14 +17,16 @@
 # along with KoreanCodecs; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 #
-# $Id: johab.py,v 1.5 2003/01/12 22:54:12 perky Exp $
+# $Id: johab.py,v 1.6 2003/01/13 08:10:35 perky Exp $
 #

 import codecs
-
+from korean.error_callback import *
 from korean.hangul import Jaeum, Moeum, ishangul, split, join

-encmap, decmap = {}, {}
+ENCODING = 'korean.johab'
+
+encmap, decmap = {}, {}

 johab2uni_chosung = {
     1: u'', 2: Jaeum.G, 3: Jaeum.GG, 4: Jaeum.N, 5: Jaeum.D, 6: Jaeum.DD, 7: Jaeum.L, 8: Jaeum.M,
@@ -66,13 +68,16 @@
     def encode(self, data, errors='strict'):
         global encmap

-        if errors not in ('strict', 'ignore', 'replace'):
-            raise ValueError, "unknown error handling"
-        buffer = []
+        errcb = lookup_error(errors)
+        buffer = []
+        pos = 0
+        size = len(data)
+
+        while pos < size:
+            c = data[pos]

-        for c in data:
             if c < u'\u0080':
-                buffer.append(c.encode("ascii", errors))
+                buffer.append(chr(ord(c)))
             elif ishangul(c):
                 cho, jung, jong = split(c) # all hangul can success
                 cho, jung, jong = (
@@ -89,10 +94,14 @@

                 if encmap.has_key(c):
                     buffer.append(encmap[c])
-                elif errors == 'replace':
-                    buffer.append('\x84\x41')
-                elif errors == 'strict':
-                    raise UnicodeError, "cannot map \\u%04x to JOHAB" % ord(c)
+                else:
+                    exc = UnicodeEncodeError(ENCODING, data, pos, pos+1,
+                        "cannot map \\u%04x to JOHAB" % ord(c))
+                    repl, pos = errcb(exc)
+                    buffer.append(repl.encode(ENCODING)) # must be 'strict'.
+                    continue
+
+            pos += 1

         return (''.join(buffer), len(data))

@@ -100,8 +109,7 @@
     def decode(self, data, errors='strict'):
         global decmap

-        if errors not in ('strict', 'ignore', 'replace'):
-            raise ValueError, "unknown error handling"
+        errcb = lookup_error(errors)
         buffer = []
         data = str(data) # character buffer compatible object

@@ -109,7 +117,7 @@
         p = 0
         while p < size:
             if data[p] < '\x80':
-                buffer.append(unicode(data[p], "ascii", errors))
+                buffer.append(unichr(ord(data[p])))
                 p += 1
             else:
                 c = data[p:p+2]
@@ -137,10 +145,11 @@
                     buffer.append(decmap[c])
                     continue

-                if errors == 'replace':
-                    buffer.append(u'\uFFFD') # REPLACEMENT CHARACTER
-                elif errors == 'strict':
-                    raise UnicodeError, "unexpected byte 0x%02x%02x found" % tuple(map(ord, c))
+                exc = UnicodeDecodeError(ENCODING, data, p-2, p,
+                    "unexpected byte 0x%02x%02x found" % (
+                    ord(c[0]), ord(c[1])))
+                repl, p = errcb(exc)
+                buffer.append(repl)

         return (u''.join(buffer), size)

@@ -197,8 +206,7 @@
     def reset(self):
         self.data = ''

-### encodings module API
-
 def getregentry():
     return (Codec().encode,Codec().decode,StreamReader,StreamWriter)

+# ex: ts=8 sts=4 et
|