[KoCo-CVS] [Commit] KoreanCodecs/korean/python mackorean.py
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-01-10 02:27:09
|
# perky 03/01/09 18:27:08
#
# Added: korean/python mackorean.py
# Log:
# Add basic implementation of MacKorean codec.
#
# Revision Changes Path
# 1.1 KoreanCodecs/korean/python/mackorean.py
#
# Index: mackorean.py
# ===================================================================
#
# This file is part of KoreanCodecs.
#
# Copyright(C) 2002-2003 Hye-Shik Chang <pe...@Fr...>.
#
# KoreanCodecs is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# KoreanCodecs is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with KoreanCodecs; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id: mackorean.py,v 1.1 2003/01/10 02:27:08 perky Exp $
#

import codecs
from korean.mappings import ksc5601_hangul, appleextension

# Hangul and Apple-extension tables are needed by practically every call,
# so they are bound at import time.
encmap_hangul, decmap_hangul = ksc5601_hangul.encoding_map, ksc5601_hangul.decoding_map
encmap_apple, decmap_apple = appleextension.encoding_map, appleextension.decoding_map
# The "misc" and ideograph tables start out empty and are imported lazily,
# the first time a character is not covered by the tables above.
encmap_ideo, decmap_ideo = {}, {}
encmap_misc, decmap_misc = {}, {}


class Codec(codecs.Codec):
    """Stateless MacKorean encoder/decoder.

    Lookups go through the KS C 5601 hangul/misc/ideograph tables plus the
    Apple extension tables from ``korean.mappings``.
    """

    # Unicode to character buffer
    def encode(self, data, errors='strict'):
        """Encode the unicode string *data* into a MacKorean byte string.

        errors -- 'strict' raises UnicodeError on an unmappable character,
                  'ignore' drops it, 'replace' emits '\\xa1\\xa1' instead.

        Returns a tuple (encoded string, len(data)).
        Raises ValueError for any other *errors* value.
        """
        global encmap_ideo, encmap_misc

        if errors not in ('strict', 'ignore', 'replace'):
            raise ValueError("unknown error handling")
        buffer = []

        p = 0
        size = len(data)
        while p < size:
            # Apple extensions may map a *sequence* of characters to one
            # code.  multilevel_encmap is walked as nested maps keyed by
            # successive characters; a node holding the key None carries
            # the encoded value for the sequence that ends at that node.
            aemap = appleextension.multilevel_encmap
            relp = 0
            matched, matchlen = None, 0
            while p + relp < size and aemap.has_key(data[p + relp]):
                aemap = aemap[data[p + relp]]
                relp += 1
                if aemap.has_key(None):
                    # Remember the longest complete sequence seen so far,
                    # so that running into a dead end deeper in the map
                    # does not throw away a valid shorter match.
                    matched, matchlen = aemap[None], relp
            if matchlen:
                buffer.append(matched)
                p += matchlen
                continue

            c = data[p]
            p += 1
            if c < u'\u0080':
                buffer.append(c.encode("ascii", errors))
            elif encmap_hangul.has_key(c):
                buffer.append(encmap_hangul[c])
            else:
                # Load the less frequently needed tables on first use.
                if not encmap_misc:
                    from korean.mappings import ksc5601_misc
                    encmap_misc = ksc5601_misc.encoding_map
                if encmap_misc.has_key(c):
                    buffer.append(encmap_misc[c])
                    continue

                if not encmap_ideo:
                    from korean.mappings import ksc5601_ideograph
                    encmap_ideo = ksc5601_ideograph.encoding_map
                if encmap_ideo.has_key(c):
                    buffer.append(encmap_ideo[c])
                    continue

                # Unmappable character ('ignore' simply falls through).
                if errors == 'replace':
                    buffer.append('\xa1\xa1')
                elif errors == 'strict':
                    raise UnicodeError(
                        "cannot map \\u%04x to MacKorean" % ord(c))

        return (''.join(buffer), len(data))

    # character buffer to Unicode
    def decode(self, data, errors='strict'):
        """Decode the MacKorean byte string *data* into a unicode string.

        Bytes below 0x80 are decoded as ASCII.  Bytes 0x80-0xa0 and 0xff
        are looked up as single-byte Apple extension codes.  Everything
        else is consumed two bytes at a time and looked up in the hangul,
        Apple extension, misc and ideograph tables, in that order.

        errors -- 'strict' raises UnicodeError on an undecodable byte or
                  byte pair, 'ignore' skips it, 'replace' emits U+FFFD.

        Returns a tuple (unicode string, len(data)).
        Raises ValueError for any other *errors* value.
        """
        global decmap_ideo, decmap_misc

        if errors not in ('strict', 'ignore', 'replace'):
            raise ValueError("unknown error handling")
        buffer = []

        data = str(data)  # character buffer compatible object
        size = len(data)
        p = 0
        while p < size:
            if data[p] < '\x80':
                buffer.append(unicode(data[p], "ascii", errors))
                p += 1
            elif data[p] <= '\xa0' or data[p] == '\xff':
                # Single-byte Apple extension range.
                if decmap_apple.has_key(data[p]):
                    buffer.append(decmap_apple[data[p]])
                    p += 1
                    continue

                if errors == 'replace':
                    buffer.append(u'\uFFFD')  # REPLACEMENT CHARACTER
                elif errors == 'strict':
                    raise UnicodeError("unexpected byte %s found" % (
                        hex(ord(data[p]))))
                p += 1
            else:
                c = data[p:p+2]
                p += 2
                # len(c) is 1 when the input ends on a lone lead byte;
                # that case goes straight to the error handling below.
                if len(c) == 2:
                    if decmap_hangul.has_key(c):
                        buffer.append(decmap_hangul[c])
                        continue
                    elif decmap_apple.has_key(c):
                        buffer.append(decmap_apple[c])
                        continue

                    # Load the less frequently needed tables on first use.
                    if not decmap_misc:
                        from korean.mappings import ksc5601_misc
                        decmap_misc = ksc5601_misc.decoding_map
                    if decmap_misc.has_key(c):
                        buffer.append(decmap_misc[c])
                        continue

                    if not decmap_ideo:
                        from korean.mappings import ksc5601_ideograph
                        decmap_ideo = ksc5601_ideograph.decoding_map
                    if decmap_ideo.has_key(c):
                        buffer.append(decmap_ideo[c])
                        continue

                if errors == 'replace':
                    buffer.append(u'\uFFFD')  # REPLACEMENT CHARACTER
                elif errors == 'strict':
                    raise UnicodeError("unexpected byte 0x%s found" % (
                        ''.join(["%02x" % ord(x) for x in c])))

        return (u''.join(buffer), size)


from korean.python import euc_kr


class StreamWriter(Codec, euc_kr.StreamWriter):
    """Stream writer: euc_kr.StreamWriter using the MacKorean Codec."""
    pass


class StreamReader(Codec, euc_kr.StreamReader):
    """Stream reader: euc_kr.StreamReader using the MacKorean Codec."""

    def _read(self, func, size):
        """Fetch bytes through *func* and return them decoded.

        func -- callable returning raw bytes; called as func() for an
                unbounded read and func(n) for a bounded one.
        size -- 0 returns u'' at once; None or a negative value reads
                everything; otherwise at most max(size, 2) bytes,
                including bytes left over from the previous call, are
                considered.

        A trailing lead byte whose second byte has not arrived yet is kept
        in self.data for the next call instead of being decoded.
        NOTE(review): self.data is presumably initialised by
        euc_kr.StreamReader -- confirm against that class.
        """
        if size == 0:
            return u''
        if size is None or size < 0:
            data = self.data + func()
            self.data = ''
        else:
            data = self.data + func(max(size, 2) - len(self.data))
            size = len(data)
            # Find the longest prefix that ends on a character boundary.
            p = 0
            while p < size:
                if data[p] < "\xa1" or data[p] == "\xff":
                    p = p + 1       # single-byte code
                elif p + 2 <= size:
                    p = p + 2       # complete two-byte code
                else:
                    break           # lone lead byte: keep it for later
            data, self.data = data[:p], data[p:]
        # Honour the error policy of the stream when it has one; 'strict'
        # is the fallback and matches the previous behaviour.
        return self.decode(data, getattr(self, 'errors', 'strict'))[0]


### encodings module API

def getregentry():
    """Return the (encoder, decoder, StreamReader, StreamWriter) tuple."""
    return (Codec().encode, Codec().decode, StreamReader, StreamWriter)

# ex: ts=8 sts=4 et