[KoCo-CVS] [Commit] KoreanCodecs-Pure/korean cp949.py euc_kr.py johab.py
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-07-23 20:31:44
|
perky       03/07/23 13:31:43

Modified:    korean   cp949.py euc_kr.py johab.py
Log:
Fix for new mappings

Revision  Changes    Path
1.2       +4 -4      KoreanCodecs-Pure/korean/cp949.py

Index: cp949.py
===================================================================
RCS file: /cvsroot/koco/KoreanCodecs-Pure/korean/cp949.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- cp949.py 23 Jul 2003 19:09:50 -0000 1.1
+++ cp949.py 23 Jul 2003 20:31:43 -0000 1.2
@@ -24,15 +24,15 @@
 # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 #
-# $Id: cp949.py,v 1.1 2003/07/23 19:09:50 perky Exp $
+# $Id: cp949.py,v 1.2 2003/07/23 20:31:43 perky Exp $
 #
 
 from korean import euc_kr
-from korean.mappings import uhc
+from korean.mappings import cp949ext
 
 class Codec(euc_kr.Codec):
-    uhc_encmap = uhc.encoding_map
-    uhc_decmap = uhc.decoding_map
+    uhc_encmap = cp949ext.cp949extencmap
+    uhc_decmap = cp949ext.cp949extdecmap
 
 class StreamWriter(Codec, euc_kr.StreamWriter):
     pass

1.2       +10 -44    KoreanCodecs-Pure/korean/euc_kr.py

Index: euc_kr.py
===================================================================
RCS file: /cvsroot/koco/KoreanCodecs-Pure/korean/euc_kr.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- euc_kr.py 23 Jul 2003 19:09:50 -0000 1.1
+++ euc_kr.py 23 Jul 2003 20:31:43 -0000 1.2
@@ -24,16 +24,11 @@
 # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 #
-# $Id: euc_kr.py,v 1.1 2003/07/23 19:09:50 perky Exp $
+# $Id: euc_kr.py,v 1.2 2003/07/23 20:31:43 perky Exp $
 #
 
 import codecs
-
-from korean.mappings import ksc5601_hangul
-encmap_hangul = ksc5601_hangul.encoding_map
-decmap_hangul = ksc5601_hangul.decoding_map
-encmap_ideo, decmap_ideo = {}, {}
-encmap_misc, decmap_misc = {}, {}
+from korean.mappings.ksx1001 import *
 
 class Codec(codecs.Codec):
 
@@ -51,29 +46,14 @@
         for c in data:
             if c < u'\u0080':
                 buffer.append(c.encode("ascii", errors))
-            elif encmap_hangul.has_key(c):
-                buffer.append(encmap_hangul[c])
+            elif ksx1001encmap.has_key(c):
+                buffer.append(ksx1001encmap[c])
             elif self.uhc_encmap.has_key(c):
                 buffer.append(self.uhc_encmap[c])
-            else:
-                if not encmap_misc:
-                    from korean.mappings import ksc5601_misc
-                    encmap_misc = ksc5601_misc.encoding_map
-                if encmap_misc.has_key(c):
-                    buffer.append(encmap_misc[c])
-                    continue
-
-                if not encmap_ideo:
-                    from korean.mappings import ksc5601_ideograph
-                    encmap_ideo = ksc5601_ideograph.encoding_map
-                if encmap_ideo.has_key(c):
-                    buffer.append(encmap_ideo[c])
-                    continue
-
-                if errors == 'replace':
-                    buffer.append('\xa1\xa1')
-                elif errors == 'strict':
-                    raise UnicodeError, "cannot map \\u%04x to EUC-KR" % ord(c)
+            elif errors == 'replace':
+                buffer.append('?')
+            elif errors == 'strict':
+                raise UnicodeError, "cannot map \\u%04x to EUC-KR" % ord(c)
 
         return (''.join(buffer), len(data))
 
@@ -96,25 +76,11 @@
                 c = data[p:p+2]
                 p += 2
                 if len(c) == 2:
-                    if decmap_hangul.has_key(c):
-                        buffer.append(decmap_hangul[c])
+                    if ksx1001decmap.has_key(c):
+                        buffer.append(ksx1001decmap[c])
                         continue
                     elif self.uhc_decmap.has_key(c):
                         buffer.append(self.uhc_decmap[c])
-                        continue
-
-                    if not decmap_misc:
-                        from korean.mappings import ksc5601_misc
-                        decmap_misc = ksc5601_misc.decoding_map
-                    if decmap_misc.has_key(c):
-                        buffer.append(decmap_misc[c])
-                        continue
-
-                    if not decmap_ideo:
-                        from korean.mappings import ksc5601_ideograph
-                        decmap_ideo = ksc5601_ideograph.decoding_map
-                    if decmap_ideo.has_key(c):
-                        buffer.append(decmap_ideo[c])
                         continue
 
                 if errors == 'replace':

1.2       +43 -20    KoreanCodecs-Pure/korean/johab.py

Index: johab.py
===================================================================
RCS file: /cvsroot/koco/KoreanCodecs-Pure/korean/johab.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- johab.py 23 Jul 2003 19:09:52 -0000 1.1
+++ johab.py 23 Jul 2003 20:31:43 -0000 1.2
@@ -24,11 +24,11 @@
 # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 #
-# $Id: johab.py,v 1.1 2003/07/23 19:09:52 perky Exp $
+# $Id: johab.py,v 1.2 2003/07/23 20:31:43 perky Exp $
 #
 
 import codecs
-
+from korean.mappings.ksx1001 import *
 from korean.hangul import Jaeum, Moeum, ishangul, split, join
 
 encmap, decmap = {}, {}
@@ -89,17 +89,26 @@
                 )
                 code = 0x8000 | (cho<<10) | (jung<<5) | jong
                 buffer.append(chr(code>>8) + chr(code&0xFF))
-            else:
-                if not encmap:
-                    from korean.mappings import johab_ideograph
-                    encmap = johab_ideograph.encoding_map
-
-                if encmap.has_key(c):
-                    buffer.append(encmap[c])
-                elif errors == 'replace':
-                    buffer.append('\x84\x41')
-                elif errors == 'strict':
-                    raise UnicodeError, "cannot map \\u%04x to JOHAB" % ord(c)
+            elif ksx1001encmap.has_key(c):
+                c1, c2 = map(ord, ksx1001encmap[c])
+                if c1 < 0xca:
+                    c1 = c1 + 0x111
+                else:
+                    c1 = c1 + 0xf6
+                if c1 & 1:
+                    c2 = c2 - 0x43
+                else:
+                    c2 = c2 - 0xa1
+                c1 = c1 >> 1
+                if c2 < 0x4e:
+                    c2 += 0x31
+                else:
+                    c2 += 0x43
+                buffer.append(chr(c1) + chr(c2))
+            elif errors == 'replace':
+                buffer.append('\x84\x41')
+            elif errors == 'strict':
+                raise UnicodeError, "cannot map \\u%04x to JOHAB" % ord(c)
 
         return (''.join(buffer), len(data))
 
@@ -135,14 +144,26 @@
                             johab2uni_jongsung[jong] ])
                     )
                     continue
-
-                if not decmap:
-                    from korean.mappings import johab_ideograph
-                    decmap = johab_ideograph.decoding_map
 
-                if decmap.has_key(c):
-                    buffer.append(decmap[c])
-                    continue
+                c1, c2 = ord(c[0]), ord(c[1])
+                if ((0x31 <= c2 <= 0x7e or 0x91 <= c2 <= 0xfe)
+                        and not (c1 == 0xda and (0xa1 <= c2 <= 0xd3))):
+                    if c1 < 0xe0:
+                        t1 = 2 * (c1 - 0xd9)
+                    else:
+                        t1 = 2 * c1 - 0x197
+                    if c2 < 0x91:
+                        t2 = c2 - 0x31
+                    else:
+                        t2 = c2 - 0x43
+                    if t2 < 0x5e:
+                        ch = chr(t1 + 0xa1) + chr(t2 + 0xa1)
+                    else:
+                        ch = chr(t1 + 0xa2) + chr(t2 + 0x43)
+
+                    if ksx1001decmap.has_key(ch):
+                        buffer.append(ksx1001decmap[ch])
+                        continue
 
             if errors == 'replace':
                 buffer.append(u'\uFFFD') # REPLACEMENT CHARACTER
@@ -208,3 +229,5 @@
 
 def getregentry():
     return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
+
+# ex: ts=8 sts=4 et sw=4
|