[KoCo-CVS] [Commit] KoreanCodecs/korean hangul.py
Brought to you by:
perky
From: Chang <pe...@us...> - 2002-04-24 03:36:03
|
perky       02/04/23 20:36:00

  Modified:    korean   hangul.py
  Log:
  - Move hangul python implementation into python/
  - Added hangul.format, the hangul adaptive formatter

  Revision  Changes    Path
  1.3       +4 -165    KoreanCodecs/korean/hangul.py

Index: hangul.py
===================================================================
RCS file: /cvsroot/koco/KoreanCodecs/korean/hangul.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- hangul.py 8 Apr 2002 12:41:41 -0000 1.2
+++ hangul.py 24 Apr 2002 03:36:00 -0000 1.3
@@ -1,165 +1,4 @@
-#!/usr/local/bin/python
-# ex:ts=4
-#
-# Unicode hangul abstractive controller
-#
-# written by Hye-Shik Chang <pe...@fa...>
-#
-# Unicode Hangul Code-Area Specifications:
-# http://www.unicode.org/charts/PDF/UAC00.pdf
-#
-# Jamo Short Name property confirms to sections 3.1 and 4.4 of Unicode 3.2.0
-# ftp://ftp.unicode.org/Public/UNIDATA/Jamo.txt
-#
-# ----------------------------------------------------------------------------
-# "THE BEER-WARE LICENSE" (Revision 42):
-# <pe...@fa...> wrote this file. As long as you retain this notice you
-# can do whatever you want with this stuff. If we meet some day, and you think
-# this stuff is worth it, you can buy me a beer in return. Hye-Shik Chang
-# ----------------------------------------------------------------------------
-#
-# $LinuxKorea: UnicodeHangul.py,v 1.3 2001/08/04 05:51:29 perky Exp $'
-# $Id: hangul.py,v 1.2 2002/04/08 12:41:41 perky Exp $
-#
-
-class UnicodeHangulError(Exception):
-
-    def __init__ (self, msg):
-        self.msg = msg
-
-    def __repr__ (self):
-        return self.msg
-
-    __str__ = __repr__
-
-Null = u''
-
-class Jaeum: # XXX: 1100-1159 Old Jaeum need?
-
-    Codes = (u'\u3131', u'\u3132', u'\u3133', u'\u3134', u'\u3135', u'\u3136',
-             # G GG GS N NJ NH
-             u'\u3137', u'\u3138', u'\u3139', u'\u313a', u'\u313b', u'\u313c',
-             # D DD L LG LM LB
-             u'\u313d', u'\u313e', u'\u313f', u'\u3140', u'\u3141', u'\u3142',
-             # LS LT LP LH M B
-             u'\u3143', u'\u3144', u'\u3145', u'\u3146', u'\u3147', u'\u3148',
-             # BB BS S SS NG J
-             u'\u3149', u'\u314a', u'\u314b', u'\u314c', u'\u314d', u'\u314e')
-             # JJ C K T P H
-    Width = len(Codes)
-    G, GG, GS, N, NJ, NH, D, DD, L, LG, LM, LB, LS, LT, LP, LH, M, B, \
-        BB, BS, S, SS, NG, J, JJ, C, K, T, P, H = Codes
-    Chosung = [G, GG, N, D, DD, L, M, B, BB, S, SS, NG, J, JJ, C, K, T, P, H]
-    Jongsung = [Null, G, GG, GS, N, NJ, NH, D, L, LG, LM, LB, LS, LT, \
-                LP, LH, M, B, BS, S, SS, NG, J, C, K, T, P, H]
-    MultiElement = {
-        GG: (G, G), GS: (G, S), NJ: (N, J), NH: (N, H), DD: (D, D),
-        LG: (L, G), LM: (L, M), LB: (L, B), LS: (L, S), LT: (L, T),
-        LP: (L, P), LH: (L, H), BB: (B, B), BS: (B, S), SS: (S, S),
-        JJ: (J, J)
-    }
-
-
-class Moeum: # XXX: 1161-117f Old Moeum need?
-
-    Codes = (u'\u314f', u'\u3150', u'\u3151', u'\u3152', u'\u3153', u'\u3154',
-             # A AE YA YAE EO E
-             u'\u3155', u'\u3156', u'\u3157', u'\u3158', u'\u3159', u'\u315a',
-             # YEO YE O WA WAE OE
-             u'\u315b', u'\u315c', u'\u315d', u'\u315e', u'\u315f', u'\u3160',
-             # YO U WEO WE WI YU
-             u'\u3161', u'\u3162', u'\u3163')
-             # EU YI I
-    Width = len(Codes)
-    A, AE, YA, YAE, EO, E, YEO, YE, O, WA, WAE, OE, YO, \
-        U, WEO, WE, WI, YU, EU, YI, I = Codes
-    Jungsung = list(Codes)
-    MultiElement = {
-        AE: (A, I), YAE: (YA, I), YE: (YEO, I), WA: (O, A), WAE: (O, A, I),
-        OE: (O, I), WEO: (U, EO), WE: (U, E), WI: (U, I), YI: (EU, I)
-    }
-
-
-# Aliases for your convinience
-Chosung = Jaeum.Chosung
-Jungsung = Moeum.Jungsung
-Jongsung = Jaeum.Jongsung
-
-isJaeum = lambda c: c in Jaeum.Codes
-isMoeum = lambda c: c in Moeum.Codes
-
-# Unicode Hangul Syllables Characteristics
-zone = (u'\uAC00', u'\uD7A3')
-splitters = [ ( len(Jongsung)*len(Jungsung), Chosung ),
-              ( len(Jongsung), Jungsung ),
-              ( 1, Jongsung ) ]
-
-ishangul = (
-    lambda code:
-        zone[0] <= code <= zone[1] or
-        code in Jaeum.Codes or
-        code in Moeum.Codes
-)
-
-def join(codes):
-    """ Join function which makes hangul syllable from jamos """
-    if len(codes) is not 3:
-        raise UnicodeHangulError("needs 3-element tuple")
-    if not codes[0] or not codes[1]: # single jamo
-        return codes[0] or codes[1]
-
-    r = ord(zone[0])
-    codes = codes[:] # simple copy :D
-    for multiplier, codeset in splitters:
-        r = r + multiplier*codeset.index(codes.pop(0))
-
-    return unichr(r)
-
-def split(code):
-    """ Split function which splits hangul syllable into jamos """
-    if len(code) != 1 or not ishangul(code):
-        raise UnicodeHangulError("needs 1 hangul letter")
-    if code in Jaeum.Codes:
-        return [code, Null, Null]
-    if code in Moeum.Codes:
-        return [Null, code, Null]
-
-    code = ord(code) - ord(zone[0])
-    r = []
-    for divider, codeset in splitters:
-        value, code = code / divider, code % divider
-        r.append(codeset[value])
-    return r
-
-def dividestring(str, intoelements=0):
-    if type(str) is not type(u''):
-        raise UnicodeHangulError("needs unicode string")
-
-    r = u''
-    for char in str:
-        if ishangul(char):
-            elems = split(char)
-            for elem in elems:
-                for htype in (Jaeum, Moeum, None):
-                    if htype == None:
-                        r += elem
-                    elif intoelements and \
-                            htype.MultiElement.has_key(elem):
-                        r += u''.join(htype.MultiElement[elem])
-                        break
-        else:
-            r += char
-
-    return r
-
-
-if __name__ == '__main__':
-
-    print ( join([Jaeum.P, Moeum.EO, Null]) + \
-            join([Jaeum.K, Moeum.I, Null]) + \
-            join([Jaeum.JJ, Moeum.A, Jaeum.NG]) ).encode("utf-8")
-
-    while 1:
-        code = raw_input(">>> ")
-        print dividestring(unicode(code, "utf-8"), 1).encode("utf-8")
-
+try:
+    from korean.c.hangul import *
+except:
+    from korean.python.hangul import *
|