Thread: [KoCo-CVS] [Commit] KoreanCodecs/korean/python hangul.py
Brought to you by:
perky
From: Chang <pe...@us...> - 2002-04-24 03:36:03
|
# perky 02/04/23 20:36:01
#
# Added: korean/python hangul.py
# Log:
#   - Move hangul python implementation into python/
#   - Added hangul.format, the hangul adaptive formatter
#
# Revision  Changes  Path
# 1.1                KoreanCodecs/korean/python/hangul.py
#
# Index: hangul.py
# ===================================================================
#!/usr/local/bin/python
# ex:ts=4
#
# Unicode hangul abstractive controller
#
# written by Hye-Shik Chang <pe...@fa...>
#
#
# Unicode Hangul Code-Area Specifications:
#   http://www.unicode.org/charts/PDF/UAC00.pdf
#
# Jamo Short Name Conventions:
#   http://www.unicode.org/unicode/uni2book/ch04.pdf (section 4.4)
#
# Conjoining Jamo Behavior:
#   http://www.unicode.org/unicode/uni2book/ch03.pdf (section 3.11)
#
# $Id: hangul.py,v 1.1 2002/04/24 03:36:01 perky Exp $
#
# NOTE: the line structure of this listing was reconstructed from a
# whitespace-collapsed copy; statements are unchanged except where a
# comment says otherwise.

try:
    unichr
except NameError:
    # Python 3 has no unichr(); chr() returns a text character there.
    unichr = chr


class UnicodeHangulError(Exception):
    """Error raised when an argument is not usable hangul data.

    The message is kept in ``msg`` and is also what ``str()`` and
    ``repr()`` of the exception return.
    """

    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

    def __repr__(self):
        return self.msg

    __str__ = __repr__


# Placeholder for "no jamo in this position".
Null = u''


class Jaeum:  # XXX: 1100-1159 Old Jaeum need?
    """Consonant (jaeum) compatibility jamo, U+3131..U+314E."""

    Codes = (u'\u3131', u'\u3132', u'\u3133', u'\u3134', u'\u3135', u'\u3136',
             #    G         GG        GS         N        NJ        NH
             u'\u3137', u'\u3138', u'\u3139', u'\u313a', u'\u313b', u'\u313c',
             #    D         DD         L        LG        LM        LB
             u'\u313d', u'\u313e', u'\u313f', u'\u3140', u'\u3141', u'\u3142',
             #   LS         LT        LP        LH         M         B
             u'\u3143', u'\u3144', u'\u3145', u'\u3146', u'\u3147', u'\u3148',
             #   BB         BS         S        SS        NG         J
             u'\u3149', u'\u314a', u'\u314b', u'\u314c', u'\u314d', u'\u314e')
             #   JJ          C         K         T         P         H
    Width = len(Codes)

    G, GG, GS, N, NJ, NH, D, DD, L, LG, LM, LB, LS, LT, LP, LH, M, B, \
        BB, BS, S, SS, NG, J, JJ, C, K, T, P, H = Codes

    # Jamo usable as initial consonant; list position is the index used in
    # the syllable arithmetic of join() and split().
    Chosung = [G, GG, N, D, DD, L, M, B, BB, S, SS, NG, J, JJ, C, K, T, P, H]

    # Jamo usable as final consonant; position 0 (Null) means "no final".
    Jongsung = [Null, G, GG, GS, N, NJ, NH, D, L, LG, LM, LB, LS, LT,
                LP, LH, M, B, BS, S, SS, NG, J, C, K, T, P, H]

    # Compound consonants mapped to the simple jamo they are made of.
    MultiElement = {
        GG: (G, G), GS: (G, S), NJ: (N, J), NH: (N, H), DD: (D, D),
        LG: (L, G), LM: (L, M), LB: (L, B), LS: (L, S), LT: (L, T),
        LP: (L, P), LH: (L, H), BB: (B, B), BS: (B, S), SS: (S, S),
        JJ: (J, J)
    }


class Moeum:  # XXX: 1161-117f Old Moeum need?
    """Vowel (moeum) compatibility jamo, U+314F..U+3163."""

    Codes = (u'\u314f', u'\u3150', u'\u3151', u'\u3152', u'\u3153', u'\u3154',
             #    A         AE        YA        YAE       EO         E
             u'\u3155', u'\u3156', u'\u3157', u'\u3158', u'\u3159', u'\u315a',
             #   YEO        YE         O        WA        WAE       OE
             u'\u315b', u'\u315c', u'\u315d', u'\u315e', u'\u315f', u'\u3160',
             #   YO          U        WEO       WE        WI        YU
             u'\u3161', u'\u3162', u'\u3163')
             #   EU         YI         I
    Width = len(Codes)

    A, AE, YA, YAE, EO, E, YEO, YE, O, WA, WAE, OE, YO, \
        U, WEO, WE, WI, YU, EU, YI, I = Codes

    # Every vowel can be a medial; position is the index used by join()
    # and split().
    Jungsung = list(Codes)

    # Compound vowels mapped to the simple jamo they are made of.
    MultiElement = {
        AE: (A, I), YAE: (YA, I), YE: (YEO, I), WA: (O, A), WAE: (O, A, I),
        OE: (O, I), WEO: (U, EO), WE: (U, E), WI: (U, I), YI: (EU, I)
    }


# Aliases for your convenience
Chosung = Jaeum.Chosung
Jungsung = Moeum.Jungsung
Jongsung = Jaeum.Jongsung

# Publish every short upper-case jamo name (G, GG, ..., A, AE, ...) at module
# level.  Assigning through globals() replaces the former exec statement and
# works on Python 2 and 3 alike.
for _table in (Jaeum, Moeum):
    for _name, _code in _table.__dict__.items():
        if _name.isupper() and len(_name) <= 3:
            globals()[_name] = _code
del _table, _name, _code


def isJaeum(c):
    """Return true if *c* is one of the consonant jamo in Jaeum.Codes."""
    return c in Jaeum.Codes


def isMoeum(c):
    """Return true if *c* is one of the vowel jamo in Moeum.Codes."""
    return c in Moeum.Codes


# Unicode Hangul Syllables Characteristics
zone = (u'\uAC00', u'\uD7A3')

# (weight, jamo table) for each syllable position, most significant first:
# code point = zone[0] + sum(weight * index of the jamo in its table).
splitters = [(len(Jongsung) * len(Jungsung), Chosung),
             (len(Jongsung), Jungsung),
             (1, Jongsung)]


def ishangul(code):
    """Return true if *code* is a precomposed syllable or a known jamo."""
    return (zone[0] <= code <= zone[1] or
            code in Jaeum.Codes or
            code in Moeum.Codes)


# Alternative Suffixes
# Each particle maps to (form after a syllable without final consonant,
#                        form after a syllable with final consonant).
ALT_SUFFIXES = {
    u'\uc744': (u'\ub97c', u'\uc744'),  # reul, eul
    u'\ub97c': (u'\ub97c', u'\uc744'),  # reul, eul
    u'\uc740': (u'\ub294', u'\uc740'),  # neun, eun
    u'\ub294': (u'\ub294', u'\uc740'),  # neun, eun
    u'\uc774': (u'\uac00', u'\uc774'),  # ga, yi
    u'\uac00': (u'\uac00', u'\uc774'),  # ga, yi
    u'\uc640': (u'\uc640', u'\uacfc'),  # wa, gwa
    u'\uacfc': (u'\uc640', u'\uacfc'),  # wa, gwa
}

# Ida-Variation Suffixes
# Value is (merge, separate): "separate" is emitted as its own syllable after
# a final consonant; otherwise a non-empty "merge" is the Jongsung index
# added to the preceding hangul syllable, and an empty one emits nothing.
IDA_SUFFIXES = {
    u'(\uc774)': (u'', u'\uc774'),  # (yi)da
    u'(\uc785)': (17, u'\uc785'),   # (ip)nida
    u'(\uc778)': (4, u'\uc778'),    # (in)-
}


def join(codes):
    """Join function which makes hangul syllable from jamos.

    *codes* is any 3-element sequence (chosung, jungsung, jongsung) of
    compatibility jamo; use Null for an absent jongsung.  If the chosung or
    the jungsung is empty, the single jamo that is present is returned
    unchanged instead of a syllable.

    Raises UnicodeHangulError when *codes* does not have three elements and
    ValueError when a jamo is not valid for its position.
    """
    if len(codes) != 3:
        raise UnicodeHangulError("needs 3-element tuple")
    if not codes[0] or not codes[1]:  # single jamo
        return codes[0] or codes[1]

    r = ord(zone[0])
    # Pair each position with its jamo instead of popping from a copy, so
    # that tuples (which have no pop()) are accepted as the message promises.
    for (multiplier, codeset), jamo in zip(splitters, codes):
        r += multiplier * codeset.index(jamo)

    return unichr(r)


def split(code):
    """Split function which splits hangul syllable into jamos.

    Returns the list [chosung, jungsung, jongsung]; positions that do not
    apply hold Null.  A lone jamo is returned in its own position.

    Raises UnicodeHangulError unless *code* is exactly one hangul character.
    """
    if len(code) != 1 or not ishangul(code):
        raise UnicodeHangulError("needs 1 hangul letter")
    if code in Jaeum.Codes:
        return [code, Null, Null]
    if code in Moeum.Codes:
        return [Null, code, Null]

    code = ord(code) - ord(zone[0])
    r = []
    for divider, codeset in splitters:
        # divmod keeps the index an integer even under true division.
        value, code = divmod(code, divider)
        r.append(codeset[value])
    return r


def dividestring(str, intoelements=0):
    """Return *str* with every hangul syllable replaced by its jamo.

    Non-hangul characters are copied through.  When *intoelements* is true,
    compound jamo are further expanded into their simple elements using the
    MultiElement tables of Jaeum and Moeum.

    Raises UnicodeHangulError if *str* is not a unicode string.
    """
    if not isinstance(str, type(u'')):
        raise UnicodeHangulError("needs unicode string")

    pieces = []
    for char in str:
        if not ishangul(char):
            pieces.append(char)
            continue
        for elem in split(char):
            if intoelements and elem in Jaeum.MultiElement:
                pieces.extend(Jaeum.MultiElement[elem])
            elif intoelements and elem in Moeum.MultiElement:
                pieces.extend(Moeum.MultiElement[elem])
            else:
                pieces.append(elem)

    return u''.join(pieces)


def _has_final(c):
    """Return (is_hangul_syllable, has_final_consonant) for character *c*.

    For internal use only.  For characters outside the syllable zone a fixed
    set of digits, letters and '.' is treated as if it carried a final
    consonant.
    """
    if u'\uac00' <= c <= u'\ud7a3':  # hangul
        return 1, (ord(c) - 0xac00) % 28 > 0
    else:
        return 0, c in u'013678.bklmnptMN'


def format(fmtstr, args):
    """Format *fmtstr* with *args*, adapting Korean suffixes to the values.

    %-conversions work as with the % operator: *args* is either a dict (for
    %(name)s style) or an iterable supplying one value per conversion.  A
    particle from ALT_SUFFIXES, or a three-character marker from
    IDA_SUFFIXES, that directly follows a conversion is replaced by the
    variant that fits the last character of the substituted text.  A
    backslash makes the next character literal, and '%%' yields one '%'.
    """
    if isinstance(args, dict):
        def argget():
            return args
    else:
        argiter = iter(args)

        def argget():
            return next(argiter)

    obuff = []
    ncur = escape = fmtinpth = 0
    ofmt = fmt = u''   # ofmt: text of the conversion just emitted, if any

    while ncur < len(fmtstr):
        c = fmtstr[ncur]

        if escape:
            obuff.append(c)
            escape = 0
            ofmt = u''
        elif c == u'\\':
            escape = 1
        elif fmt:
            # Inside a %-conversion: collect it up to the type letter.
            fmt += c
            if not fmtinpth and c.isalpha():
                ofmt = fmt % argget()
                obuff.append(ofmt)
                fmt = u''
            elif fmtinpth and c == u')':
                fmtinpth = 0
            elif c == u'(':
                fmtinpth = 1
            elif c == u'%':
                obuff.append(u'%')
                # The conversion is finished; without this reset the text
                # after '%%' was swallowed into a bogus conversion that
                # consumed an argument.
                fmt = u''
                fmtinpth = 0
        elif c == u'%':
            fmt += c
            ofmt = u''
        else:
            if ofmt and c in ALT_SUFFIXES:
                hasfinal = _has_final(ofmt[-1])[1]
                if hasfinal:
                    obuff.append(ALT_SUFFIXES[c][1])
                else:
                    obuff.append(ALT_SUFFIXES[c][0])
            elif ofmt and fmtstr[ncur:ncur + 3] in IDA_SUFFIXES:
                sel = IDA_SUFFIXES[fmtstr[ncur:ncur + 3]]
                ishan, hasfinal = _has_final(ofmt[-1])

                if hasfinal:
                    obuff.append(sel[1])
                elif ishan:
                    if sel[0]:
                        # Fold the suffix into the previous syllable by
                        # adding its final-consonant index.
                        obuff[-1] = (obuff[-1][:-1] +
                                     unichr(ord(ofmt[-1]) + sel[0]))
                elif sel[0]:
                    obuff.append(sel[1])
                ncur += 2  # skip the rest of the three-character marker
            else:
                obuff.append(c)

            # Only the first thing after a conversion is adapted.
            ofmt = u''

        ncur += 1

    return u''.join(obuff)


if __name__ == '__main__':
    import sys

    try:
        _read_line = raw_input
    except NameError:  # Python 3
        _read_line = input

    def _say(text):
        # Emit UTF-8 bytes, as the original print of an encoded string did.
        stream = getattr(sys.stdout, 'buffer', sys.stdout)
        stream.write((text + u'\n').encode("utf-8"))
        stream.flush()

    _say(join([Jaeum.P, Moeum.EO, Null]) +
         join([Jaeum.K, Moeum.I, Null]) +
         join([Jaeum.JJ, Moeum.A, Jaeum.NG]))

    while 1:
        try:
            code = _read_line(">>> ")
        except EOFError:
            break
        if isinstance(code, bytes):
            code = code.decode("utf-8")
        _say(dividestring(code, 1))
From: Chang <pe...@us...> - 2002-04-24 05:00:05
|
perky 02/04/23 22:00:03 Modified: korean/python hangul.py Log: - Simpilify join, split function implementation. Revision Changes Path 1.2 +16 -16 KoreanCodecs/korean/python/hangul.py Index: hangul.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/korean/python/hangul.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- hangul.py 24 Apr 2002 03:36:01 -0000 1.1 +++ hangul.py 24 Apr 2002 05:00:03 -0000 1.2 @@ -15,7 +15,7 @@ # Conjoining Jamo Behavior: # http://www.unicode.org/unicode/uni2book/ch03.pdf (section 3.11) # -# $Id: hangul.py,v 1.1 2002/04/24 03:36:01 perky Exp $ +# $Id: hangul.py,v 1.2 2002/04/24 05:00:03 perky Exp $ # class UnicodeHangulError(Exception): @@ -90,9 +90,9 @@ # Unicode Hangul Syllables Characteristics zone = (u'\uAC00', u'\uD7A3') -splitters = [ ( len(Jongsung)*len(Jungsung), Chosung ), - ( len(Jongsung), Jungsung ), - ( 1, Jongsung ) ] +NCHOSUNG = len(Chosung) +NJUNGSUNG = len(Jungsung) +NJONGSUNG = len(Jongsung) ishangul = ( lambda code: @@ -127,12 +127,12 @@ if not codes[0] or not codes[1]: # single jamo return codes[0] or codes[1] - r = ord(zone[0]) - codes = codes[:] # simple copy :D - for multiplier, codeset in splitters: - r = r + multiplier*codeset.index(codes.pop(0)) - - return unichr(r) + return unichr( + 0xac00 + ( + Chosung.index(codes[0])*NJUNGSUNG + + Jungsung.index(codes[1]) + )*NJONGSUNG + Jongsung.index(codes[2]) + ) def split(code): """ Split function which splits hangul syllable into jamos """ @@ -143,12 +143,12 @@ if code in Moeum.Codes: return [Null, code, Null] - code = ord(code) - ord(zone[0]) - r = [] - for divider, codeset in splitters: - value, code = code / divider, code % divider - r.append(codeset[value]) - return r + code = ord(code) - 0xac00 + return [ + Chosung[int(code / (NJUNGSUNG*NJONGSUNG))], # Python3000 safe + Jungsung[int(code / NJONGSUNG) % NJUNGSUNG], + Jongsung[code % NJONGSUNG] + ] def dividestring(str, intoelements=0): 
if type(str) is not type(u''): |
From: Chang <pe...@us...> - 2002-04-24 07:33:13
|
perky 02/04/24 00:20:27 Modified: korean/python hangul.py Log: - Add hangul.conjoin and hangul.disjoint functions (this function set provides converter between U+AC00 and U+1100 pages) Revision Changes Path 1.3 +58 -9 KoreanCodecs/korean/python/hangul.py Index: hangul.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/korean/python/hangul.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- hangul.py 24 Apr 2002 05:00:03 -0000 1.2 +++ hangul.py 24 Apr 2002 07:20:27 -0000 1.3 @@ -15,7 +15,7 @@ # Conjoining Jamo Behavior: # http://www.unicode.org/unicode/uni2book/ch03.pdf (section 3.11) # -# $Id: hangul.py,v 1.2 2002/04/24 05:00:03 perky Exp $ +# $Id: hangul.py,v 1.3 2002/04/24 07:20:27 perky Exp $ # class UnicodeHangulError(Exception): @@ -30,7 +30,7 @@ Null = u'' -class Jaeum: # XXX: 1100-1159 Old Jaeum need? +class Jaeum: Codes = (u'\u3131', u'\u3132', u'\u3133', u'\u3134', u'\u3135', u'\u3136', # G GG GS N NJ NH @@ -56,7 +56,7 @@ } -class Moeum: # XXX: 1161-117f Old Moeum need? 
+class Moeum: Codes = (u'\u314f', u'\u3150', u'\u3151', u'\u3152', u'\u3153', u'\u3154', # A AE YA YAE EO E @@ -75,7 +75,6 @@ OE: (O, I), WEO: (U, EO), WE: (U, E), WI: (U, I), YI: (EU, I) } - # Aliases for your convinience Chosung = Jaeum.Chosung Jungsung = Moeum.Jungsung @@ -89,14 +88,19 @@ isMoeum = lambda c: c in Moeum.Codes # Unicode Hangul Syllables Characteristics -zone = (u'\uAC00', u'\uD7A3') +ZONE = (u'\uAC00', u'\uD7A3') NCHOSUNG = len(Chosung) NJUNGSUNG = len(Jungsung) NJONGSUNG = len(Jongsung) +JBASE_CHOSUNG = u'\u1100' +JBASE_JUNGSUNG = u'\u1161' +JBASE_JONGSUNG = u'\u11A8' +CHOSUNG_FILLER = u'\u115F' +JUNGSUNG_FILLER = u'\u1160' ishangul = ( lambda code: - zone[0] <= code <= zone[1] or + ZONE[0] <= code <= ZONE[1] or code in Jaeum.Codes or code in Moeum.Codes ) @@ -150,10 +154,55 @@ Jongsung[code % NJONGSUNG] ] -def dividestring(str, intoelements=0): - if type(str) is not type(u''): - raise UnicodeHangulError("needs unicode string") +def conjoin(s): + obuff = [] + ncur = 0 + + while ncur < len(s): + c = s[ncur] + if JBASE_CHOSUNG <= c <= u'\u1112' or c == CHOSUNG_FILLER: # starts with chosung + if len(s) > ncur+1 and JUNGSUNG_FILLER <= s[ncur+1] <= u'\u1175': + cho = Chosung[ord(c) - ord(JBASE_CHOSUNG)] + jung = Jungsung[ord(s[ncur+1]) - ord(JBASE_JUNGSUNG)] + if len(s) > ncur+2 and JBASE_JONGSUNG <= s[ncur+2] <= u'\u11C2': + jong = Jongsung[ord(s[ncur+2]) - ord(JBASE_JONGSUNG) + 1] + ncur += 2 + else: + jong = Null + ncur += 1 + obuff.append(join([cho, jung, jong])) + else: + obuff.append(join([Chosung[ord(c) - ord(JBASE_CHOSUNG)], Null, Null])) + elif JBASE_JUNGSUNG <= c <= u'\u1175': + obuff.append(join([Null, Jungsung[ord(c) - ord(JBASE_JUNGSUNG)], Null])) + else: + obuff.append(c) + ncur += 1 + + return u''.join(obuff) +def disjoint(s): + obuff = [] + for c in s: + if ishangul(c): + cho, jung, jong = split(c) + if cho: + obuff.append( unichr(ord(JBASE_CHOSUNG) + Chosung.index(cho)) ) + else: + obuff.append( CHOSUNG_FILLER ) + + if jung: + 
obuff.append( unichr(ord(JBASE_JUNGSUNG) + Jungsung.index(jung)) ) + else: + obuff.append( JUNGSUNG_FILLER ) + + if jong: + obuff.append( unichr(ord(JBASE_JONGSUNG) + Jongsung.index(jong) - 1) ) + else: + obuff.append(c) + return u''.join(obuff) + +def dividestring(str, intoelements=0): r = u'' for char in str: if ishangul(char): |
From: Chang <pe...@us...> - 2002-04-25 03:46:37
|
perky 02/04/24 20:46:34 Modified: korean/python hangul.py Log: - Clean up namespace - Change hangul.split's return type to Tuple (make compatible with c.hangul) Revision Changes Path 1.4 +8 -7 KoreanCodecs/korean/python/hangul.py Index: hangul.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/korean/python/hangul.py,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- hangul.py 24 Apr 2002 07:20:27 -0000 1.3 +++ hangul.py 25 Apr 2002 03:46:34 -0000 1.4 @@ -15,7 +15,7 @@ # Conjoining Jamo Behavior: # http://www.unicode.org/unicode/uni2book/ch03.pdf (section 3.11) # -# $Id: hangul.py,v 1.3 2002/04/24 07:20:27 perky Exp $ +# $Id: hangul.py,v 1.4 2002/04/25 03:46:34 perky Exp $ # class UnicodeHangulError(Exception): @@ -83,6 +83,7 @@ for name, code in Jaeum.__dict__.items() + Moeum.__dict__.items(): if name.isupper() and len(name) <= 3: exec "%s = %s" % (name, repr(code)) +del name, code isJaeum = lambda c: c in Jaeum.Codes isMoeum = lambda c: c in Moeum.Codes @@ -105,7 +106,7 @@ code in Moeum.Codes ) -# Alternative Suffixes +# Alternative Suffixes : do not use outside ALT_SUFFIXES = { u'\uc744': (u'\ub97c', u'\uc744'), # reul, eul u'\ub97c': (u'\ub97c', u'\uc744'), # reul, eul @@ -117,7 +118,7 @@ u'\uacfc': (u'\uc640', u'\uacfc'), # wa, gwa } -# Ida-Varitaion Suffixes +# Ida-Varitaion Suffixes : do not use outside IDA_SUFFIXES = { u'(\uc774)': (u'', u'\uc774'), # (yi)da u'(\uc785)': (17, u'\uc785'), # (ip)nida @@ -143,16 +144,16 @@ if len(code) != 1 or not ishangul(code): raise UnicodeHangulError("needs 1 hangul letter") if code in Jaeum.Codes: - return [code, Null, Null] + return (code, Null, Null) if code in Moeum.Codes: - return [Null, code, Null] + return (Null, code, Null) code = ord(code) - 0xac00 - return [ + return ( Chosung[int(code / (NJUNGSUNG*NJONGSUNG))], # Python3000 safe Jungsung[int(code / NJONGSUNG) % NJUNGSUNG], Jongsung[code % NJONGSUNG] - ] + ) def conjoin(s): obuff = [] 
|
From: Chang <pe...@us...> - 2002-04-25 05:01:10
|
perky 02/04/24 22:01:06 Modified: korean/python hangul.py Log: - Remove hangul.dividestring method (it was just fancy feature..) - Add c.hangul to distutil build chain. Yeah~ Revision Changes Path 1.5 +1 -30 KoreanCodecs/korean/python/hangul.py Index: hangul.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/korean/python/hangul.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- hangul.py 25 Apr 2002 03:46:34 -0000 1.4 +++ hangul.py 25 Apr 2002 05:01:06 -0000 1.5 @@ -15,7 +15,7 @@ # Conjoining Jamo Behavior: # http://www.unicode.org/unicode/uni2book/ch03.pdf (section 3.11) # -# $Id: hangul.py,v 1.4 2002/04/25 03:46:34 perky Exp $ +# $Id: hangul.py,v 1.5 2002/04/25 05:01:06 perky Exp $ # class UnicodeHangulError(Exception): @@ -203,24 +203,6 @@ obuff.append(c) return u''.join(obuff) -def dividestring(str, intoelements=0): - r = u'' - for char in str: - if ishangul(char): - elems = split(char) - for elem in elems: - for htype in (Jaeum, Moeum, None): - if htype == None: - r += elem - elif intoelements and \ - htype.MultiElement.has_key(elem): - r += u''.join(htype.MultiElement[elem]) - break - else: - r += char - - return r - def _has_final(c): # for internal use only if u'\uac00' <= c <= u'\ud7a3': # hangul @@ -287,15 +269,4 @@ ncur += 1 return u''.join(obuff) - - -if __name__ == '__main__': - - print ( join([Jaeum.P, Moeum.EO, Null]) + \ - join([Jaeum.K, Moeum.I, Null]) + \ - join([Jaeum.JJ, Moeum.A, Jaeum.NG]) ).encode("utf-8") - - while 1: - code = raw_input(">>> ") - print dividestring(unicode(code, "utf-8"), 1).encode("utf-8") |
From: Chang <pe...@us...> - 2002-04-25 21:13:49
|
perky 02/04/25 14:13:44 Modified: korean/python hangul.py Log: - Change format argument passing to *args, **kwargs form - Split unittests into CExtension and PurePython Revision Changes Path 1.6 +5 -5 KoreanCodecs/korean/python/hangul.py Index: hangul.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/korean/python/hangul.py,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- hangul.py 25 Apr 2002 05:01:06 -0000 1.5 +++ hangul.py 25 Apr 2002 21:13:44 -0000 1.6 @@ -15,7 +15,7 @@ # Conjoining Jamo Behavior: # http://www.unicode.org/unicode/uni2book/ch03.pdf (section 3.11) # -# $Id: hangul.py,v 1.5 2002/04/25 05:01:06 perky Exp $ +# $Id: hangul.py,v 1.6 2002/04/25 21:13:44 perky Exp $ # class UnicodeHangulError(Exception): @@ -210,11 +210,11 @@ else: return 0, c in u'013678.bklmnptMN' -def format(fmtstr, args): - if not isinstance(args, dict): - argget = iter(args).next +def format(fmtstr, *args, **kwargs): + if kwargs: + argget = lambda:kwargs else: - argget = lambda:args + argget = iter(args).next obuff = [] ncur = escape = fmtinpth = 0 |
From: Chang <pe...@us...> - 2002-04-26 07:47:00
|
perky 02/04/26 00:46:59 Modified: korean/python hangul.py Log: - minor style fix : expand tab to 4 spaces Revision Changes Path 1.8 +20 -20 KoreanCodecs/korean/python/hangul.py Index: hangul.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/korean/python/hangul.py,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- hangul.py 26 Apr 2002 07:29:43 -0000 1.7 +++ hangul.py 26 Apr 2002 07:46:59 -0000 1.8 @@ -15,7 +15,7 @@ # Conjoining Jamo Behavior: # http://www.unicode.org/unicode/uni2book/ch03.pdf (section 3.11) # -# $Id: hangul.py,v 1.7 2002/04/26 07:29:43 perky Exp $ +# $Id: hangul.py,v 1.8 2002/04/26 07:46:59 perky Exp $ # class UnicodeHangulError(Exception): @@ -212,35 +212,35 @@ # Iterator Emulator for ancient versions before 2.1 try: - iter + iter except: - class iter: - def __init__(self, obj): - self.obj = obj - self.ptr = 0 - def next(self): - try: - return self.obj[self.ptr] - finally: - self.ptr += 1 + class iter: + def __init__(self, obj): + self.obj = obj + self.ptr = 0 + def next(self): + try: + return self.obj[self.ptr] + finally: + self.ptr += 1 # Nested scope lambda emulation for versions before 2.2 import sys if sys.hexversion < '0x2020000': - class plambda: - def __init__(self, obj): - self.obj = obj - def __call__(self): - return self.obj + class plambda: + def __init__(self, obj): + self.obj = obj + def __call__(self): + return self.obj else: - plambda = None + plambda = None del sys def format(fmtstr, *args, **kwargs): if kwargs: - argget = lambda:kwargs - if plambda: - argget = plambda(kwargs) + argget = lambda:kwargs + if plambda: + argget = plambda(kwargs) else: argget = iter(args).next |
From: Chang <pe...@us...> - 2002-04-29 14:24:27
|
perky 02/04/29 07:24:25 Modified: korean/python hangul.py Log: - Add 'L', 'R', 'Z' as pseudo final alphabets Revision Changes Path 1.9 +2 -2 KoreanCodecs/korean/python/hangul.py Index: hangul.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/korean/python/hangul.py,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- hangul.py 26 Apr 2002 07:46:59 -0000 1.8 +++ hangul.py 29 Apr 2002 14:24:25 -0000 1.9 @@ -15,7 +15,7 @@ # Conjoining Jamo Behavior: # http://www.unicode.org/unicode/uni2book/ch03.pdf (section 3.11) # -# $Id: hangul.py,v 1.8 2002/04/26 07:46:59 perky Exp $ +# $Id: hangul.py,v 1.9 2002/04/29 14:24:25 perky Exp $ # class UnicodeHangulError(Exception): @@ -208,7 +208,7 @@ if u'\uac00' <= c <= u'\ud7a3': # hangul return 1, (ord(c) - 0xac00) % 28 > 0 else: - return 0, c in u'013678.bklmnptMN' + return 0, c in u'013678.bklmnptLMNRZ' # Iterator Emulator for ancient versions before 2.1 try: |
From: Chang <pe...@us...> - 2002-05-01 11:10:48
|
perky 02/05/01 04:10:44 Modified: korean/python hangul.py Log: - Test long unicode string for ishangul, isJaeum, isMoeum Suggested by: Lee Gang-Seong <gs...@gw...> Revision Changes Path 1.10 +36 -7 KoreanCodecs/korean/python/hangul.py Index: hangul.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/korean/python/hangul.py,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- hangul.py 29 Apr 2002 14:24:25 -0000 1.9 +++ hangul.py 1 May 2002 11:10:44 -0000 1.10 @@ -15,7 +15,7 @@ # Conjoining Jamo Behavior: # http://www.unicode.org/unicode/uni2book/ch03.pdf (section 3.11) # -# $Id: hangul.py,v 1.9 2002/04/29 14:24:25 perky Exp $ +# $Id: hangul.py,v 1.10 2002/05/01 11:10:44 perky Exp $ # class UnicodeHangulError(Exception): @@ -29,6 +29,11 @@ __str__ = __repr__ Null = u'' +try: + True +except: + True = 1 + False = 0 class Jaeum: @@ -85,9 +90,6 @@ exec "%s = %s" % (name, repr(code)) del name, code -isJaeum = lambda c: c in Jaeum.Codes -isMoeum = lambda c: c in Moeum.Codes - # Unicode Hangul Syllables Characteristics ZONE = (u'\uAC00', u'\uD7A3') NCHOSUNG = len(Chosung) @@ -99,7 +101,7 @@ CHOSUNG_FILLER = u'\u115F' JUNGSUNG_FILLER = u'\u1160' -ishangul = ( +_ishangul = ( lambda code: ZONE[0] <= code <= ZONE[1] or code in Jaeum.Codes or @@ -125,6 +127,33 @@ u'(\uc778)': (4, u'\uc778'), # (in)- } +def isJaeum(u): + if u: + for c in u: + if c not in Jaeum.Codes: + break + else: + return True + return False + +def isMoeum(u): + if u: + for c in u: + if c not in Moeum.Codes: + break + else: + return True + return False + +def ishangul(u): + if u: + for c in u: + if not _ishangul(c): + break + else: + return True + return False + def join(codes): """ Join function which makes hangul syllable from jamos """ if len(codes) is not 3: @@ -141,7 +170,7 @@ def split(code): """ Split function which splits hangul syllable into jamos """ - if len(code) != 1 or not ishangul(code): + if len(code) != 1 or not 
_ishangul(code): raise UnicodeHangulError("needs 1 hangul letter") if code in Jaeum.Codes: return (code, Null, Null) @@ -185,7 +214,7 @@ def disjoint(s): obuff = [] for c in s: - if ishangul(c): + if _ishangul(c): cho, jung, jong = split(c) if cho: obuff.append( unichr(ord(JBASE_CHOSUNG) + Chosung.index(cho)) ) |
From: Chang <pe...@us...> - 2002-07-13 06:10:23
|
perky 02/07/12 23:10:21 Modified: korean/python hangul.py Log: - Update my new official mail addr Revision Changes Path 1.12 +2 -2 KoreanCodecs/korean/python/hangul.py Index: hangul.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/korean/python/hangul.py,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- hangul.py 5 May 2002 19:13:57 -0000 1.11 +++ hangul.py 13 Jul 2002 06:10:21 -0000 1.12 @@ -3,7 +3,7 @@ # # Unicode hangul abstractive controller # -# written by Hye-Shik Chang <pe...@fa...> +# written by Hye-Shik Chang <pe...@Fr...> # # # Unicode Hangul Code-Area Specifications: @@ -15,7 +15,7 @@ # Conjoining Jamo Behavior: # http://www.unicode.org/unicode/uni2book/ch03.pdf (section 3.11) # -# $Id: hangul.py,v 1.11 2002/05/05 19:13:57 perky Exp $ +# $Id: hangul.py,v 1.12 2002/07/13 06:10:21 perky Exp $ # class UnicodeHangulError(Exception): |