[KoCo-CVS] [Commit] KoreanCodecs/korean/python hangul.py

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

perky       02/04/24 00:20:27

  Modified:    korean/python hangul.py
  Log:
  - Add hangul.conjoin and hangul.disjoint functions
    (these functions convert between the U+AC00 and U+1100 pages,
     i.e. precomposed Hangul syllables and conjoining Jamo)
  
  Revision  Changes    Path
  1.3       +58 -9     KoreanCodecs/korean/python/hangul.py
  
  Index: hangul.py
  ===================================================================
  RCS file: /cvsroot/koco/KoreanCodecs/korean/python/hangul.py,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- hangul.py	24 Apr 2002 05:00:03 -0000	1.2
  +++ hangul.py	24 Apr 2002 07:20:27 -0000	1.3
  @@ -15,7 +15,7 @@
   # Conjoining Jamo Behavior:
   #  http://www.unicode.org/unicode/uni2book/ch03.pdf  (section 3.11)
   #
  -# $Id: hangul.py,v 1.2 2002/04/24 05:00:03 perky Exp $
  +# $Id: hangul.py,v 1.3 2002/04/24 07:20:27 perky Exp $
   #
   
   class UnicodeHangulError(Exception):
  @@ -30,7 +30,7 @@
   
   Null = u''
   
  -class Jaeum: # XXX: 1100-1159 Old Jaeum need?
  +class Jaeum:
   
       Codes = (u'\u3131', u'\u3132', u'\u3133', u'\u3134', u'\u3135', u'\u3136',
               #    G         GG          GS         N          NJ         NH
  @@ -56,7 +56,7 @@
       }
   
   
  -class Moeum: # XXX: 1161-117f Old Moeum need?
  +class Moeum:
   
       Codes = (u'\u314f', u'\u3150', u'\u3151', u'\u3152', u'\u3153', u'\u3154',
               #    A          AE        YA         YAE         EO         E
  @@ -75,7 +75,6 @@
           OE: (O, I),  WEO: (U, EO),  WE: (U, E),   WI: (U, I),  YI: (EU, I)
       }
   
  -
   # Aliases for your convinience
   Chosung = Jaeum.Chosung
   Jungsung = Moeum.Jungsung
  @@ -89,14 +88,19 @@
   isMoeum = lambda c: c in Moeum.Codes
   
   # Unicode Hangul Syllables Characteristics
  -zone = (u'\uAC00', u'\uD7A3')
  +ZONE = (u'\uAC00', u'\uD7A3')
   NCHOSUNG  = len(Chosung)
   NJUNGSUNG = len(Jungsung)
   NJONGSUNG = len(Jongsung)
  +JBASE_CHOSUNG  = u'\u1100'
  +JBASE_JUNGSUNG = u'\u1161'
  +JBASE_JONGSUNG = u'\u11A8'
  +CHOSUNG_FILLER = u'\u115F'
  +JUNGSUNG_FILLER = u'\u1160'
   
   ishangul = (
       lambda code:
  -        zone[0] <= code <= zone[1] or
  +        ZONE[0] <= code <= ZONE[1] or
           code in Jaeum.Codes or
           code in Moeum.Codes
   )
  @@ -150,10 +154,55 @@
           Jongsung[code % NJONGSUNG]
       ]
   
  -def dividestring(str, intoelements=0):
  -    if type(str) is not type(u''):
  -        raise UnicodeHangulError("needs unicode string")
  +def conjoin(s):
  +    obuff = []
  +    ncur = 0
  +
  +    while ncur < len(s):
  +        c = s[ncur]
  +        if JBASE_CHOSUNG <= c <= u'\u1112' or c == CHOSUNG_FILLER: # starts with chosung
  +            if len(s) > ncur+1 and JUNGSUNG_FILLER <= s[ncur+1] <= u'\u1175':
  +                cho = Chosung[ord(c) - ord(JBASE_CHOSUNG)]
  +                jung = Jungsung[ord(s[ncur+1]) - ord(JBASE_JUNGSUNG)]
  +                if len(s) > ncur+2 and JBASE_JONGSUNG <= s[ncur+2] <= u'\u11C2':
  +                    jong = Jongsung[ord(s[ncur+2]) - ord(JBASE_JONGSUNG) + 1]
  +                    ncur += 2
  +                else:
  +                    jong = Null
  +                    ncur += 1
  +                obuff.append(join([cho, jung, jong]))
  +            else:
  +                obuff.append(join([Chosung[ord(c) - ord(JBASE_CHOSUNG)], Null, Null]))
  +        elif JBASE_JUNGSUNG <= c <= u'\u1175':
  +            obuff.append(join([Null, Jungsung[ord(c) - ord(JBASE_JUNGSUNG)], Null]))
  +        else:
  +            obuff.append(c)
  +        ncur += 1
  +    
  +    return u''.join(obuff)
   
  +def disjoint(s):
  +    obuff = []
  +    for c in s:
  +        if ishangul(c):
  +            cho, jung, jong = split(c)
  +            if cho:
  +                obuff.append( unichr(ord(JBASE_CHOSUNG) + Chosung.index(cho)) )
  +            else:
  +                obuff.append( CHOSUNG_FILLER )
  +
  +            if jung:
  +                obuff.append( unichr(ord(JBASE_JUNGSUNG) + Jungsung.index(jung)) )
  +            else:
  +                obuff.append( JUNGSUNG_FILLER )
  +
  +            if jong:
  +                obuff.append( unichr(ord(JBASE_JONGSUNG) + Jongsung.index(jong) - 1) )
  +        else:
  +            obuff.append(c)
  +    return u''.join(obuff)
  +
  +def dividestring(str, intoelements=0):
       r = u''
       for char in str:
           if ishangul(char):