From: Christophe R. <cr...@us...> - 2009-08-05 14:18:04
|
Update of /cvsroot/sbcl/sbcl/tools-for-build In directory fdv4jf1.ch3.sourceforge.com:/tmp/cvs-serv18976/tools-for-build Modified Files: ucd.lisp Added Files: Jamo.txt Log Message: 1.0.30.36: Hangul syllable character names Provide for the construction of Hangul syllable character names, as required by Unicode ("This character name is a normative property of the character"). At present done at build-time; if the increase in core size is too painful, it can be done algorithmically in CHAR-NAME and NAME-CHAR. --- NEW FILE: Jamo.txt --- # Jamo-5.1.0.txt # Date: 2008-03-20, 17:59:00 PDT [KW] # # Unicode Character Database # Copyright (c) 1991-2008 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see UCD.html # # This file defines the Jamo Short Name property. # # See Section 3.12 of The Unicode Standard, Version 5.0 # for more information. # # Each line contains two fields, separated by a semicolon. # # The first field gives the code point, in 4-digit hexadecimal # form, of a combining jamo character that participates in # the algorithmic determination of Hangul syllable character names. # The second field gives the Jamo Short Name as a one-, two-, # or three-character ASCII string (or in one case, for U+110B, # the null string). 
# # ############################################################# 1100; G # HANGUL CHOSEONG KIYEOK 1101; GG # HANGUL CHOSEONG SSANGKIYEOK 1102; N # HANGUL CHOSEONG NIEUN 1103; D # HANGUL CHOSEONG TIKEUT 1104; DD # HANGUL CHOSEONG SSANGTIKEUT 1105; R # HANGUL CHOSEONG RIEUL 1106; M # HANGUL CHOSEONG MIEUM 1107; B # HANGUL CHOSEONG PIEUP 1108; BB # HANGUL CHOSEONG SSANGPIEUP 1109; S # HANGUL CHOSEONG SIOS 110A; SS # HANGUL CHOSEONG SSANGSIOS 110B; # HANGUL CHOSEONG IEUNG 110C; J # HANGUL CHOSEONG CIEUC 110D; JJ # HANGUL CHOSEONG SSANGCIEUC 110E; C # HANGUL CHOSEONG CHIEUCH 110F; K # HANGUL CHOSEONG KHIEUKH 1110; T # HANGUL CHOSEONG THIEUTH 1111; P # HANGUL CHOSEONG PHIEUPH 1112; H # HANGUL CHOSEONG HIEUH 1161; A # HANGUL JUNGSEONG A 1162; AE # HANGUL JUNGSEONG AE 1163; YA # HANGUL JUNGSEONG YA 1164; YAE # HANGUL JUNGSEONG YAE 1165; EO # HANGUL JUNGSEONG EO 1166; E # HANGUL JUNGSEONG E 1167; YEO # HANGUL JUNGSEONG YEO 1168; YE # HANGUL JUNGSEONG YE 1169; O # HANGUL JUNGSEONG O 116A; WA # HANGUL JUNGSEONG WA 116B; WAE # HANGUL JUNGSEONG WAE 116C; OE # HANGUL JUNGSEONG OE 116D; YO # HANGUL JUNGSEONG YO 116E; U # HANGUL JUNGSEONG U 116F; WEO # HANGUL JUNGSEONG WEO 1170; WE # HANGUL JUNGSEONG WE 1171; WI # HANGUL JUNGSEONG WI 1172; YU # HANGUL JUNGSEONG YU 1173; EU # HANGUL JUNGSEONG EU 1174; YI # HANGUL JUNGSEONG YI 1175; I # HANGUL JUNGSEONG I 11A8; G # HANGUL JONGSEONG KIYEOK 11A9; GG # HANGUL JONGSEONG SSANGKIYEOK 11AA; GS # HANGUL JONGSEONG KIYEOK-SIOS 11AB; N # HANGUL JONGSEONG NIEUN 11AC; NJ # HANGUL JONGSEONG NIEUN-CIEUC 11AD; NH # HANGUL JONGSEONG NIEUN-HIEUH 11AE; D # HANGUL JONGSEONG TIKEUT 11AF; L # HANGUL JONGSEONG RIEUL 11B0; LG # HANGUL JONGSEONG RIEUL-KIYEOK 11B1; LM # HANGUL JONGSEONG RIEUL-MIEUM 11B2; LB # HANGUL JONGSEONG RIEUL-PIEUP 11B3; LS # HANGUL JONGSEONG RIEUL-SIOS 11B4; LT # HANGUL JONGSEONG RIEUL-THIEUTH 11B5; LP # HANGUL JONGSEONG RIEUL-PHIEUPH 11B6; LH # HANGUL JONGSEONG RIEUL-HIEUH 11B7; M # HANGUL JONGSEONG MIEUM 11B8; B # HANGUL JONGSEONG 
PIEUP 11B9; BS # HANGUL JONGSEONG PIEUP-SIOS 11BA; S # HANGUL JONGSEONG SIOS 11BB; SS # HANGUL JONGSEONG SSANGSIOS 11BC; NG # HANGUL JONGSEONG IEUNG 11BD; J # HANGUL JONGSEONG CIEUC 11BE; C # HANGUL JONGSEONG CHIEUCH 11BF; K # HANGUL JONGSEONG KHIEUKH 11C0; T # HANGUL JONGSEONG THIEUTH 11C1; P # HANGUL JONGSEONG PHIEUPH 11C2; H # HANGUL JONGSEONG HIEUH Index: ucd.lisp =================================================================== RCS file: /cvsroot/sbcl/sbcl/tools-for-build/ucd.lisp,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- ucd.lisp 3 Aug 2009 16:13:23 -0000 1.6 +++ ucd.lisp 5 Aug 2009 14:17:51 -0000 1.7 @@ -114,8 +114,44 @@ do (slurp-ucd-line line))) (second-pass) (build-misc-table) + (fixup-hangul-syllables) *decompositions*) +(defun fixup-hangul-syllables () + ;; "Hangul Syllable Composition, Unicode 5.1 section 3-12" + (let* ((sbase #xac00) + (lbase #x1100) + (vbase #x1161) + (tbase #x11a7) + (scount 11172) + (lcount 19) + (vcount 21) + (tcount 28) + (ncount (* vcount tcount)) + (table (make-hash-table))) + (with-open-file (*standard-input* + (make-pathname :name "Jamo" :type "txt" + :defaults *unicode-character-database*)) + (loop for line = (read-line nil nil) + while line + if (position #\; line) + do (add-jamo-information line table))) + (dotimes (sindex scount) + (let* ((l (+ lbase (floor sindex ncount))) + (v (+ vbase (floor (mod sindex ncount) tcount))) + (tee (+ tbase (mod sindex tcount))) + (name (format nil "HANGUL_SYLLABLE_~A~A~:[~A~;~]" + (gethash l table) (gethash v table) + (= tee tbase) (gethash tee table)))) + (setf (gethash (+ sbase sindex) *unicode-names*) name))))) + +(defun add-jamo-information (line table) + (let* ((split (split-string line #\;)) + (code (parse-integer (first split) :radix 16)) + (syllable (string-trim '(#\Space) + (subseq (second split) 0 (position #\# (second split)))))) + (setf (gethash code table) syllable))) + (defun split-string (line character) (loop for prev-position = 0 then 
(1+ position) for position = (position character line :start prev-position) |