[KoCo-CVS] [Commit] KoreanCodecs/src hangul.c Setup.in
Brought to you by:
perky
From: Chang <pe...@us...> - 2002-04-24 14:17:01
|
perky 02/04/24 07:16:56 Modified: src Setup.in Added: src hangul.c Log: - Add ROUGH implementation of korean.c.hangul module Revision Changes Path 1.2 +1 -0 KoreanCodecs/src/Setup.in Index: Setup.in =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/Setup.in,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- Setup.in 14 Mar 2002 21:10:53 -0000 1.1 +++ Setup.in 24 Apr 2002 14:16:56 -0000 1.2 @@ -1,2 +1,3 @@ *shared* _koco _koco.c +hangul hangul.c 1.1 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== /* * hangul.c * * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@fa...> * Date : $Date: 2002/04/24 14:16:56 $ * Created : 25 April 2002 * * $Revision: 1.1 $ */ static char *version = "$Id: hangul.c,v 1.1 2002/04/24 14:16:56 perky Exp $"; #include "Python.h" enum { /* Jaeum Codes on U+3100 */ G = 0x3131, GG, GS, N, NJ, NH, D, DD, L, LG, LM, LB, LS, LT, LP, LH, M, B, BB, BS, S, SS, NG, J, JJ, C, K, T, P, H }; enum { /* Moeum Codes on U+3100 */ A = 0x314f, AE, YA, YAE, EO, E, YEO, YE, O, WA, WAE, OE, YO, U, WEO, WE, WI, YU, EU, YI, I }; #define NCHOSUNG 19 #define NJUNGSUNG 21 #define NJONGSUNG 28 #define NJAEUM 30 #define NMOEUM 21 #define JAEUM_BOTTOM G #define JAEUM_TOP H #define MOEUM_BOTTOM A #define MOEUM_TOP I #define HANGUL_BOTTOM 0xac00 #define HANGUL_TOP 0xd7a3 #define JBASE_CHOSUNG 0x1100 #define JBASE_JUNGSUNG 0x1161 #define JBASE_JONGSUNG 0x11A8 #define CHOSUNG_FILLER 0x115f #define JUNGSUNG_FILLER 0x1160 #define F_JAEUM 0x01 #define F_MOEUM 0x02 #define F_CHOSUNG 0x04 #define F_JUNGSUNG 0x08 #define F_JONGSUNG 0x10 #define MAX_MULTIJAMO 3 typedef struct _jamotype { char *name; Py_UNICODE code; int multi[MAX_MULTIJAMO]; int flags; } jamotype; #define CODE(c) #c,c #define NOMULTI {0,0,0} #define JC (F_JAEUM | F_CHOSUNG) #define JJ (F_JAEUM | F_JONGSUNG) #define JCJ (F_JAEUM | F_CHOSUNG | F_JONGSUNG) #define MJ (F_MOEUM | F_JUNGSUNG) jamotype jamos[] = { /* JAEUM */ { CODE(G), NOMULTI, JCJ }, { CODE(GG), {G, G,}, JCJ }, { CODE(GS), {G, S,}, JJ }, { CODE(N), NOMULTI, JCJ }, { CODE(NJ), {N, J,}, JJ }, { CODE(NH), {N, H,}, JJ }, { CODE(D), NOMULTI, JCJ }, { CODE(DD), {D, D,}, JC }, { CODE(L), NOMULTI, JCJ }, { CODE(LG), {L, G,}, JJ }, { CODE(LM), {L, M,}, JJ }, { CODE(LB), {L, B,}, JJ }, { CODE(LS), {L, S,}, JJ }, { CODE(LT), {L, T,}, JJ }, { CODE(LP), {L, P,}, JJ }, { CODE(LH), {L, H,}, JJ }, { CODE(M), NOMULTI, JCJ }, { CODE(B), NOMULTI, JCJ }, { CODE(BB), {B, B,}, JC }, { CODE(BS), {B, S,}, JJ }, { CODE(S), NOMULTI, JCJ }, { CODE(SS), {S, S,}, JCJ }, { CODE(NG), NOMULTI, JCJ }, { CODE(J), NOMULTI, JCJ }, { CODE(JJ), {J, J,}, JC }, { CODE(C), NOMULTI, JCJ }, { CODE(K), NOMULTI, JCJ }, { CODE(T), NOMULTI, JCJ }, { CODE(P), NOMULTI, JCJ }, { CODE(H), NOMULTI, JCJ }, /* MOEUM */ { CODE(A), NOMULTI, MJ }, { CODE(AE), {A, I,}, MJ }, { CODE(YA), NOMULTI, MJ }, { CODE(YAE), {YA,I}, MJ }, { CODE(EO), NOMULTI, MJ }, { CODE(E), NOMULTI, MJ }, { CODE(YEO), NOMULTI, MJ }, { CODE(YE), {YEO,I}, MJ }, { CODE(O), NOMULTI, MJ }, { CODE(WA), {O, A}, MJ }, { CODE(WAE), {O,A,I}, MJ }, { CODE(OE), {O, I}, MJ }, { CODE(YO), NOMULTI, MJ }, { CODE(U), NOMULTI, MJ }, { CODE(WEO), {U, EO}, MJ }, { CODE(WE), {U, E}, MJ }, { CODE(WI), {U, I}, MJ }, { CODE(YU), NOMULTI, MJ }, { CODE(EU), NOMULTI, MJ }, { CODE(YI), {EU, I}, MJ }, { CODE(I), NOMULTI, MJ }, /* END MARKER */ { 0, 0, NOMULTI, 0 }, }; #undef JC, JJ, JCJ, MJ, NOMULTI, CODE #define isJaeum(c) (JAEUM_BOTTOM <= (c) && (c) <= JAEUM_TOP) #define isMoeum(c) (MOEUM_BOTTOM <= (c) && (c) <= MOEUM_TOP) static char Py_isJaeum__doc__[] = "isJaeum(code): Verify whether the code is Jaeum."; static PyObject * Py_isJaeum(PyObject *self, PyObject *args) { Py_UNICODE *code; int codelen; if (!PyArg_ParseTuple(args, "u#:isJaeum", &code, &codelen)) return NULL; if (codelen < 1) { PyErr_Format(PyExc_ValueError, "need not null unicode string"); return NULL; } if (isJaeum(*code)) { Py_INCREF(Py_True); return Py_True; } else { Py_INCREF(Py_False); return Py_False; } } static char Py_isMoeum__doc__[] = "isMoeum(code): Verify whether the code is Moeum."; static PyObject * Py_isMoeum(PyObject *self, PyObject *args) { Py_UNICODE *code; int codelen; if (!PyArg_ParseTuple(args, "u#:isMoeum", &code, &codelen)) return NULL; if (codelen < 1) { PyErr_Format(PyExc_ValueError, "need not null unicode string"); return NULL; } if (isMoeum(*code)) { Py_INCREF(Py_True); return Py_True; } else { Py_INCREF(Py_False); return Py_False; } } #if 0 static char cp949_encode__doc__[] = "CP949 encoder"; static PyObject * cp949_encode(PyObject *self, PyObject *args) { Py_UNICODE *argptr, *srccur, *srcend; int arglen, errtype = error_strict; char *errors = NULL; unsigned char *destptr, *destcur, *decbuf; PyObject *r; if (!PyArg_ParseTuple(args, "u#|z:cp949_encode", &argptr, &arglen, &errors)) return NULL; errtype = error_type(errors); if (errtype == error_undef) return NULL; destcur = destptr = PyMem_New(unsigned char, arglen*2+1); for (srccur = argptr, srcend = argptr + arglen; srccur < srcend; srccur++) { if (*srccur <= 0x7F) *(destcur++) = *srccur; else { decbuf = _ksc5601_encode(*srccur); if (!decbuf) decbuf = _uhc_encode(*srccur); if(decbuf == 0) { switch (errtype) { case error_strict: PyMem_Del(destptr); PyErr_Format(PyExc_UnicodeError, "CP949 encoding error: invalid character \\u%04x", *srccur); return NULL; break; case error_replace: *(destcur++) = 0xa1; *(destcur++) = 0xa1; break; /* case error_ignore: break; */ } } else { *(destcur++) = decbuf[0]; *(destcur++) = decbuf[1]; } } } r = codec_tuple(PyString_FromStringAndSize((char*)destptr, destcur - destptr), arglen); PyMem_Del(destptr); return r; } #endif /* List of methods defined in the module */ #define meth(name, func, doc) {name, (PyCFunction)func, METH_VARARGS, doc} static struct PyMethodDef hangul_methods[] = { meth("isJaeum", Py_isJaeum, Py_isJaeum__doc__), meth("isMoeum", Py_isMoeum, Py_isMoeum__doc__), {NULL, NULL}, }; #define SET_INTCONSTANT(dict, value) \ PyDict_SetItemString(dict, #value, PyInt_FromLong((long) value)) #define SET_STRCONSTANT(dict, value) \ PyDict_SetItemString(dict, #value, PyString_FromString(value)) #define SET_CHARCONSTANT(dict, value) \ PyDict_SetItemString(dict, #value, PyString_FromFormat("%c", value)) /* Initialization function for the module */ void inithangul(void) { PyObject *m, *d, *tmp; Py_UNICODE tuni[2]; int i; /* Create the module and add the functions */ m = Py_InitModule("hangul", hangul_methods); /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); SET_INTCONSTANT(d, NCHOSUNG); SET_INTCONSTANT(d, NJUNGSUNG); SET_INTCONSTANT(d, NJONGSUNG); { PyObject *Chosung, *Jungsung, *Jongsung; PyObject *Jaeum, *Moeum; PyObject *JaeumDict, *MoeumDict; PyObject *JaeumCodes, *MoeumCodes; PyObject *JaeumMulti, *MoeumMulti; int cur_cho, cur_jung, cur_jong; int cur_jaeum, cur_moeum; jamotype *jamo; /* Bind Chosung, Jungsung, Jongsung lists */ cur_cho = cur_jung = cur_jong = 0; Chosung = PyList_New(NCHOSUNG); Jungsung = PyList_New(NJUNGSUNG); Jongsung = PyList_New(NJONGSUNG); PyDict_SetItemString(d, "Chosung", Chosung); PyDict_SetItemString(d, "Jungsung", Jungsung); PyDict_SetItemString(d, "Jongsung", Jongsung); PyList_SET_ITEM(Jongsung, cur_jong++, PyUnicode_FromUnicode(NULL, 0)); /* Create Jaeum and Moeum meta class */ JaeumDict = PyDict_New(); MoeumDict = PyDict_New(); tmp = PyString_FromString("Jaeum"); Jaeum = PyClass_New(NULL, JaeumDict, tmp); Py_DECREF(tmp); tmp = PyString_FromString("Moeum"); Moeum = PyClass_New(NULL, MoeumDict, tmp); Py_DECREF(tmp); /* Bind meta class members */ PyDict_SetItemString(d, "Jaeum", Jaeum); PyDict_SetItemString(d, "Moeum", Moeum); PyDict_SetItemString(JaeumDict, "Chosung", Chosung); PyDict_SetItemString(MoeumDict, "Jungsung", Jungsung); PyDict_SetItemString(JaeumDict, "Jongsung", Jongsung); /* Create Jaeum and Moeum Members */ JaeumCodes = PyTuple_New(NJAEUM); MoeumCodes = PyTuple_New(NMOEUM); JaeumMulti = PyDict_New(); MoeumMulti = PyDict_New(); cur_jaeum = cur_moeum = 0; PyDict_SetItemString(JaeumDict, "Codes", JaeumCodes); PyDict_SetItemString(MoeumDict, "Codes", MoeumCodes); PyDict_SetItemString(JaeumDict, "Width", PyInt_FromLong(NJAEUM)); PyDict_SetItemString(MoeumDict, "Width", PyInt_FromLong(NMOEUM)); PyDict_SetItemString(JaeumDict, "MultiElement", JaeumMulti); PyDict_SetItemString(MoeumDict, "MultiElement", MoeumMulti); for (jamo = jamos; jamo->name; jamo++) { PyObject *unijamo, *multicls; int tuplen; tuni[0] = jamo->code; unijamo = PyUnicode_FromUnicode(tuni, 1); PyDict_SetItemString(d, jamo->name, unijamo); Py_INCREF(unijamo); /* PuTyple_SET_ITEM steals reference */ if (jamo->flags & F_JAEUM) { PyTuple_SET_ITEM(JaeumCodes, cur_jaeum++, unijamo); if (jamo->flags & F_CHOSUNG) { PyList_SET_ITEM(Chosung, cur_cho++, unijamo); PyDict_SetItemString(JaeumDict, jamo->name, unijamo); } if (jamo->flags & F_JONGSUNG) { PyList_SET_ITEM(Jongsung, cur_jong++, unijamo); PyDict_SetItemString(JaeumDict, jamo->name, unijamo); } multicls = JaeumMulti; } else { /* Moeum */ PyTuple_SET_ITEM(MoeumCodes, cur_moeum++, unijamo); if (jamo->flags & F_JUNGSUNG) { PyList_SET_ITEM(Jungsung, cur_jung++, unijamo); PyDict_SetItemString(MoeumDict, jamo->name, unijamo); } multicls = MoeumMulti; } if (jamo->multi[0]) { tuplen = jamo->multi[2] ? 3 : 2; tmp = PyTuple_New(tuplen); for (i = 0; i < tuplen; i++) { tuni[0] = jamo->multi[i]; PyTuple_SET_ITEM(tmp, i, PyUnicode_FromUnicode(tuni, 1)); } PyDict_SetItem(multicls, unijamo, tmp); Py_DECREF(tmp); } } Py_DECREF(JaeumDict); Py_DECREF(MoeumDict); } tmp = PyTuple_New(2); tuni[0] = HANGUL_BOTTOM; PyTuple_SET_ITEM(tmp, 0, PyUnicode_FromUnicode(tuni, 1)); tuni[0] = HANGUL_TOP; PyTuple_SET_ITEM(tmp, 1, PyUnicode_FromUnicode(tuni, 1)); PyDict_SetItemString(d, "ZONE", tmp); Py_DECREF(tmp); tuni[0] = JBASE_CHOSUNG; PyDict_SetItemString(d, "JBASE_CHOSUNG", PyUnicode_FromUnicode(tuni, 1)); tuni[0] = JBASE_JUNGSUNG; PyDict_SetItemString(d, "JBASE_JUNGSUNG", PyUnicode_FromUnicode(tuni, 1)); tuni[0] = JBASE_JONGSUNG; PyDict_SetItemString(d, "JBASE_JONGSUNG", PyUnicode_FromUnicode(tuni, 1)); tuni[0] = CHOSUNG_FILLER; PyDict_SetItemString(d, "CHOSUNG_FILLER", PyUnicode_FromUnicode(tuni, 1)); tuni[0] = JUNGSUNG_FILLER; PyDict_SetItemString(d, "JUNGSUNG_FILLER", PyUnicode_FromUnicode(tuni, 1)); PyDict_SetItemString(d, "version", PyString_FromString(version)); /* Check for errors */ if (PyErr_Occurred()) Py_FatalError("can't initialize the hangul module"); } |