Thread: [KoCo-CVS] [Commit] KoreanCodecs/src hangul.c
Brought to you by:
perky
From: Chang <pe...@us...> - 2002-04-25 04:49:04
|
perky 02/04/24 21:49:01 Modified: src hangul.c Log: - Implement join, split, conjoin, disjoint methods on korean.c.hangul Revision Changes Path 1.2 +323 -82 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/hangul.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- hangul.c 24 Apr 2002 14:16:56 -0000 1.1 +++ hangul.c 25 Apr 2002 04:49:01 -0000 1.2 @@ -4,14 +4,14 @@ * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/24 14:16:56 $ + * Date : $Date: 2002/04/25 04:49:01 $ * Created : 25 April 2002 * - * $Revision: 1.1 $ + * $Revision: 1.2 $ */ static char *version = -"$Id: hangul.c,v 1.1 2002/04/24 14:16:56 perky Exp $"; +"$Id: hangul.c,v 1.2 2002/04/25 04:49:01 perky Exp $"; #include "Python.h" @@ -46,53 +46,61 @@ #define CHOSUNG_FILLER 0x115f #define JUNGSUNG_FILLER 0x1160 -#define F_JAEUM 0x01 -#define F_MOEUM 0x02 -#define F_CHOSUNG 0x04 -#define F_JUNGSUNG 0x08 -#define F_JONGSUNG 0x10 +static PyObject *UniNull, *UniSpace; +static PyObject *ErrorObject; #define MAX_MULTIJAMO 3 typedef struct _jamotype { char *name; Py_UNICODE code; - int multi[MAX_MULTIJAMO]; - int flags; + int multi[MAX_MULTIJAMO]; + char orders[3]; /* cho, jung, jong */ } jamotype; #define CODE(c) #c,c #define NOMULTI {0,0,0} -#define JC (F_JAEUM | F_CHOSUNG) -#define JJ (F_JAEUM | F_JONGSUNG) -#define JCJ (F_JAEUM | F_CHOSUNG | F_JONGSUNG) -#define MJ (F_MOEUM | F_JUNGSUNG) -jamotype jamos[] = { +#define J_C {0,-1,-1} +#define J_J {-1,-1,0} +#define J_CJ {0,-1,0} +#define M_J {-1,0,-1} +static jamotype jamos[] = { /* JAEUM */ - { CODE(G), NOMULTI, JCJ }, { CODE(GG), {G, G,}, JCJ }, { CODE(GS), {G, S,}, JJ }, - { CODE(N), NOMULTI, JCJ }, { CODE(NJ), {N, J,}, JJ }, { CODE(NH), {N, H,}, JJ }, - { CODE(D), NOMULTI, JCJ }, { CODE(DD), {D, D,}, JC }, { CODE(L), NOMULTI, JCJ }, - { CODE(LG), {L, G,}, JJ }, { 
CODE(LM), {L, M,}, JJ }, { CODE(LB), {L, B,}, JJ }, - { CODE(LS), {L, S,}, JJ }, { CODE(LT), {L, T,}, JJ }, { CODE(LP), {L, P,}, JJ }, - { CODE(LH), {L, H,}, JJ }, { CODE(M), NOMULTI, JCJ }, { CODE(B), NOMULTI, JCJ }, - { CODE(BB), {B, B,}, JC }, { CODE(BS), {B, S,}, JJ }, { CODE(S), NOMULTI, JCJ }, - { CODE(SS), {S, S,}, JCJ }, { CODE(NG), NOMULTI, JCJ }, { CODE(J), NOMULTI, JCJ }, - { CODE(JJ), {J, J,}, JC }, { CODE(C), NOMULTI, JCJ }, { CODE(K), NOMULTI, JCJ }, - { CODE(T), NOMULTI, JCJ }, { CODE(P), NOMULTI, JCJ }, { CODE(H), NOMULTI, JCJ }, + { CODE(G), NOMULTI, J_CJ }, { CODE(GG), {G, G,}, J_CJ }, { CODE(GS), {G, S,}, J_J }, + { CODE(N), NOMULTI, J_CJ }, { CODE(NJ), {N, J,}, J_J }, { CODE(NH), {N, H,}, J_J }, + { CODE(D), NOMULTI, J_CJ }, { CODE(DD), {D, D,}, J_C }, { CODE(L), NOMULTI, J_CJ }, + { CODE(LG), {L, G,}, J_J }, { CODE(LM), {L, M,}, J_J }, { CODE(LB), {L, B,}, J_J }, + { CODE(LS), {L, S,}, J_J }, { CODE(LT), {L, T,}, J_J }, { CODE(LP), {L, P,}, J_J }, + { CODE(LH), {L, H,}, J_J }, { CODE(M), NOMULTI, J_CJ }, { CODE(B), NOMULTI, J_CJ }, + { CODE(BB), {B, B,}, J_C }, { CODE(BS), {B, S,}, J_J }, { CODE(S), NOMULTI, J_CJ }, + { CODE(SS), {S, S,}, J_CJ }, { CODE(NG), NOMULTI, J_CJ }, { CODE(J), NOMULTI, J_CJ }, + { CODE(JJ), {J, J,}, J_C }, { CODE(C), NOMULTI, J_CJ }, { CODE(K), NOMULTI, J_CJ }, + { CODE(T), NOMULTI, J_CJ }, { CODE(P), NOMULTI, J_CJ }, { CODE(H), NOMULTI, J_CJ }, /* MOEUM */ - { CODE(A), NOMULTI, MJ }, { CODE(AE), {A, I,}, MJ }, { CODE(YA), NOMULTI, MJ }, - { CODE(YAE), {YA,I}, MJ }, { CODE(EO), NOMULTI, MJ }, { CODE(E), NOMULTI, MJ }, - { CODE(YEO), NOMULTI, MJ }, { CODE(YE), {YEO,I}, MJ }, { CODE(O), NOMULTI, MJ }, - { CODE(WA), {O, A}, MJ }, { CODE(WAE), {O,A,I}, MJ }, { CODE(OE), {O, I}, MJ }, - { CODE(YO), NOMULTI, MJ }, { CODE(U), NOMULTI, MJ }, { CODE(WEO), {U, EO}, MJ }, - { CODE(WE), {U, E}, MJ }, { CODE(WI), {U, I}, MJ }, { CODE(YU), NOMULTI, MJ }, - { CODE(EU), NOMULTI, MJ }, { CODE(YI), {EU, I}, MJ }, { CODE(I), NOMULTI, MJ 
}, + { CODE(A), NOMULTI, M_J }, { CODE(AE), {A, I,}, M_J }, { CODE(YA), NOMULTI, M_J }, + { CODE(YAE), {YA,I}, M_J }, { CODE(EO), NOMULTI, M_J }, { CODE(E), NOMULTI, M_J }, + { CODE(YEO), NOMULTI, M_J }, { CODE(YE), {YEO,I}, M_J }, { CODE(O), NOMULTI, M_J }, + { CODE(WA), {O, A}, M_J }, { CODE(WAE), {O,A,I}, M_J }, { CODE(OE), {O, I}, M_J }, + { CODE(YO), NOMULTI, M_J }, { CODE(U), NOMULTI, M_J }, { CODE(WEO), {U, EO}, M_J }, + { CODE(WE), {U, E}, M_J }, { CODE(WI), {U, I}, M_J }, { CODE(YU), NOMULTI, M_J }, + { CODE(EU), NOMULTI, M_J }, { CODE(YI), {EU, I}, M_J }, { CODE(I), NOMULTI, M_J }, /* END MARKER */ - { 0, 0, NOMULTI, 0 }, + { 0, 0, NOMULTI, {0,} }, }; -#undef JC, JJ, JCJ, MJ, NOMULTI, CODE +#undef J_C, J_J, J_CJ, M_J, NOMULTI, CODE +static jamotype *jamo_chosung[NCHOSUNG], *jamo_jungsung[NJUNGSUNG], *jamo_jongsung[NJONGSUNG]; + +#define getJamotype(c) jamos[(c)-JAEUM_BOTTOM] #define isJaeum(c) (JAEUM_BOTTOM <= (c) && (c) <= JAEUM_TOP) #define isMoeum(c) (MOEUM_BOTTOM <= (c) && (c) <= MOEUM_TOP) +#define isHangulSyllable(c) (HANGUL_BOTTOM <= (c) && (c) <= HANGUL_TOP) +#define isChosung(c) (getJamotype(c).orders[0] >= 0) +#define isJungsung(c) (getJamotype(c).orders[1] >= 0) +#define isJongsung(c) (getJamotype(c).orders[2] >= 0) +#define getChosungOrder(c) (getJamotype(c).orders[0]) +#define getJungsungOrder(c) (getJamotype(c).orders[1]) +#define getJongsungOrder(c) (getJamotype(c).orders[2]) + static char Py_isJaeum__doc__[] = "isJaeum(code): Verify whether the code is Jaeum."; @@ -113,7 +121,8 @@ if (isJaeum(*code)) { Py_INCREF(Py_True); return Py_True; - } else { + } + else { Py_INCREF(Py_False); return Py_False; } @@ -138,76 +147,290 @@ if (isMoeum(*code)) { Py_INCREF(Py_True); return Py_True; - } else { + } + else { + Py_INCREF(Py_False); + return Py_False; + } +} + +static char Py_ishangul__doc__[] = "ishangul(code): Verify whether the code is hangul."; + +static PyObject * +Py_ishangul(PyObject *self, PyObject *args) +{ + Py_UNICODE *code; + int 
codelen; + + if (!PyArg_ParseTuple(args, "u#:ishangul", &code, &codelen)) + return NULL; + + if (codelen < 1) { + PyErr_Format(PyExc_ValueError, "need not null unicode string"); + return NULL; + } + + if (isHangulSyllable(*code) || isJaeum(*code) || isMoeum(*code)) { + Py_INCREF(Py_True); + return Py_True; + } + else { Py_INCREF(Py_False); return Py_False; } } -#if 0 -static char cp949_encode__doc__[] = "CP949 encoder"; +static char Py_join__doc__[] = "join([chosung, jungsung, jongsung]): Assemble hangul syllable from jamos."; static PyObject * -cp949_encode(PyObject *self, PyObject *args) +Py_join(PyObject *self, PyObject *args) { - Py_UNICODE *argptr, *srccur, *srcend; - int arglen, errtype = error_strict; - char *errors = NULL; - unsigned char *destptr, *destcur, *decbuf; + PyObject *argchar, *argelems[3]; + Py_UNICODE elems[3], *uobj; + int i; + + if (!PyArg_ParseTuple(args, "O:join", &argchar)) + return NULL; + + if (PyList_Check(argchar)) { + if (PyList_GET_SIZE(argchar) != 3) + goto argerr; + for (i = 0; i < 3; i ++) + argelems[i] = PyList_GET_ITEM(argchar, i); + } + else if (PyTuple_Check(argchar)) { + if (PyTuple_GET_SIZE(argchar) != 3) + goto argerr; + for (i = 0; i < 3; i ++) + argelems[i] = PyTuple_GET_ITEM(argchar, i); + } + else { +argerr: PyErr_Format(PyExc_ValueError, "need list or tuple with 3 unicode elements"); + return NULL; + } + + for (i = 0; i < 3; i ++) { + if ((uobj = PyUnicode_AsUnicode(argelems[i])) == NULL) + goto argerr; + if (PyUnicode_GET_SIZE(argelems[i])) + elems[i] = *uobj; + else + elems[i] = NULL; + } + + if ( (elems[0] && (!isJaeum(elems[0]) || !isChosung(elems[0]))) /* Chosung validity */ + || (elems[1] && (!isMoeum(elems[1]))) /* Jungsung validity */ + || (elems[2] && (!isJaeum(elems[2]) || !isJongsung(elems[2]))) ) { + PyErr_Format(ErrorObject, "not valid jamo combination"); + return NULL; + } + + if ((!elems[0] || !elems[1]) && elems[2]) { + PyErr_Format(ErrorObject, "trying to assemble character which " + "is not in unicode 
map"); + return NULL; + } + else if (elems[0] && !elems[1]) { + Py_INCREF(argelems[0]); + return argelems[0]; + } + else if (elems[1] && !elems[0]) { + Py_INCREF(argelems[1]); + return argelems[1]; + } + else if (!elems[0]) { /* [Null, Null, Null] */ + Py_INCREF(UniSpace); + return UniSpace; + } + else { + Py_UNICODE code; + + code = ((getChosungOrder(elems[0]) * NJUNGSUNG) + getJungsungOrder(elems[1])) * + NJONGSUNG + getJongsungOrder(elems[2]) + HANGUL_BOTTOM; + return PyUnicode_FromUnicode(&code, 1); + } +} + +static char Py_split__doc__[] = "split(code): Disassemble hangul syllable into jamos."; + +static PyObject * +Py_split(PyObject *self, PyObject *args) +{ + Py_UNICODE *code; PyObject *r; + int codelen; + + if (!PyArg_ParseTuple(args, "u#:split", &code, &codelen)) + return NULL; + + if (codelen < 1) { + PyErr_Format(PyExc_ValueError, "need not null unicode string"); + return NULL; + } + + if (isHangulSyllable(*code)) { + Py_UNICODE cho, jung, jong; + PyObject *jongobj; + Py_UNICODE hseq, t; + + hseq = *code - HANGUL_BOTTOM; + + cho = jamo_chosung[hseq / (NJUNGSUNG*NJONGSUNG)]->code; + jung = jamo_jungsung[(hseq / NJONGSUNG) % NJUNGSUNG]->code; + + if ((t = hseq % NJONGSUNG) != NULL) { + jong = jamo_jongsung[t]->code; + jongobj = PyUnicode_FromUnicode(&jong, 1); + } else { + jongobj = UniNull; + Py_INCREF(UniNull); + } + + r = PyTuple_New(3); + PyTuple_SET_ITEM(r, 0, PyUnicode_FromUnicode(&cho, 1)); + PyTuple_SET_ITEM(r, 1, PyUnicode_FromUnicode(&jung, 1)); + PyTuple_SET_ITEM(r, 2, jongobj); - if (!PyArg_ParseTuple(args, "u#|z:cp949_encode", &argptr, &arglen, &errors)) + return r; + } + else if (isJaeum(*code)) { + r = PyTuple_New(3); + PyTuple_SET_ITEM(r, 0, PyUnicode_FromUnicode(code, 1)); + PyTuple_SET_ITEM(r, 1, UniNull); Py_INCREF(UniNull); + PyTuple_SET_ITEM(r, 2, UniNull); Py_INCREF(UniNull); + return r; + } + else if (isMoeum(*code)) { + r = PyTuple_New(3); + PyTuple_SET_ITEM(r, 0, UniNull); Py_INCREF(UniNull); + PyTuple_SET_ITEM(r, 1, 
PyUnicode_FromUnicode(code, 1)); + PyTuple_SET_ITEM(r, 2, UniNull); Py_INCREF(UniNull); + return r; + } + else { + PyErr_Format(ErrorObject, "not a hangul code"); return NULL; + } +} + +static char Py_conjoin__doc__[] = "conjoin(unicodestring): conjoin unicode johab string into unicode syllable string"; + +static PyObject * +Py_conjoin(PyObject *self, PyObject *args) +{ + PyObject *r; + Py_UNICODE *code, *dst, *dstorg, c; + int cho, jung, jong; + int codelen, i; - errtype = error_type(errors); - if (errtype == error_undef) + if (!PyArg_ParseTuple(args, "u#:conjoin", &code, &codelen)) return NULL; - destcur = destptr = PyMem_New(unsigned char, arglen*2+1); - for (srccur = argptr, srcend = argptr + arglen; srccur < srcend; srccur++) { - if (*srccur <= 0x7F) - *(destcur++) = *srccur; - else { - decbuf = _ksc5601_encode(*srccur); - if (!decbuf) - decbuf = _uhc_encode(*srccur); - if(decbuf == 0) { - switch (errtype) { - case error_strict: - PyMem_Del(destptr); - PyErr_Format(PyExc_UnicodeError, - "CP949 encoding error: invalid character \\u%04x", - *srccur); - return NULL; - break; - case error_replace: - *(destcur++) = 0xa1; - *(destcur++) = 0xa1; - break; - /* case error_ignore: break; */ + dstorg = dst = PyMem_New(Py_UNICODE, codelen); + + for (i = 0; i < codelen; i++) { + c = code[i]; + if ((JBASE_CHOSUNG <= c && c <= 0x1112) || c == CHOSUNG_FILLER) { + if (codelen > i+1 && JUNGSUNG_FILLER <= code[i+1] && code[i+1] <= 0x1175) { + if (c == CHOSUNG_FILLER) cho = -1; + else cho = c - JBASE_CHOSUNG; + if (code[i+1] == JUNGSUNG_FILLER) jung = -1; + else jung = code[i+1] - JBASE_JUNGSUNG; + + if (codelen > i+2 && JBASE_JONGSUNG <= code[i+2] && code[i+2] <= 0x11c2) { + jong = code[i+2] - JBASE_JONGSUNG + 1; + i += 2; + } + else { + jong = 0; i++; + } + + if (jong && (cho == -1 || jung == -1)) { /* can't trans to syllable */ + if (cho >= 0) *(dst++) = jamo_chosung[cho]->code; + if (jung >= 0) *(dst++) = jamo_jungsung[jung]->code; + *(dst++) = jamo_jongsung[jong]->code; } - 
} else { - *(destcur++) = decbuf[0]; - *(destcur++) = decbuf[1]; + else if (cho == -1) /* jungsung only */ + *(dst++) = jamo_jungsung[jung]->code; + else if (jung == -1) /* chosung only */ + *(dst++) = jamo_chosung[cho]->code; + else /* full set */ + *(dst++) = HANGUL_BOTTOM + (cho * NJUNGSUNG + jung) * NJONGSUNG + jong; } + else if (c != CHOSUNG_FILLER) /* chosung only */ + *(dst++) = jamo_chosung[c-JBASE_CHOSUNG]->code; } + else if (JBASE_JUNGSUNG <= c && c <= 0x1175) /* jungsung only */ + *(dst++) = jamo_jungsung[c-JBASE_JUNGSUNG]->code; + else + *(dst++) = c; } - r = codec_tuple(PyString_FromStringAndSize((char*)destptr, destcur - destptr), arglen); - PyMem_Del(destptr); + r = PyUnicode_FromUnicode(dstorg, dst-dstorg); + PyMem_Del(dstorg); + return r; } -#endif +static char Py_disjoint__doc__[] = "disjoint(unicodestring): disjoint unicode syllable string into unicode johab string"; + +static PyObject * +Py_disjoint(PyObject *self, PyObject *args) +{ + Py_UNICODE *code, *dst, *dstorg, c; + PyObject *r; + int codelen, i; + + if (!PyArg_ParseTuple(args, "u#:split", &code, &codelen)) + return NULL; + + dstorg = dst = PyMem_New(Py_UNICODE, codelen*3); + + for (i = 0; i < codelen; i++) { + c = code[i]; + if (isHangulSyllable(c)) { + int hseq; + Py_UNICODE jong; + + hseq = c - HANGUL_BOTTOM; + jong = hseq % NJONGSUNG; + + *(dst++) = hseq / (NJUNGSUNG * NJONGSUNG) + JBASE_CHOSUNG; + *(dst++) = (hseq / NJONGSUNG) % NJUNGSUNG + JBASE_JUNGSUNG; + if (jong) + *(dst++) = jong + JBASE_JONGSUNG - 1; + } + else if (isJaeum(c) && isChosung(c)) { + *(dst++) = getChosungOrder(c) + JBASE_CHOSUNG; + *(dst++) = JUNGSUNG_FILLER; + } + else if (isMoeum(c)) { + *(dst++) = CHOSUNG_FILLER; + *(dst++) = getJungsungOrder(c) + JBASE_JUNGSUNG; + } else + *(dst++) = c; + } + + r = PyUnicode_FromUnicode(dstorg, dst-dstorg); + PyMem_Del(dstorg); + + return r; +} + /* List of methods defined in the module */ #define meth(name, func, doc) {name, (PyCFunction)func, METH_VARARGS, doc} static struct 
PyMethodDef hangul_methods[] = { - meth("isJaeum", Py_isJaeum, Py_isJaeum__doc__), - meth("isMoeum", Py_isMoeum, Py_isMoeum__doc__), + meth("isJaeum", Py_isJaeum, Py_isJaeum__doc__), + meth("isMoeum", Py_isMoeum, Py_isMoeum__doc__), + meth("ishangul", Py_ishangul, Py_ishangul__doc__), + meth("join", Py_join, Py_join__doc__), + meth("split", Py_split, Py_split__doc__), + meth("conjoin", Py_conjoin, Py_conjoin__doc__), + meth("disjoint", Py_disjoint, Py_disjoint__doc__), {NULL, NULL}, }; @@ -230,6 +453,11 @@ /* Create the module and add the functions */ m = Py_InitModule("hangul", hangul_methods); + UniNull = PyUnicode_FromUnicode(NULL, 0); + tuni[0] = 0x3000; /* Unicode Double-wide Space */ + UniSpace = PyUnicode_FromUnicode(tuni, 1); + Py_INCREF(UniSpace); + /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); SET_INTCONSTANT(d, NCHOSUNG); @@ -253,7 +481,9 @@ PyDict_SetItemString(d, "Chosung", Chosung); PyDict_SetItemString(d, "Jungsung", Jungsung); PyDict_SetItemString(d, "Jongsung", Jongsung); - PyList_SET_ITEM(Jongsung, cur_jong++, PyUnicode_FromUnicode(NULL, 0)); + jamo_jongsung[cur_jong] = NULL; + Py_INCREF(UniNull); + PyList_SET_ITEM(Jongsung, cur_jong++, UniNull); /* Create Jaeum and Moeum meta class */ JaeumDict = PyDict_New(); @@ -294,20 +524,27 @@ PyDict_SetItemString(d, jamo->name, unijamo); Py_INCREF(unijamo); /* PuTyple_SET_ITEM steals reference */ - if (jamo->flags & F_JAEUM) { + if (isJaeum(jamo->code)) { PyTuple_SET_ITEM(JaeumCodes, cur_jaeum++, unijamo); - if (jamo->flags & F_CHOSUNG) { + if (isChosung(jamo->code)) { + jamo->orders[0] = cur_cho; + jamo_chosung[cur_cho] = jamo; PyList_SET_ITEM(Chosung, cur_cho++, unijamo); PyDict_SetItemString(JaeumDict, jamo->name, unijamo); } - if (jamo->flags & F_JONGSUNG) { + if (isJongsung(jamo->code)) { + jamo->orders[2] = cur_jong; + jamo_jongsung[cur_jong] = jamo; PyList_SET_ITEM(Jongsung, cur_jong++, unijamo); PyDict_SetItemString(JaeumDict, jamo->name, unijamo); } multicls = JaeumMulti; 
- } else { /* Moeum */ + } + else { /* Moeum */ PyTuple_SET_ITEM(MoeumCodes, cur_moeum++, unijamo); - if (jamo->flags & F_JUNGSUNG) { + if (isJungsung(jamo->code)) { + jamo->orders[1] = cur_jung; + jamo_jungsung[cur_jung] = jamo; PyList_SET_ITEM(Jungsung, cur_jung++, unijamo); PyDict_SetItemString(MoeumDict, jamo->name, unijamo); } @@ -347,8 +584,12 @@ PyDict_SetItemString(d, "CHOSUNG_FILLER", PyUnicode_FromUnicode(tuni, 1)); tuni[0] = JUNGSUNG_FILLER; PyDict_SetItemString(d, "JUNGSUNG_FILLER", PyUnicode_FromUnicode(tuni, 1)); + PyDict_SetItemString(d, "Null", UniNull); PyDict_SetItemString(d, "version", PyString_FromString(version)); + + ErrorObject = PyErr_NewException("hangul.UnicodeHangulError", NULL, NULL); + PyDict_SetItemString(d, "UnicodeHangulError", ErrorObject); /* Check for errors */ if (PyErr_Occurred()) |
From: Chang <pe...@us...> - 2002-04-25 05:12:19
|
perky 02/04/24 22:12:18 Modified: src hangul.c Log: - Fix the problem around syllable without jongsung. Revision Changes Path 1.3 +4 -4 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/hangul.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- hangul.c 25 Apr 2002 04:49:01 -0000 1.2 +++ hangul.c 25 Apr 2002 05:12:17 -0000 1.3 @@ -4,14 +4,14 @@ * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/25 04:49:01 $ + * Date : $Date: 2002/04/25 05:12:17 $ * Created : 25 April 2002 * - * $Revision: 1.2 $ + * $Revision: 1.3 $ */ static char *version = -"$Id: hangul.c,v 1.2 2002/04/25 04:49:01 perky Exp $"; +"$Id: hangul.c,v 1.3 2002/04/25 05:12:17 perky Exp $"; #include "Python.h" @@ -246,7 +246,7 @@ Py_UNICODE code; code = ((getChosungOrder(elems[0]) * NJUNGSUNG) + getJungsungOrder(elems[1])) * - NJONGSUNG + getJongsungOrder(elems[2]) + HANGUL_BOTTOM; + NJONGSUNG + (elems[2]?getJongsungOrder(elems[2]):0) + HANGUL_BOTTOM; return PyUnicode_FromUnicode(&code, 1); } } |
From: Chang <pe...@us...> - 2002-04-25 20:55:28
|
perky 02/04/25 13:55:25 Modified: src hangul.c Log: - Add hangul.format C implementation. Revision Changes Path 1.4 +175 -24 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/hangul.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- hangul.c 25 Apr 2002 05:12:17 -0000 1.3 +++ hangul.c 25 Apr 2002 20:55:25 -0000 1.4 @@ -4,14 +4,14 @@ * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/25 05:12:17 $ + * Date : $Date: 2002/04/25 20:55:25 $ * Created : 25 April 2002 * - * $Revision: 1.3 $ + * $Revision: 1.4 $ */ static char *version = -"$Id: hangul.c,v 1.3 2002/04/25 05:12:17 perky Exp $"; +"$Id: hangul.c,v 1.4 2002/04/25 20:55:25 perky Exp $"; #include "Python.h" @@ -102,10 +102,10 @@ #define getJongsungOrder(c) (getJamotype(c).orders[2]) -static char Py_isJaeum__doc__[] = "isJaeum(code): Verify whether the code is Jaeum."; +static char hangul_isJaeum__doc__[] = "isJaeum(code): Verify whether the code is Jaeum."; static PyObject * -Py_isJaeum(PyObject *self, PyObject *args) +hangul_isJaeum(PyObject *self, PyObject *args) { Py_UNICODE *code; int codelen; @@ -128,10 +128,10 @@ } } -static char Py_isMoeum__doc__[] = "isMoeum(code): Verify whether the code is Moeum."; +static char hangul_isMoeum__doc__[] = "isMoeum(code): Verify whether the code is Moeum."; static PyObject * -Py_isMoeum(PyObject *self, PyObject *args) +hangul_isMoeum(PyObject *self, PyObject *args) { Py_UNICODE *code; int codelen; @@ -154,10 +154,10 @@ } } -static char Py_ishangul__doc__[] = "ishangul(code): Verify whether the code is hangul."; +static char hangul_ishangul__doc__[] = "ishangul(code): Verify whether the code is hangul."; static PyObject * -Py_ishangul(PyObject *self, PyObject *args) +hangul_ishangul(PyObject *self, PyObject *args) { Py_UNICODE *code; int codelen; @@ -180,10 +180,10 @@ } } -static char 
Py_join__doc__[] = "join([chosung, jungsung, jongsung]): Assemble hangul syllable from jamos."; +static char hangul_join__doc__[] = "join([chosung, jungsung, jongsung]): Assemble hangul syllable from jamos."; static PyObject * -Py_join(PyObject *self, PyObject *args) +hangul_join(PyObject *self, PyObject *args) { PyObject *argchar, *argelems[3]; Py_UNICODE elems[3], *uobj; @@ -251,10 +251,10 @@ } } -static char Py_split__doc__[] = "split(code): Disassemble hangul syllable into jamos."; +static char hangul_split__doc__[] = "split(code): Disassemble hangul syllable into jamos."; static PyObject * -Py_split(PyObject *self, PyObject *args) +hangul_split(PyObject *self, PyObject *args) { Py_UNICODE *code; PyObject *r; @@ -313,10 +313,10 @@ } } -static char Py_conjoin__doc__[] = "conjoin(unicodestring): conjoin unicode johab string into unicode syllable string"; +static char hangul_conjoin__doc__[] = "conjoin(unicodestring): conjoin unicode johab string into unicode syllable string"; static PyObject * -Py_conjoin(PyObject *self, PyObject *args) +hangul_conjoin(PyObject *self, PyObject *args) { PyObject *r; Py_UNICODE *code, *dst, *dstorg, c; @@ -373,10 +373,10 @@ } -static char Py_disjoint__doc__[] = "disjoint(unicodestring): disjoint unicode syllable string into unicode johab string"; +static char hangul_disjoint__doc__[] = "disjoint(unicodestring): disjoint unicode syllable string into unicode johab string"; static PyObject * -Py_disjoint(PyObject *self, PyObject *args) +hangul_disjoint(PyObject *self, PyObject *args) { Py_UNICODE *code, *dst, *dstorg, c; PyObject *r; @@ -419,18 +419,169 @@ } +static char pseudofinal[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, /* 2 */ + 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 3 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 4 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5 */ + 0, 
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 6 */ + 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7 */ +}; + +static char hangul_format__doc__[] = "format(fmt, arg1, arg2, ...) or format(fmt, kw1=arg1, kw2=arg2" + ", ...):\nformat unicode string and fix korean suffixes after arguments"; + +static PyObject * +hangul_format(PyObject *self, PyObject *args, PyObject *kwargs) +{ +/*--- Poor Structure of this function ;) + hangul_format(fmt, *args, **kwargs) + -> insert end fmtmarkers(U+115E which is not used by Unicode) after every format position + -> PyUnicode_Format + -> Fix and update hangul suffixes in place of fmtmarkers + -> make PyObject and return. + */ +#define FMTMARKER 0x115E + Py_UNICODE *fmt, *fmtout, *fcur; + PyObject *r; + int fmtsize; + int inpth, infmt, escape; + + { + PyObject *fmtobj; + int argsize; + + argsize = PyTuple_GET_SIZE(args); + if (!argsize || !PyUnicode_Check(fmtobj = PyTuple_GET_ITEM(args, 0))) { + PyErr_Format(PyExc_TypeError, "needs unicode format string."); + return NULL; + } + fmtsize = PyUnicode_GET_SIZE(fmtobj); + fmt = PyUnicode_AS_UNICODE(fmtobj); + + if (!kwargs) + args = PyTuple_GetSlice(args, 1, argsize); + } + + fmtout = PyMem_New(Py_UNICODE, fmtsize + fmtsize/2); + inpth = infmt = escape = 0; + + for (fcur = fmtout; fmtsize--; fmt++) { + if (*fmt != FMTMARKER) /* skip bogus markers */ + *(fcur++) = *fmt; + + if (escape) + escape = 0; + else if (*fmt == '\\') + escape = 1; + else if (infmt) { + if (!inpth && (('A' <= *fmt && *fmt <= 'Z') || ('a' <= *fmt && *fmt <= 'z'))) { + *(fcur++) = FMTMARKER; + infmt = 0; + } + else if (inpth && *fmt == ')') + inpth = 0; + else if (*fmt == '(') + inpth = 1; + else if (*fmt == '%') + infmt = 0; + } + else if (*fmt == '%') + infmt = 1; + } + + r = PyUnicode_Format( + PyUnicode_FromUnicode(fmtout, fcur-fmtout), + kwargs?kwargs:args + ); + if (!kwargs) { + Py_DECREF(args); + } /* {} to avoid gcc warning */ + if (!r) + goto out; + + fmt = PyUnicode_AS_UNICODE(r); + fmtsize = 
PyUnicode_GET_SIZE(r); + Py_DECREF(r); + +#define HAS_FINAL() ( \ + (past = *(fmt-1)), \ + isHangulSyllable(past) ? \ + ((past-HANGUL_BOTTOM) % NJONGSUNG > 0) \ + : (past < 0x80 ? pseudofinal[past] : 0) \ +) + +#define HAS_FINAL_OR_NOTSYL() ( \ + (past = *(fmt-1)), \ + isHangulSyllable(past) ? \ + ((past-HANGUL_BOTTOM) % NJONGSUNG > 0) \ + : 1 \ +) + +#define PROCESSSUFFIX(nofinal, existfinal) \ + if (next == nofinal || next == existfinal) { \ + *(fcur++) = HAS_FINAL() ? (existfinal) : (nofinal); \ + fmtsize--; fmt++; \ + } + +#define PROCESSSUFFIX_IDA(jongsungadder, existfinal) \ + if (next == existfinal) { \ + if (HAS_FINAL_OR_NOTSYL()) \ + *(fcur++) = existfinal; \ + else \ + *(fcur-1) += jongsungadder; \ + fmtsize-=3; fmt+=3; \ + } + + for (fcur = fmtout; fmtsize--; fmt++) { + if (*fmt == FMTMARKER) { + if (fcur > fmtout && fmtsize > 0) { + Py_UNICODE past, next = *(fmt+1); + + if (next == '(' && fmtsize > 2 && *(fmt+3) == ')') { /* ida suffxes */ + next = *(fmt+2); + PROCESSSUFFIX_IDA(0, 0xc774) /* (I)DA */ + else PROCESSSUFFIX_IDA(17, 0xc785) /* (IP)NIDA */ + else PROCESSSUFFIX_IDA(4, 0xc778) /* (IN)- */ + } + else if (0xac00 <= next && next <= 0xc774) { + PROCESSSUFFIX(0xc744, 0xb97c) /* REUL, EUL */ + else PROCESSSUFFIX(0xc740, 0xb294) /* NEUN, EUN */ + else PROCESSSUFFIX(0xac00, 0xc774) /* I, GA */ + else PROCESSSUFFIX(0xc640, 0xacfc) /* WA, GWA */ + } + } + } + else + *(fcur++) = *fmt; + } + +#undef PROCESSSUFFIX, PROCESSSUFFIX_IDA +#undef HAS_FINAL, HAS_FINAL_OR_NOTSYL + + r = PyUnicode_FromUnicode(fmtout, fcur-fmtout); + +out: + PyMem_Free(fmtout); + return r; +} + /* List of methods defined in the module */ #define meth(name, func, doc) {name, (PyCFunction)func, METH_VARARGS, doc} +#define meth_kw(name, func, doc) {name, (PyCFunction)func, METH_VARARGS|METH_KEYWORDS, doc} static struct PyMethodDef hangul_methods[] = { - meth("isJaeum", Py_isJaeum, Py_isJaeum__doc__), - meth("isMoeum", Py_isMoeum, Py_isMoeum__doc__), - meth("ishangul", Py_ishangul, 
Py_ishangul__doc__), - meth("join", Py_join, Py_join__doc__), - meth("split", Py_split, Py_split__doc__), - meth("conjoin", Py_conjoin, Py_conjoin__doc__), - meth("disjoint", Py_disjoint, Py_disjoint__doc__), + meth("isJaeum", hangul_isJaeum, hangul_isJaeum__doc__), + meth("isMoeum", hangul_isMoeum, hangul_isMoeum__doc__), + meth("ishangul", hangul_ishangul, hangul_ishangul__doc__), + meth("join", hangul_join, hangul_join__doc__), + meth("split", hangul_split, hangul_split__doc__), + meth("conjoin", hangul_conjoin, hangul_conjoin__doc__), + meth("disjoint", hangul_disjoint, hangul_disjoint__doc__), + meth_kw("format", hangul_format, hangul_format__doc__), {NULL, NULL}, }; |
From: Chang <pe...@us...> - 2002-04-25 21:13:48
|
perky 02/04/25 14:13:45 Modified: src hangul.c Log: - Change format argument passing to *args, **kwargs form - Split unittests into CExtension and PurePython Revision Changes Path 1.5 +6 -6 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/hangul.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- hangul.c 25 Apr 2002 20:55:25 -0000 1.4 +++ hangul.c 25 Apr 2002 21:13:45 -0000 1.5 @@ -4,14 +4,14 @@ * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/25 20:55:25 $ + * Date : $Date: 2002/04/25 21:13:45 $ * Created : 25 April 2002 * - * $Revision: 1.4 $ + * $Revision: 1.5 $ */ static char *version = -"$Id: hangul.c,v 1.4 2002/04/25 20:55:25 perky Exp $"; +"$Id: hangul.c,v 1.5 2002/04/25 21:13:45 perky Exp $"; #include "Python.h" @@ -547,9 +547,9 @@ else PROCESSSUFFIX_IDA(4, 0xc778) /* (IN)- */ } else if (0xac00 <= next && next <= 0xc774) { - PROCESSSUFFIX(0xc744, 0xb97c) /* REUL, EUL */ - else PROCESSSUFFIX(0xc740, 0xb294) /* NEUN, EUN */ - else PROCESSSUFFIX(0xac00, 0xc774) /* I, GA */ + PROCESSSUFFIX(0xb97c, 0xc744) /* REUL, EUL */ + else PROCESSSUFFIX(0xb294, 0xc740) /* NEUN, EUN */ + else PROCESSSUFFIX(0xac00, 0xc774) /* GA, I */ else PROCESSSUFFIX(0xc640, 0xacfc) /* WA, GWA */ } } |
From: Chang <pe...@us...> - 2002-04-26 08:08:03
|
perky 02/04/26 01:03:18 Modified: src hangul.c Log: - Make glibc+gcc happy on linux platform (suppressing warnings ~) - Fix fatal memory deallocation bug on hangul_format Submitted by: Anonymous of www.python.or.kr Revision Changes Path 1.6 +6 -6 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/hangul.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- hangul.c 25 Apr 2002 21:13:45 -0000 1.5 +++ hangul.c 26 Apr 2002 08:03:18 -0000 1.6 @@ -4,14 +4,14 @@ * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/25 21:13:45 $ + * Date : $Date: 2002/04/26 08:03:18 $ * Created : 25 April 2002 * - * $Revision: 1.5 $ + * $Revision: 1.6 $ */ static char *version = -"$Id: hangul.c,v 1.5 2002/04/25 21:13:45 perky Exp $"; +"$Id: hangul.c,v 1.6 2002/04/26 08:03:18 perky Exp $"; #include "Python.h" @@ -215,7 +215,7 @@ if (PyUnicode_GET_SIZE(argelems[i])) elems[i] = *uobj; else - elems[i] = NULL; + elems[i] = 0; } if ( (elems[0] && (!isJaeum(elems[0]) || !isChosung(elems[0]))) /* Chosung validity */ @@ -278,7 +278,7 @@ cho = jamo_chosung[hseq / (NJUNGSUNG*NJONGSUNG)]->code; jung = jamo_jungsung[(hseq / NJONGSUNG) % NJUNGSUNG]->code; - if ((t = hseq % NJONGSUNG) != NULL) { + if ((t = hseq % NJONGSUNG)) { jong = jamo_jongsung[t]->code; jongobj = PyUnicode_FromUnicode(&jong, 1); } else { @@ -504,7 +504,6 @@ fmt = PyUnicode_AS_UNICODE(r); fmtsize = PyUnicode_GET_SIZE(r); - Py_DECREF(r); #define HAS_FINAL() ( \ (past = *(fmt-1)), \ @@ -561,6 +560,7 @@ #undef PROCESSSUFFIX, PROCESSSUFFIX_IDA #undef HAS_FINAL, HAS_FINAL_OR_NOTSYL + Py_DECREF(r); r = PyUnicode_FromUnicode(fmtout, fcur-fmtout); out: |
From: Chang <pe...@us...> - 2002-04-28 19:39:33
|
perky 02/04/27 13:59:21 Modified: src hangul.c Log: - Fix garbage collection errors Revision Changes Path 1.8 +16 -7 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/hangul.c,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- hangul.c 26 Apr 2002 08:21:54 -0000 1.7 +++ hangul.c 27 Apr 2002 20:59:21 -0000 1.8 @@ -4,14 +4,14 @@ * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/26 08:21:54 $ + * Date : $Date: 2002/04/27 20:59:21 $ * Created : 25 April 2002 * - * $Revision: 1.7 $ + * $Revision: 1.8 $ */ static char *version = -"$Id: hangul.c,v 1.7 2002/04/26 08:21:54 perky Exp $"; +"$Id: hangul.c,v 1.8 2002/04/27 20:59:21 perky Exp $"; #include "Python.h" @@ -611,10 +611,11 @@ UniNull = PyUnicode_FromUnicode(NULL, 0); tuni[0] = 0x3000; /* Unicode Double-wide Space */ UniSpace = PyUnicode_FromUnicode(tuni, 1); - Py_INCREF(UniSpace); /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); + PyDict_SetItemString(d, "Space", UniSpace); + /*Py_DECREF(UniSpace); never die */ SET_INTCONSTANT(d, NCHOSUNG); SET_INTCONSTANT(d, NJUNGSUNG); SET_INTCONSTANT(d, NJONGSUNG); @@ -677,30 +678,34 @@ tuni[0] = jamo->code; unijamo = PyUnicode_FromUnicode(tuni, 1); PyDict_SetItemString(d, jamo->name, unijamo); - Py_INCREF(unijamo); /* PuTyple_SET_ITEM steals reference */ if (isJaeum(jamo->code)) { PyTuple_SET_ITEM(JaeumCodes, cur_jaeum++, unijamo); + Py_INCREF(unijamo); if (isChosung(jamo->code)) { jamo->orders[0] = cur_cho; jamo_chosung[cur_cho] = jamo; PyList_SET_ITEM(Chosung, cur_cho++, unijamo); + Py_INCREF(unijamo); PyDict_SetItemString(JaeumDict, jamo->name, unijamo); } if (isJongsung(jamo->code)) { jamo->orders[2] = cur_jong; jamo_jongsung[cur_jong] = jamo; PyList_SET_ITEM(Jongsung, cur_jong++, unijamo); + Py_INCREF(unijamo); PyDict_SetItemString(JaeumDict, jamo->name, unijamo); } 
multicls = JaeumMulti; } else { /* Moeum */ PyTuple_SET_ITEM(MoeumCodes, cur_moeum++, unijamo); + Py_INCREF(unijamo); if (isJungsung(jamo->code)) { jamo->orders[1] = cur_jung; jamo_jungsung[cur_jung] = jamo; PyList_SET_ITEM(Jungsung, cur_jung++, unijamo); + Py_INCREF(unijamo); PyDict_SetItemString(MoeumDict, jamo->name, unijamo); } multicls = MoeumMulti; @@ -715,10 +720,13 @@ PyDict_SetItem(multicls, unijamo, tmp); Py_DECREF(tmp); } + Py_DECREF(unijamo); } - Py_DECREF(JaeumDict); - Py_DECREF(MoeumDict); + Py_DECREF(Chosung); Py_DECREF(Jungsung); Py_DECREF(Jongsung); + Py_DECREF(JaeumDict); Py_DECREF(MoeumDict); + Py_DECREF(JaeumCodes); Py_DECREF(MoeumCodes); + Py_DECREF(JaeumMulti); Py_DECREF(MoeumMulti); } tmp = PyTuple_New(2); @@ -745,6 +753,7 @@ ErrorObject = PyErr_NewException("hangul.UnicodeHangulError", NULL, NULL); PyDict_SetItemString(d, "UnicodeHangulError", ErrorObject); + Py_DECREF(ErrorObject); /* Check for errors */ if (PyErr_Occurred()) |
From: Chang <pe...@us...> - 2002-04-28 19:40:05
|
perky 02/04/27 14:01:19 Modified: src hangul.c Log: - Just a style fix Revision Changes Path 1.9 +4 -5 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/hangul.c,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- hangul.c 27 Apr 2002 20:59:21 -0000 1.8 +++ hangul.c 27 Apr 2002 21:01:19 -0000 1.9 @@ -4,14 +4,14 @@ * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/27 20:59:21 $ + * Date : $Date: 2002/04/27 21:01:19 $ * Created : 25 April 2002 * - * $Revision: 1.8 $ + * $Revision: 1.9 $ */ static char *version = -"$Id: hangul.c,v 1.8 2002/04/27 20:59:21 perky Exp $"; +"$Id: hangul.c,v 1.9 2002/04/27 21:01:19 perky Exp $"; #include "Python.h" @@ -614,8 +614,6 @@ /* Add some symbolic constants to the module */ d = PyModule_GetDict(m); - PyDict_SetItemString(d, "Space", UniSpace); - /*Py_DECREF(UniSpace); never die */ SET_INTCONSTANT(d, NCHOSUNG); SET_INTCONSTANT(d, NJUNGSUNG); SET_INTCONSTANT(d, NJONGSUNG); @@ -748,6 +746,7 @@ tuni[0] = JUNGSUNG_FILLER; PyDict_SetItemString(d, "JUNGSUNG_FILLER", PyUnicode_FromUnicode(tuni, 1)); PyDict_SetItemString(d, "Null", UniNull); + PyDict_SetItemString(d, "Space", UniSpace); PyDict_SetItemString(d, "version", PyString_FromString(version)); |
From: Chang <pe...@us...> - 2002-04-29 14:24:27
|
perky 02/04/29 07:24:25 Modified: src hangul.c Log: - Add 'L', 'R', 'Z' as pseudo final alphabets Revision Changes Path 1.10 +5 -5 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/hangul.c,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- hangul.c 27 Apr 2002 21:01:19 -0000 1.9 +++ hangul.c 29 Apr 2002 14:24:25 -0000 1.10 @@ -4,14 +4,14 @@ * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/27 21:01:19 $ + * Date : $Date: 2002/04/29 14:24:25 $ * Created : 25 April 2002 * - * $Revision: 1.9 $ + * $Revision: 1.10 $ */ static char *version = -"$Id: hangul.c,v 1.9 2002/04/27 21:01:19 perky Exp $"; +"$Id: hangul.c,v 1.10 2002/04/29 14:24:25 perky Exp $"; #include "Python.h" @@ -425,8 +425,8 @@ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, /* 2 */ 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 3 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 4 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, /* 4 */ + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /* 5 */ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 6 */ 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7 */ }; |
From: Chang <pe...@us...> - 2002-05-01 11:10:46
|
perky 02/05/01 04:10:44 Modified: src hangul.c Log: - Test long unicode string for ishangul, isJaeum, isMoeum Suggested by: Lee Gang-Seong <gs...@gw...> Revision Changes Path 1.11 +27 -21 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/hangul.c,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- hangul.c 29 Apr 2002 14:24:25 -0000 1.10 +++ hangul.c 1 May 2002 11:10:43 -0000 1.11 @@ -4,14 +4,14 @@ * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/29 14:24:25 $ + * Date : $Date: 2002/05/01 11:10:43 $ * Created : 25 April 2002 * - * $Revision: 1.10 $ + * $Revision: 1.11 $ */ static char *version = -"$Id: hangul.c,v 1.10 2002/04/29 14:24:25 perky Exp $"; +"$Id: hangul.c,v 1.11 2002/05/01 11:10:43 perky Exp $"; #include "Python.h" @@ -109,17 +109,19 @@ hangul_isJaeum(PyObject *self, PyObject *args) { Py_UNICODE *code; - int codelen; + int codelen, istrue = 0; if (!PyArg_ParseTuple(args, "u#:isJaeum", &code, &codelen)) return NULL; - if (codelen < 1) { - PyErr_Format(PyExc_ValueError, "need not null unicode string"); - return NULL; - } + if (codelen) + for (istrue = 1; codelen--; code++) + if (!isJaeum(*code)) { + istrue = 0; + break; + } - if (isJaeum(*code)) { + if (istrue) { Py_INCREF(Py_True); return Py_True; } @@ -135,17 +137,19 @@ hangul_isMoeum(PyObject *self, PyObject *args) { Py_UNICODE *code; - int codelen; + int codelen, istrue = 0; if (!PyArg_ParseTuple(args, "u#:isMoeum", &code, &codelen)) return NULL; - if (codelen < 1) { - PyErr_Format(PyExc_ValueError, "need not null unicode string"); - return NULL; - } + if (codelen) + for (istrue = 1; codelen--; code++) + if (!isMoeum(*code)) { + istrue = 0; + break; + } - if (isMoeum(*code)) { + if (istrue) { Py_INCREF(Py_True); return Py_True; } @@ -161,17 +165,19 @@ hangul_ishangul(PyObject *self, PyObject *args) { Py_UNICODE 
*code; - int codelen; + int codelen, istrue = 0; if (!PyArg_ParseTuple(args, "u#:ishangul", &code, &codelen)) return NULL; - if (codelen < 1) { - PyErr_Format(PyExc_ValueError, "need not null unicode string"); - return NULL; - } + if (codelen) + for (istrue = 1; codelen--; code++) + if (!(isHangulSyllable(*code) || isJaeum(*code) || isMoeum(*code))) { + istrue = 0; + break; + } - if (isHangulSyllable(*code) || isJaeum(*code) || isMoeum(*code)) { + if (istrue) { Py_INCREF(Py_True); return Py_True; } |
From: Hye-Shik C. <pe...@us...> - 2003-01-02 03:44:42
|
perky 03/01/01 19:44:41 Modified: src hangul.c Log: Minor style fixes Revision Changes Path 1.16 +48 -44 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/hangul.c,v retrieving revision 1.15 retrieving revision 1.16 diff -u -r1.15 -r1.16 --- hangul.c 14 Oct 2002 10:27:13 -0000 1.15 +++ hangul.c 2 Jan 2003 03:44:41 -0000 1.16 @@ -1,10 +1,10 @@ /* - * hangul.c - $Revision: 1.15 $ + * hangul.c - $Revision: 1.16 $ * * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@Fr...> - * Date : $Date: 2002/10/14 10:27:13 $ + * Date : $Date: 2003/01/02 03:44:41 $ * Created : 25 April 2002 * * This file is part of KoreanCodecs. @@ -24,7 +24,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -static char *version = "$Revision: 1.15 $"; +static char *version = "$Revision: 1.16 $"; #include "Python.h" @@ -377,11 +377,9 @@ if (codelen > i+1 && JUNGSUNG_FILLER <= code[i+1] && code[i+1] <= 0x1175) { - if (c == CHOSUNG_FILLER) cho = -1; - else cho = c - JBASE_CHOSUNG; - - if (code[i+1] == JUNGSUNG_FILLER) jung = -1; - else jung = code[i+1] - JBASE_JUNGSUNG; + cho = (c == CHOSUNG_FILLER) ? -1 : c - JBASE_CHOSUNG; + jung = (code[i+1] == JUNGSUNG_FILLER) ? -1 : + code[i+1] - JBASE_JUNGSUNG; if (codelen > i+2 && JBASE_JONGSUNG <= code[i+2] && code[i+2] <= 0x11c2) { @@ -393,8 +391,10 @@ if (jong && (cho == -1 || jung == -1)) { /* can't trans to syllable */ - if (cho >= 0) *(dst++) = jamo_chosung[cho]->code; - if (jung >= 0) *(dst++) = jamo_jungsung[jung]->code; + if (cho >= 0) + *(dst++) = jamo_chosung[cho]->code; + if (jung >= 0) + *(dst++) = jamo_jungsung[jung]->code; *(dst++) = jamo_jongsung[jong]->code; } else if (cho == -1) /* jungsung only */ @@ -558,33 +558,33 @@ fmt = PyUnicode_AS_UNICODE(r); fmtsize = PyUnicode_GET_SIZE(r); -#define HAS_FINAL() ( \ - (past = *(fmt-1)), \ - isHangulSyllable(past) ? 
\ - ((past-HANGUL_BOTTOM) % NJONGSUNG > 0) \ - : (past < 0x80 ? pseudofinal[past] : 0) \ +#define HAS_FINAL() ( \ + (past = *(fmt-1)), \ + isHangulSyllable(past) ? \ + ((past-HANGUL_BOTTOM) % NJONGSUNG > 0) \ + : (past < 0x80 ? pseudofinal[past] : 0) \ ) -#define HAS_FINAL_OR_NOTSYL() ( \ - (past = *(fmt-1)), \ - isHangulSyllable(past) ? \ - ((past-HANGUL_BOTTOM) % NJONGSUNG > 0) \ - : 1 \ +#define HAS_FINAL_OR_NOTSYL() ( \ + (past = *(fmt-1)), \ + isHangulSyllable(past) ? \ + ((past-HANGUL_BOTTOM) % NJONGSUNG > 0) \ + : 1 \ ) -#define PROCESSSUFFIX(nofinal, existfinal) \ - if (next == nofinal || next == existfinal) { \ - *(fcur++) = HAS_FINAL() ? (existfinal) : (nofinal); \ - fmtsize--; fmt++; \ +#define PROCESSSUFFIX(nofinal, existfinal) \ + if (next == nofinal || next == existfinal) { \ + *(fcur++) = HAS_FINAL() ? (existfinal) : (nofinal); \ + fmtsize--; fmt++; \ } -#define PROCESSSUFFIX_IDA(jongsungadder, existfinal) \ - if (next == existfinal) { \ - if (HAS_FINAL_OR_NOTSYL()) \ - *(fcur++) = existfinal; \ - else \ - *(fcur-1) += jongsungadder; \ - fmtsize-=3; fmt+=3; \ +#define PROCESSSUFFIX_IDA(jongsungadder, existfinal) \ + if (next == existfinal) { \ + if (HAS_FINAL_OR_NOTSYL()) \ + *(fcur++) = existfinal; \ + else \ + *(fcur-1) += jongsungadder; \ + fmtsize-=3; fmt+=3; \ } for (fcur = fmtout; fmtsize--; fmt++) { @@ -611,7 +611,6 @@ *(fcur++) = *fmt; } -/* these were written separatedly for win32 compilers */ #undef PROCESSSUFFIX #undef PROCESSSUFFIX_IDA #undef HAS_FINAL @@ -663,6 +662,13 @@ PyDict_SetItemString(dict, #value, mp); \ Py_DECREF(mp); \ } +#define SET_UNICHARCONSTANT(dict, value) { \ + PyObject *mp; \ + Py_UNICODE tmpuni = value; \ + mp = PyUnicode_FromUnicode(&tmpuni, 1); \ + PyDict_SetItemString(dict, #value, mp); \ + Py_DECREF(mp); \ +} void inithangul(void) @@ -801,20 +807,18 @@ PyDict_SetItemString(d, "ZONE", tmp); Py_DECREF(tmp); - tuni[0] = JBASE_CHOSUNG; - PyDict_SetItemString(d, "JBASE_CHOSUNG", PyUnicode_FromUnicode(tuni, 1)); - 
tuni[0] = JBASE_JUNGSUNG; - PyDict_SetItemString(d, "JBASE_JUNGSUNG", PyUnicode_FromUnicode(tuni, 1)); - tuni[0] = JBASE_JONGSUNG; - PyDict_SetItemString(d, "JBASE_JONGSUNG", PyUnicode_FromUnicode(tuni, 1)); - tuni[0] = CHOSUNG_FILLER; - PyDict_SetItemString(d, "CHOSUNG_FILLER", PyUnicode_FromUnicode(tuni, 1)); - tuni[0] = JUNGSUNG_FILLER; - PyDict_SetItemString(d, "JUNGSUNG_FILLER", PyUnicode_FromUnicode(tuni, 1)); + SET_UNICHARCONSTANT(d, JBASE_CHOSUNG); + SET_UNICHARCONSTANT(d, JBASE_JUNGSUNG); + SET_UNICHARCONSTANT(d, JBASE_JONGSUNG); + SET_UNICHARCONSTANT(d, CHOSUNG_FILLER); + SET_UNICHARCONSTANT(d, JUNGSUNG_FILLER); + PyDict_SetItemString(d, "Null", UniNull); PyDict_SetItemString(d, "Space", UniSpace); - PyDict_SetItemString(d, "version", PyString_FromString(version)); + tmp = PyString_FromString(version); + PyDict_SetItemString(d, "__version__", tmp); + Py_DECREF(tmp); ErrorObject = PyErr_NewException("hangul.UnicodeHangulError", NULL, NULL); PyDict_SetItemString(d, "UnicodeHangulError", ErrorObject); @@ -825,6 +829,6 @@ } /* - * $Id: hangul.c,v 1.15 2002/10/14 10:27:13 perky Exp $ + * $Id: hangul.c,v 1.16 2003/01/02 03:44:41 perky Exp $ * ex: ts=8 sts=4 et */ |
From: Hye-Shik C. <pe...@us...> - 2003-05-09 16:36:46
|
perky 03/05/09 09:36:45 Modified: src hangul.c Log: arm, powerpc and s390 use unsigned char by default. (From debian bug #170535, by Gerhard Tonn <Ger...@ga...>) Revision Changes Path 1.18 +5 -5 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/hangul.c,v retrieving revision 1.17 retrieving revision 1.18 diff -u -r1.17 -r1.18 --- hangul.c 13 Jan 2003 09:09:58 -0000 1.17 +++ hangul.c 9 May 2003 16:36:45 -0000 1.18 @@ -1,10 +1,10 @@ /* - * hangul.c - $Revision: 1.17 $ + * hangul.c - $Revision: 1.18 $ * * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@Fr...> - * Date : $Date: 2003/01/13 09:09:58 $ + * Date : $Date: 2003/05/09 16:36:45 $ * Created : 25 April 2002 * * This file is part of KoreanCodecs. @@ -24,7 +24,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -static char *version = "$Revision: 1.17 $"; +static char *version = "$Revision: 1.18 $"; #include "Python.h" @@ -67,7 +67,7 @@ char *name; Py_UNICODE code; int multi[MAX_MULTIJAMO]; - char orders[3]; /* cho, jung, jong */ + signed char orders[3]; /* cho, jung, jong */ } jamotype; #define CODE(c) #c,c @@ -829,6 +829,6 @@ } /* - * $Id: hangul.c,v 1.17 2003/01/13 09:09:58 perky Exp $ + * $Id: hangul.c,v 1.18 2003/05/09 16:36:45 perky Exp $ * ex: ts=8 sts=4 et */ |