[KoCo-CVS] [Commit] KoreanCodecs/src hangul.c
Brought to you by:
perky
From: Chang <pe...@us...> - 2002-04-25 20:55:28
|
perky 02/04/25 13:55:25 Modified: src hangul.c Log: - Add hangul.format C implementation. Revision Changes Path 1.4 +175 -24 KoreanCodecs/src/hangul.c Index: hangul.c =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/src/hangul.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- hangul.c 25 Apr 2002 05:12:17 -0000 1.3 +++ hangul.c 25 Apr 2002 20:55:25 -0000 1.4 @@ -4,14 +4,14 @@ * KoreanCodecs Hangul Module C Implementation * * Author : Hye-Shik Chang <pe...@fa...> - * Date : $Date: 2002/04/25 05:12:17 $ + * Date : $Date: 2002/04/25 20:55:25 $ * Created : 25 April 2002 * - * $Revision: 1.3 $ + * $Revision: 1.4 $ */ static char *version = -"$Id: hangul.c,v 1.3 2002/04/25 05:12:17 perky Exp $"; +"$Id: hangul.c,v 1.4 2002/04/25 20:55:25 perky Exp $"; #include "Python.h" @@ -102,10 +102,10 @@ #define getJongsungOrder(c) (getJamotype(c).orders[2]) -static char Py_isJaeum__doc__[] = "isJaeum(code): Verify whether the code is Jaeum."; +static char hangul_isJaeum__doc__[] = "isJaeum(code): Verify whether the code is Jaeum."; static PyObject * -Py_isJaeum(PyObject *self, PyObject *args) +hangul_isJaeum(PyObject *self, PyObject *args) { Py_UNICODE *code; int codelen; @@ -128,10 +128,10 @@ } } -static char Py_isMoeum__doc__[] = "isMoeum(code): Verify whether the code is Moeum."; +static char hangul_isMoeum__doc__[] = "isMoeum(code): Verify whether the code is Moeum."; static PyObject * -Py_isMoeum(PyObject *self, PyObject *args) +hangul_isMoeum(PyObject *self, PyObject *args) { Py_UNICODE *code; int codelen; @@ -154,10 +154,10 @@ } } -static char Py_ishangul__doc__[] = "ishangul(code): Verify whether the code is hangul."; +static char hangul_ishangul__doc__[] = "ishangul(code): Verify whether the code is hangul."; static PyObject * -Py_ishangul(PyObject *self, PyObject *args) +hangul_ishangul(PyObject *self, PyObject *args) { Py_UNICODE *code; int codelen; @@ -180,10 +180,10 @@ } } -static char Py_join__doc__[] = "join([chosung, jungsung, jongsung]): Assemble hangul syllable from jamos."; +static char hangul_join__doc__[] = "join([chosung, jungsung, jongsung]): Assemble hangul syllable from jamos."; static PyObject * -Py_join(PyObject *self, PyObject *args) +hangul_join(PyObject *self, PyObject *args) { PyObject *argchar, *argelems[3]; Py_UNICODE elems[3], *uobj; @@ -251,10 +251,10 @@ } } -static char Py_split__doc__[] = "split(code): Disassemble hangul syllable into jamos."; +static char hangul_split__doc__[] = "split(code): Disassemble hangul syllable into jamos."; static PyObject * -Py_split(PyObject *self, PyObject *args) +hangul_split(PyObject *self, PyObject *args) { Py_UNICODE *code; PyObject *r; @@ -313,10 +313,10 @@ } } -static char Py_conjoin__doc__[] = "conjoin(unicodestring): conjoin unicode johab string into unicode syllable string"; +static char hangul_conjoin__doc__[] = "conjoin(unicodestring): conjoin unicode johab string into unicode syllable string"; static PyObject * -Py_conjoin(PyObject *self, PyObject *args) +hangul_conjoin(PyObject *self, PyObject *args) { PyObject *r; Py_UNICODE *code, *dst, *dstorg, c; @@ -373,10 +373,10 @@ } -static char Py_disjoint__doc__[] = "disjoint(unicodestring): disjoint unicode syllable string into unicode johab string"; +static char hangul_disjoint__doc__[] = "disjoint(unicodestring): disjoint unicode syllable string into unicode johab string"; static PyObject * -Py_disjoint(PyObject *self, PyObject *args) +hangul_disjoint(PyObject *self, PyObject *args) { Py_UNICODE *code, *dst, *dstorg, c; PyObject *r; @@ -419,18 +419,169 @@ } +static char pseudofinal[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, /* 2 */ + 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 3 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, /* 4 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5 */ + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 6 */ + 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7 */ +}; + +static char hangul_format__doc__[] = "format(fmt, arg1, arg2, ...) or format(fmt, kw1=arg1, kw2=arg2" + ", ...):\nformat unicode string and fix korean suffixes after arguments"; + +static PyObject * +hangul_format(PyObject *self, PyObject *args, PyObject *kwargs) +{ +/*--- Poor Structure of this function ;) + hangul_format(fmt, *args, **kwargs) + -> insert end fmtmarkers(U+115E which is not used by Unicode) after every format position + -> PyUnicode_Format + -> Fix and update hangul suffixes in place of fmtmarkers + -> make PyObject and return. + */ +#define FMTMARKER 0x115E + Py_UNICODE *fmt, *fmtout, *fcur; + PyObject *r; + int fmtsize; + int inpth, infmt, escape; + + { + PyObject *fmtobj; + int argsize; + + argsize = PyTuple_GET_SIZE(args); + if (!argsize || !PyUnicode_Check(fmtobj = PyTuple_GET_ITEM(args, 0))) { + PyErr_Format(PyExc_TypeError, "needs unicode format string."); + return NULL; + } + fmtsize = PyUnicode_GET_SIZE(fmtobj); + fmt = PyUnicode_AS_UNICODE(fmtobj); + + if (!kwargs) + args = PyTuple_GetSlice(args, 1, argsize); + } + + fmtout = PyMem_New(Py_UNICODE, fmtsize + fmtsize/2); + inpth = infmt = escape = 0; + + for (fcur = fmtout; fmtsize--; fmt++) { + if (*fmt != FMTMARKER) /* skip bogus markers */ + *(fcur++) = *fmt; + + if (escape) + escape = 0; + else if (*fmt == '\\') + escape = 1; + else if (infmt) { + if (!inpth && (('A' <= *fmt && *fmt <= 'Z') || ('a' <= *fmt && *fmt <= 'z'))) { + *(fcur++) = FMTMARKER; + infmt = 0; + } + else if (inpth && *fmt == ')') + inpth = 0; + else if (*fmt == '(') + inpth = 1; + else if (*fmt == '%') + infmt = 0; + } + else if (*fmt == '%') + infmt = 1; + } + + r = PyUnicode_Format( + PyUnicode_FromUnicode(fmtout, fcur-fmtout), + kwargs?kwargs:args + ); + if (!kwargs) { + Py_DECREF(args); + } /* {} to avoid gcc warning */ + if (!r) + goto out; + + fmt = PyUnicode_AS_UNICODE(r); + fmtsize = PyUnicode_GET_SIZE(r); + Py_DECREF(r); + +#define HAS_FINAL() ( \ + (past = *(fmt-1)), \ + isHangulSyllable(past) ? \ + ((past-HANGUL_BOTTOM) % NJONGSUNG > 0) \ + : (past < 0x80 ? pseudofinal[past] : 0) \ +) + +#define HAS_FINAL_OR_NOTSYL() ( \ + (past = *(fmt-1)), \ + isHangulSyllable(past) ? \ + ((past-HANGUL_BOTTOM) % NJONGSUNG > 0) \ + : 1 \ +) + +#define PROCESSSUFFIX(nofinal, existfinal) \ + if (next == nofinal || next == existfinal) { \ + *(fcur++) = HAS_FINAL() ? (existfinal) : (nofinal); \ + fmtsize--; fmt++; \ + } + +#define PROCESSSUFFIX_IDA(jongsungadder, existfinal) \ + if (next == existfinal) { \ + if (HAS_FINAL_OR_NOTSYL()) \ + *(fcur++) = existfinal; \ + else \ + *(fcur-1) += jongsungadder; \ + fmtsize-=3; fmt+=3; \ + } + + for (fcur = fmtout; fmtsize--; fmt++) { + if (*fmt == FMTMARKER) { + if (fcur > fmtout && fmtsize > 0) { + Py_UNICODE past, next = *(fmt+1); + + if (next == '(' && fmtsize > 2 && *(fmt+3) == ')') { /* ida suffxes */ + next = *(fmt+2); + PROCESSSUFFIX_IDA(0, 0xc774) /* (I)DA */ + else PROCESSSUFFIX_IDA(17, 0xc785) /* (IP)NIDA */ + else PROCESSSUFFIX_IDA(4, 0xc778) /* (IN)- */ + } + else if (0xac00 <= next && next <= 0xc774) { + PROCESSSUFFIX(0xc744, 0xb97c) /* REUL, EUL */ + else PROCESSSUFFIX(0xc740, 0xb294) /* NEUN, EUN */ + else PROCESSSUFFIX(0xac00, 0xc774) /* I, GA */ + else PROCESSSUFFIX(0xc640, 0xacfc) /* WA, GWA */ + } + } + } + else + *(fcur++) = *fmt; + } + +#undef PROCESSSUFFIX, PROCESSSUFFIX_IDA +#undef HAS_FINAL, HAS_FINAL_OR_NOTSYL + + r = PyUnicode_FromUnicode(fmtout, fcur-fmtout); + +out: + PyMem_Free(fmtout); + return r; +} + /* List of methods defined in the module */ #define meth(name, func, doc) {name, (PyCFunction)func, METH_VARARGS, doc} +#define meth_kw(name, func, doc) {name, (PyCFunction)func, METH_VARARGS|METH_KEYWORDS, doc} static struct PyMethodDef hangul_methods[] = { - meth("isJaeum", Py_isJaeum, Py_isJaeum__doc__), - meth("isMoeum", Py_isMoeum, Py_isMoeum__doc__), - meth("ishangul", Py_ishangul, Py_ishangul__doc__), - meth("join", Py_join, Py_join__doc__), - meth("split", Py_split, Py_split__doc__), - meth("conjoin", Py_conjoin, Py_conjoin__doc__), - meth("disjoint", Py_disjoint, Py_disjoint__doc__), + meth("isJaeum", hangul_isJaeum, hangul_isJaeum__doc__), + meth("isMoeum", hangul_isMoeum, hangul_isMoeum__doc__), + meth("ishangul", hangul_ishangul, hangul_ishangul__doc__), + meth("join", hangul_join, hangul_join__doc__), + meth("split", hangul_split, hangul_split__doc__), + meth("conjoin", hangul_conjoin, hangul_conjoin__doc__), + meth("disjoint", hangul_disjoint, hangul_disjoint__doc__), + meth_kw("format", hangul_format, hangul_format__doc__), {NULL, NULL}, }; |