[KoCo-CVS] [Commit] KoreanCodecs/tools generate_codec_mapping.py
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-01-12 23:12:49
|
# CVS commit notice (as recorded in the mailing-list archive):
#   perky 03/01/12 15:12:49
#   Added: tools generate_codec_mapping.py
#   Log: Move src/tablegen.py to tools/generate_codec_mapping.py
#   Revision Changes Path
#   1.1 KoreanCodecs/tools/generate_codec_mapping.py
#   Index: generate_codec_mapping.py
#   ===================================================================
#
# generate_codec_mapping.py - $Revision: 1.1 $
#
# Code Table Generator
#
# Author: Hye-Shik Chang <pe...@Fr...>
# Date : $Date: 2003/01/12 23:12:48 $
#
#
# This file is part of KoreanCodecs.
#
# KoreanCodecs is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# KoreanCodecs is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with KoreanCodecs; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#

import time

# Table cell emitted for a code that has no Unicode mapping.
UNICODE_INVALID = "UNIINV,"

# C comment placed at the top of every generated header; the generation
# time (UTC) is substituted once, when this module is loaded.
COPYRIGHT_HEADER = """\
/*
 * This file is part of KoreanCodecs.
 *
 * KoreanCodecs is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * KoreanCodecs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with KoreanCodecs; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Generated by generate_codec_mapping.py on %s
 * $Id: generate_codec_mapping.py,v 1.1 2003/01/12 23:12:48 perky Exp $
 */
""" % time.asctime(time.gmtime())


def _emit(fo, *items):
    """Write *items* to *fo* joined by single spaces, then a newline.

    Stands in for the ``print >> fo, a, b`` statement so the generators
    produce the same text without depending on the print statement.
    Called with no items it writes an empty line.
    """
    fo.write(" ".join(items) + "\n")


def tohex(s):
    """Format the two characters of *s* as two C-style hex escapes."""
    return "\\x%02x\\x%02x" % tuple(map(ord, s))


def decodemapgen(fo, prefix, bottom, top, m, print_region=1, print_index=1):
    """Write one C decode table per lead byte found in *m*.

    fo           -- writable file object receiving the C source.
    prefix       -- identifier prefix for the generated arrays and macros.
    bottom, top  -- inclusive range of trail-byte values each table covers.
    m            -- mapping from two-character codes to single characters;
                    the first character selects the table, the second the
                    slot inside it.
    print_region -- when true, emit ``<prefix>_bottom``/``<prefix>_top``.
    print_index  -- when true, finish with the lead-byte index table
                    written by decmapindex().
    """
    # Regroup the flat mapping as {lead byte: {trail byte: character}}.
    fmap = {}
    for oco, dco in m.items():
        fmap.setdefault(ord(oco[0]), {})[ord(oco[1])] = dco
    firstkeys = list(fmap.keys())
    firstkeys.sort()

    if print_region:
        _emit(fo, """\
#define %s_bottom %d
#define %s_top %d
""" % (prefix, bottom, prefix, top))

    for fk in firstkeys:
        row = fmap[fk]
        _emit(fo, "static const Py_UNICODE %s_%02X[%d] = { /* %02X::%02X-%02X */"
              % (prefix, fk, top - bottom + 1, fk, bottom, top))
        # Every slot in [bottom, top] gets a cell so that the C side can
        # index the array directly with (trail byte - bottom).
        cells = []
        for i in range(bottom, top + 1):
            if i in row:
                cells.append("0x%04x," % ord(row[i]))
            else:
                cells.append(UNICODE_INVALID)
        for start in range(0, len(cells), 8):  # eight cells per output line
            _emit(fo, " ", " ".join(cells[start:start + 8]))
        _emit(fo, "};")
        _emit(fo)

    if print_index:
        decmapindex(fo, prefix, fmap)


def decmapindex(fo, prefix, fmap):
    """Write the 128-entry table mapping lead bytes 0x80-0xFF to tables.

    Only the keys of *fmap* are consulted: a lead byte present in it points
    at ``<prefix>_<XX>``, any other lead byte gets a null pointer.
    """
    _emit(fo, "static const Py_UNICODE *%s_map[128] = {" % (prefix))
    for i in range(128, 256):
        if i in fmap:
            _emit(fo, " %s_%02X, /* 0x%02X */" % (prefix, i, i))
        else:
            _emit(fo, " 0, /* 0x%02X */" % i)
    _emit(fo, "};")
    _emit(fo)


def encodemapgen(fo, prefix, m, threshold):
    """Write paged C encode tables plus a ``_<prefix>(uni)`` lookup macro.

    fo        -- writable file object receiving the C source.
    prefix    -- identifier prefix for the generated pages and macro.
    m         -- mapping from single characters to two-character codes.
    threshold -- a code point more than this far above the end of the
                 current page starts a new page; smaller gaps are filled
                 with NOCHAR cells.
    """
    ecodes = list(m.keys())
    ecodes.sort()

    # Each page is [first code point, last code point, {code point: code}].
    # Starting from an empty list (rather than a dummy far-negative page)
    # keeps the first codes from being absorbed into a page that is never
    # written when threshold is very large.
    pages = []
    for ch in ecodes:
        cp = ord(ch)
        if not pages or pages[-1][1] + threshold < cp:
            pages.append([cp, cp, {cp: m[ch]}])
        else:
            pages[-1][1] = cp
            pages[-1][2][cp] = m[ch]

    for pageno, (first, last, codes) in enumerate(pages):
        _emit(fo, "static const DBYTECHAR %s_page%d[%d] = { /* 0x%04x - 0x%04x */"
              % (prefix, pageno, last - first + 1, first, last))
        cells = []
        for cp in range(first, last + 1):
            if cp in codes:
                cells.append('0x%02x%02x,' % tuple(map(ord, codes[cp])))
            else:
                cells.append("NOCHAR,")
        for start in range(0, len(cells), 8):  # eight cells per output line
            _emit(fo, " ", " ".join(cells[start:start + 8]))
        _emit(fo, "};")
        _emit(fo)

    # The macro tries each page's range in turn and falls back to NOCHAR.
    _emit(fo, "#define _%s(uni) ( \\" % prefix)
    for pageno, page in enumerate(pages):
        _emit(fo, " uni >= 0x%04x && uni <= 0x%04x ? %s_page%d[uni-0x%04x] : \\"
              % (page[0], page[1], prefix, pageno, page[0]))
    _emit(fo, " NOCHAR \\")
    _emit(fo, ")")
    _emit(fo)


def hintgen(fo, prefix, m):
    """Write the 256-entry ``<prefix>_hint`` array from the integers in *m*.

    *m* must be indexable by every integer in range(256).
    """
    _emit(fo, "static const char %s_hint[256] = {" % prefix)
    for start in range(0, 256, 16):  # sixteen entries per output line
        _emit(fo, " ", " ".join(['%d,' % m[i] for i in range(start, start + 16)]))
    _emit(fo, "};")
    _emit(fo)


def _generate_ksc5601(path, decoding):
    """Write the KS C 5601 decode header to *path*."""
    fo = open(path, "w")
    try:
        _emit(fo, COPYRIGHT_HEADER)
        decodemapgen(fo, "ksc5601_decode", 0xa1, 0xfe, decoding)
        _emit(fo)
    finally:
        fo.close()


def _generate_uhc(path, decoding):
    """Write the UHC decode header (two table groups, index, hint) to *path*."""
    page0 = {}   # codes whose lead byte is <= 0xa0
    page1 = {}   # all remaining codes
    leads = {}   # set of lead bytes; only the keys matter to decmapindex()
    for code, uni in decoding.items():
        if code[0] <= '\xa0':
            page0[code] = uni
        else:
            page1[code] = uni
        leads[ord(code[0])] = None

    # Hint is 1 for ASCII letters and for 0x81-0xA0.  The letter test uses
    # explicit ranges because str.isalpha() on 8-bit strings follows the
    # process locale in Python 2.
    hintarray = []
    for i in range(256):
        is_ascii_letter = 0x41 <= i <= 0x5A or 0x61 <= i <= 0x7A
        if is_ascii_letter or 0x81 <= i <= 0xA0:
            hintarray.append(1)
        else:
            hintarray.append(0)

    fo = open(path, "w")
    try:
        _emit(fo, COPYRIGHT_HEADER)
        _emit(fo, """\
#define uhc_page0_bottom 0x41
#define uhc_page0_top 0xfe
#define uhc_page1_bottom 0x41
#define uhc_page1_top 0xa0
""")
        decodemapgen(fo, "uhc_decode", 0x41, 0xfe, page0, 0, 0)
        decodemapgen(fo, "uhc_decode", 0x41, 0xa0, page1, 0, 0)
        decmapindex(fo, "uhc_decode", leads)
        hintgen(fo, "uhc_decode", hintarray)
    finally:
        fo.close()


def _generate_wansung_encoder(path, encoding):
    """Write the combined encode header to *path*."""
    fo = open(path, "w")
    try:
        _emit(fo, COPYRIGHT_HEADER)
        encodemapgen(fo, "wansung_encode", encoding, 512)
    finally:
        fo.close()


def main():
    """Generate the three _koco_*.h headers in the current directory."""
    from korean.mappings import ksc5601_hangul, ksc5601_ideograph, ksc5601_misc
    from korean.mappings import uhc

    ksc5601_decoding = {}
    ksc5601_encoding = {}
    for mapping in (ksc5601_hangul, ksc5601_ideograph, ksc5601_misc):
        ksc5601_decoding.update(mapping.decoding_map)
        ksc5601_encoding.update(mapping.encoding_map)

    _generate_ksc5601("_koco_ksc5601.h", ksc5601_decoding)
    _generate_uhc("_koco_uhc.h", uhc.decoding_map)

    # The encoder covers KS C 5601 plus UHC; UHC entries are applied last.
    ksc5601_encoding.update(uhc.encoding_map)
    _generate_wansung_encoder("_koco_wansungenc.h", ksc5601_encoding)


if __name__ == "__main__":
    main()

#
# $Id: generate_codec_mapping.py,v 1.1 2003/01/12 23:12:48 perky Exp $
#
# -*- End-Of-File -*-