[KoCo-CVS] [Commit] cjkcodecs/tools genmap_ja_codecs.py genmap_ko_codecs.py genmap_support.py genmap_zh_CN_codecs.py genmap_zh_TW_codecs.py
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-04-20 17:35:34
|
perky 03/04/20 10:35:33 Added: tools genmap_ja_codecs.py genmap_ko_codecs.py genmap_support.py genmap_zh_CN_codecs.py genmap_zh_TW_codecs.py Log: Import codec implementations from Multibyte Codecs patch. Revision Changes Path 1.1 cjkcodecs/tools/genmap_ja_codecs.py Index: genmap_ja_codecs.py =================================================================== # # genmap_ja_codecs.py: Japanese Codecs Map Generator # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# # $Id: genmap_ja_codecs.py,v 1.1 2003/04/20 17:35:32 perky Exp $ # from genmap_support import * JISX0208_C1 = (0x21, 0x74) JISX0208_C2 = (0x21, 0x7e) JISX0212_C1 = (0x22, 0x6d) JISX0212_C2 = (0x21, 0x7e) CP932P0_C1 = (0x81, 0x81) # patches between shift-jis and cp932 CP932P0_C2 = (0x5f, 0xca) CP932P1_C1 = (0x87, 0x87) # CP932 P1 CP932P1_C2 = (0x40, 0x9c) CP932P2_C1 = (0xed, 0xfc) # CP932 P2 CP932P2_C2 = (0x40, 0xfc) try: jisx0208file = open('JIS0208.TXT') except IOError: print "=>> Please download mapping table from http://www.unicode." \ "org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT" raise SystemExit try: jisx0212file = open('JIS0212.TXT') except IOError: print "=>> Please download mapping table from http://www.unicode." \ "org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT" raise SystemExit try: cp932file = open('CP932.TXT') except IOError: print "=>> Please download mapping table from http://www.unicode." \ "org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT" raise SystemExit omap = open('_ja_codecs.h', 'w') print "Loading Mapping File..." jisx0208datever, sjisdecmap = loadmap(jisx0208file, natcol=0, unicol=2) jisx0208datever, jisx0208decmap = loadmap(jisx0208file, natcol=1, unicol=2) jisx0212datever, jisx0212decmap = loadmap(jisx0212file) cp932datever, cp932decmap = loadmap(cp932file) sjisencmap, cp932encmap = {}, {} cp932diff = {} for c1, m in cp932decmap.items(): for c2, code in m.items(): cp932encmap[code] = (c1, c2) if sjisdecmap.has_key(c1) and sjisdecmap[c1].has_key(c2): sjisencmap[sjisdecmap[c1][c2]] = (c1, c2) if sjisdecmap[c1][c2] != code: cp932diff[(c1, c2)] = (sjisdecmap[c1][c2], code) else: del cp932decmap[c1][c2] if not cp932decmap[c1]: del cp932decmap[c1] difmap = [] for uni, (c1, c2) in cp932encmap.iteritems(): if sjisencmap.has_key(uni): s1, s2 = sjisencmap[uni] if (s1, s2) != (c1, c2): difmap.append(uni) print "Printing Copyright..." 
printcopyright(omap, filename='_ja_codecs.h', encodingnames='Japanese Encodings', sourcename='JISX0208.TXT/JISX0212.TXT', sourceversion='%s/%s' % (jisx0208datever, jisx0212datever)) print "Generating JIS X 0208 decode map..." genmap_decode(omap, "jisx0208_decode", JISX0208_C1, JISX0208_C2, jisx0208decmap) print "Generating JIS X 0208 decode map index..." print_decmapindex(omap, "jisx0208_decode", jisx0208decmap, rng=(0, 128)) print "Generating JIS X 0212 decode map..." genmap_decode(omap, "jisx0212_decode", JISX0212_C1, JISX0212_C2, jisx0212decmap) print "Generating JIS X 0212 decode map index..." print_decmapindex(omap, "jisx0212_decode", jisx0212decmap, rng=(0, 128)) print "Generating CP932 decode map..." genmap_decode(omap, "cp932_decode", CP932P0_C1, CP932P0_C2, cp932decmap) genmap_decode(omap, "cp932_decode", CP932P1_C1, CP932P1_C2, cp932decmap) genmap_decode(omap, "cp932_decode", CP932P2_C1, CP932P2_C2, cp932decmap) print "Generating CP932 decode map index..." print_decmapindex(omap, "cp932_decode", cp932decmap) print "Generating Constants..." for mnam in ('JISX0208', 'JISX0212', 'CP932P0', 'CP932P1', 'CP932P2'): for c in ('C1', 'C2'): mappfx = mnam + '_' + c maprange = eval(mappfx) print >> omap, "#define %-19s 0x%02x" % ( mappfx+'_BOTTOM', maprange[0]) print >> omap, "#define %-19s 0x%02x" % ( mappfx+'_TOP', maprange[1]) print "Generating CP932 Tweaks..." if difmap: print >> omap, "#define CP932_TWEAKUNIMAP(umap)", for uni in difmap: print >> omap, "\\" print >> omap, "\t(umap)[0x%02x][0x%02x] = NOCHAR;" % ( uni >> 8, uni & 0xFF), print >> omap print "\nDone!" # ex: ts=8 sts=4 et 1.1 cjkcodecs/tools/genmap_ko_codecs.py Index: genmap_ko_codecs.py =================================================================== # # genmap_ko_codecs.py: Korean Codecs Map Generator # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # $Id: genmap_ko_codecs.py,v 1.1 2003/04/20 17:35:32 perky Exp $ # from genmap_support import * KSX1001_C1 = (0xa1, 0xfe) KSX1001_C2 = (0xa1, 0xfe) UHCL1_C1 = (0x81, 0xa0) UHCL1_C2 = (0x41, 0xfe) UHCL2_C1 = (0xa1, 0xfe) UHCL2_C2 = (0x41, 0xa0) try: mapfile = open('CP949.TXT') except IOError: print "=>> Please download mapping table from http://www.unicode." \ "org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT" raise SystemExit omap = open('_ko_codecs.h', 'w') print "Loading Mapping File..." datever, decmap = loadmap(mapfile) print "Printing Copyright..." printcopyright(omap, filename='_ko_codecs.h', encodingnames='Korean Encodings', sourcename='CP949.TXT', sourceversion=datever) print "Generating KS X 1001 decode map..." 
genmap_decode(omap, "ksx1001_decode", KSX1001_C1, KSX1001_C2, decmap) print "Generating KS X 1001 decode map index..." print_decmapindex(omap, "ksx1001_decode", decmap) uhcdecmap = {} for c1, c2map in decmap.iteritems(): for c2 in c2map.iterkeys(): if not (c1 >= 0xa1 and c2 >= 0xa1): # uhc uhcdecmap.setdefault(c1, {}) uhcdecmap[c1][c2] = c2map[c2] print "Generating UHC Level 1 decode map..." genmap_decode(omap, "uhc_decode", UHCL1_C1, UHCL1_C2, uhcdecmap) print "Generating UHC Level 2 decode map..." genmap_decode(omap, "uhc_decode", UHCL2_C1, UHCL2_C2, uhcdecmap) print "Generating UHC decode map index..." print_decmapindex(omap, "uhc_decode", uhcdecmap) print "Generating Constants..." for mnam in ('KSX1001', 'UHCL1', 'UHCL2'): for c in ('C1', 'C2'): mappfx = mnam + '_' + c maprange = eval(mappfx) print >> omap, "#define %-19s 0x%02x" % ( mappfx+'_BOTTOM', maprange[0]) print >> omap, "#define %-19s 0x%02x" % ( mappfx+'_TOP', maprange[1]) print "\nDone!" # ex: ts=8 sts=4 et 1.1 cjkcodecs/tools/genmap_support.py Index: genmap_support.py =================================================================== # # genmap_support.py: Multibyte Codec Map Generator # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. 
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# $Id: genmap_support.py,v 1.1 2003/04/20 17:35:32 perky Exp $
#

import re

# Banner written at the top of every generated header; filled in with the
# keyword arguments given to printcopyright().
COPYRIGHT_HEADER = """\
/*
 * %(filename)s
 * Mapping Tables for %(encodingnames)s
 *
 * Generated from %(sourcename)s as of %(sourceversion)s
 * $Id: genmap_support.py,v 1.1 2003/04/20 17:35:32 perky Exp $
 */
"""

# Raw strings, so that "\s" is a regex escape and not a string escape.
re_UNIMAPDATE = re.compile(r'Date:\s*([ a-zA-Z0-9/]*)')
re_UNIMAPVERSION = re.compile(r'Table version:\s*([0-9.]+)')


def printcopyright(fo, **data):
    """Write COPYRIGHT_HEADER, formatted with *data*, to *fo*.

    *data* must supply the keys used by the template: filename,
    encodingnames, sourcename and sourceversion.  One extra newline
    follows the banner.
    """
    fo.write(COPYRIGHT_HEADER % data + "\n")


def genmap_decode(fo, prefix, c1range, c2range, dmap, onlymask=()):
    """Write one C array per populated lead byte of *dmap* to *fo*.

    fo       -- writable text file object.
    prefix   -- name prefix of the generated arrays ("<prefix>_<C1>").
    c1range  -- inclusive (bottom, top) range of lead bytes to visit.
    c2range  -- inclusive (bottom, top) range of trail bytes per array.
    dmap     -- {c1: {c2: unicode value}} decode map.
    onlymask -- when non-empty, only lead bytes contained in it are emitted.

    A lead byte is skipped when it is missing from *dmap* or has no entry
    inside *c2range*.  For every emitted lead byte, dmap[c1][prefix] is set
    to True; print_decmapindex() looks for that marker to decide which
    arrays exist, so *dmap* is deliberately modified.
    """
    c2width = c2range[1] - c2range[0] + 1
    # list() so the values can be sliced on Python 3 as well.
    c2values = list(range(c2range[0], c2range[1] + 1))

    for c1 in range(c1range[0], c1range[1] + 1):
        if c1 not in dmap or (onlymask and c1 not in onlymask):
            continue
        c2map = dmap[c1]
        for c2 in c2values:
            if c2 in c2map:
                break
        else:
            # Nothing mapped inside c2range for this lead byte.
            continue

        fo.write("static const Py_UNICODE %s_%02X[%d] = {"
                 " /* %02X::%02X-%02X */\n"
                 % (prefix, c1, c2width, c1, c2range[0], c2range[1]))
        c2map[prefix] = True

        # Eight cells per output row; unmapped cells become UNIINV.
        for start in range(0, c2width, 8):
            cells = []
            for c2 in c2values[start:start + 8]:
                if c2 in c2map:
                    cells.append("0x%04x," % c2map[c2])
                else:
                    cells.append("UNIINV,")
            fo.write("  " + " ".join(cells) + "\n")
        fo.write("};\n")
        fo.write("\n")


def print_decmapindex(fo, prefix, fmap, f2map=None, f2mapprefix='',
                      rng=(0x80, 0x100)):
    """Write the "<prefix>_map" pointer table indexing the decode arrays.

    fo          -- writable text file object.
    prefix      -- prefix that was given to genmap_decode() for *fmap*.
    fmap        -- primary decode map, carrying genmap_decode() markers.
    f2map       -- optional fallback decode map consulted when *fmap* has
                   no array for a lead byte.
    f2mapprefix -- prefix that was given to genmap_decode() for *f2map*.
    rng         -- (start, stop) arguments for range() over lead bytes.

    The array is always declared with 128 slots; both ranges used in this
    tool set, (0, 128) and the default (0x80, 0x100), yield 128 entries.
    """
    if f2map is None:
        f2map = {}

    fo.write("static const Py_UNICODE *%s_map[128] = {\n" % prefix)
    for i in range(*rng):
        if i in fmap and prefix in fmap[i]:
            fo.write(" %s_%02X, /* 0x%02X */\n" % (prefix, i, i))
        elif i in f2map and f2mapprefix in f2map[i]:
            fo.write(" %s_%02X, /* 0x%02X */\n" % (f2mapprefix, i, i))
        else:
            fo.write(" 0, /* 0x%02X */\n" % i)
    fo.write("};\n")
    fo.write("\n")


def loadmap(fo, sethighbit=0, natcol=0, unicol=1):
    """Parse a mapping table and return (version string, decode map).

    fo         -- seekable text file object of the mapping table.
    sethighbit -- when true, 0x80 is OR-ed into both native bytes.
    natcol     -- column index of the native (multibyte) code.
    unicol     -- column index of the Unicode code point.

    Returns ("<table version>-<date>", {c1: {c2: unicode value}}).  Only
    native codes >= 0x100 (i.e. two-byte codes) are stored.  Text after
    "#" is ignored, and rows with fewer than two columns, or too few
    columns for *natcol*/*unicol*, are skipped.

    Raises IndexError when the first 1024 characters contain no
    "Table version:" or "Date:" field, and ValueError when a column is not
    an integer literal.
    """
    fo.seek(0, 0)
    head = fo.read(1024)
    mapdatever = '%s-%s' % (
        re_UNIMAPVERSION.findall(head)[0],
        re_UNIMAPDATE.findall(head)[0],
    )
    if sethighbit:
        highbit = 0x80
    else:
        highbit = 0

    mincols = max(2, natcol + 1, unicol + 1)

    fo.seek(0, 0)
    decmap = {}
    for line in fo:
        fields = line.split('#', 1)[0].split()
        if len(fields) < mincols:
            continue

        # int(tok, 0) parses the "0x...." literals without running the
        # file's content through eval().
        row = [int(tok, 0) for tok in fields]
        loc, uni = row[natcol], row[unicol]
        if loc >= 0x100:
            c1 = (loc >> 8) | highbit
            c2 = (loc & 0xff) | highbit
            decmap.setdefault(c1, {})[c2] = uni

    return mapdatever, decmap


# Commit-mail separator for the next file, kept as comments:
# 1.1 cjkcodecs/tools/genmap_zh_CN_codecs.py
# Index: genmap_zh_CN_codecs.py
# ===================================================================
#
# genmap_zh_CN_codecs.py: Simplified Chinese Codecs Map Generator
#
# Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # $Id: genmap_zh_CN_codecs.py,v 1.1 2003/04/20 17:35:32 perky Exp $ # from genmap_support import * GB2312_C1 = (0xa1, 0xfe) GB2312_C2 = (0xa1, 0xfe) GBKL1_C1 = (0x81, 0xa8) GBKL1_C2 = (0x40, 0xfe) GBKL2_C1 = (0xa9, 0xfe) GBKL2_C2 = (0x40, 0xa0) GB18030EXTP1_C1 = (0xa1, 0xa9) GB18030EXTP1_C2 = (0x40, 0xfe) GB18030EXTP2_C1 = (0xaa, 0xaf) GB18030EXTP2_C2 = (0xa1, 0xfe) GB18030EXTP3_C1 = (0xd7, 0xd7) GB18030EXTP3_C2 = (0xfa, 0xfe) GB18030EXTP4_C1 = (0xf8, 0xfd) GB18030EXTP4_C2 = (0xa1, 0xfe) GB18030EXTP5_C1 = (0xfe, 0xfe) GB18030EXTP5_C2 = (0x50, 0xfe) try: gb2312map = open('GB2312.TXT') except IOError: print "=>> Please download mapping table from http://www.unicode." \ "org/Public/MAPPINGS/OBSOLETE/EASTASIA/GB/GB2312.TXT" raise SystemExit try: cp936map = open('CP936.TXT') except IOError: print "=>> Please download mapping table from http://www.unicode." 
\ "org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT" raise SystemExit try: gb18030map = open('gb-18030-2000.xml') except IOError: print "=>> Please download mapping table from http://oss.software" \ ".ibm.com/cvs/icu/~checkout~/charset/data/xml/gb-18030-2000.xml" raise SystemExit re_gb18030ass = re.compile('<a u="([A-F0-9]{4})" b="([0-9A-F ]+)"/>') def parse_gb18030map(fo): m, gbuni = {}, {} for i in range(65536): if i < 0xd800 or i > 0xdfff: # exclude unicode surrogate area gbuni[i] = None for uni, native in re_gb18030ass.findall(fo.read()): uni = eval('0x'+uni) native = [eval('0x'+u) for u in native.split()] if len(native) <= 2: del gbuni[uni] if len(native) == 2: # we can decode algorithmically for 1 or 4 bytes m.setdefault(native[0], {}) m[native[0]][native[1]] = uni gbuni = gbuni.keys() gbuni.sort() return m, gbuni omap = open('_zh_CN_codecs.h', 'w') print "Loading Mapping File..." gb18030decmap, gb18030unilinear = parse_gb18030map(gb18030map) datever, gbkdecmap = loadmap(cp936map) gb2312_datever, gb2312decmap = loadmap(gb2312map, 1) difmap = {} for c1, m in gbkdecmap.items(): for c2, code in m.items(): del gb18030decmap[c1][c2] if not gb18030decmap[c1]: del gb18030decmap[c1] for c1, m in gb2312decmap.items(): for c2, code in m.items(): if gbkdecmap[c1][c2] != code: difmap[(c1,c2)] = (code, gbkdecmap[c1][c2]) del gbkdecmap[c1][c2] if not gbkdecmap[c1]: del gbkdecmap[c1] print "Printing Copyright..." printcopyright(omap, filename='_zh_CN_codecs.h', encodingnames='Simplified Chinese Encodings', sourcename='CP936.TXT', sourceversion=datever) print "Generating GB2312 decode map..." genmap_decode(omap, "gb2312_decode", GB2312_C1, GB2312_C2, gb2312decmap) print "Generating GB2312 decode map index..." print_decmapindex(omap, "gb2312_decode", gb2312decmap) print "Generating GBK Level 1 decode map..." genmap_decode(omap, "gbk_decode", GBKL1_C1, GBKL1_C2, gbkdecmap) print "Generating GBK Level 2 decode map..." 
genmap_decode(omap, "gbk_decode", GBKL2_C1, GBKL2_C2, gbkdecmap) print "Generating GBK decode map index..." print_decmapindex(omap, "gbk_decode", gbkdecmap) print "Generating GB18030 extension plane 1 decode map..." genmap_decode(omap, "gb18030_decode", GB18030EXTP1_C1, GB18030EXTP1_C2, gb18030decmap) print "Generating GB18030 extension plane 2 decode map..." genmap_decode(omap, "gb18030_decode", GB18030EXTP2_C1, GB18030EXTP2_C2, gb18030decmap) print "Generating GB18030 extension plane 3 decode map..." genmap_decode(omap, "gb18030_decode", GB18030EXTP3_C1, GB18030EXTP3_C2, gb18030decmap) print "Generating GB18030 extension plane 4 decode map..." genmap_decode(omap, "gb18030_decode", GB18030EXTP4_C1, GB18030EXTP4_C2, gb18030decmap) print "Generating GB18030 extension plane 5 decode map..." genmap_decode(omap, "gb18030_decode", GB18030EXTP5_C1, GB18030EXTP5_C2, gb18030decmap) print "Generating GB18030 extension decode map index..." print_decmapindex(omap, "gb18030_decode", gb18030decmap) print "Generating Constants..." for mnam in ('GB2312', 'GBKL1', 'GBKL2', 'GB18030EXTP1', 'GB18030EXTP2', 'GB18030EXTP3', 'GB18030EXTP4', 'GB18030EXTP5'): for c in ('C1', 'C2'): mappfx = mnam + '_' + c maprange = eval(mappfx) print >> omap, "#define %-23s 0x%02x" % ( mappfx+'_BOTTOM', maprange[0]) print >> omap, "#define %-23s 0x%02x" % ( mappfx+'_TOP', maprange[1]) print "Generating GBK Special Map Macroes..." 
if difmap: print >> omap, "#define GBK_PREDECODE(dc1, dc2, assi)", elsereq = 0 for (c1, c2), (gb2312code, gbkcode) in difmap.items(): if elsereq: print >> omap, "\\\n\telse if", else: print >> omap, "\\\n\tif", elsereq = 1 print >> omap, "((dc1) == 0x%02x && (dc2) == 0x%02x) " \ "(assi) = 0x%04x;" % (c1, c2, gbkcode), print >> omap print >> omap, "#define GBK_TWEAKUNIMAP(umap)", for (c1, c2), (gb2312code, gbkcode) in difmap.items(): print >> omap, "\\" print >> omap, "\t(umap)[0x%02x][0x%02x] = 0x%02x%02x; \\" % ( gbkcode >> 8, gbkcode & 0xFF, c1, c2) if c1 != 0x20 and c2 != 0x15: print >> omap, "\t(umap)[0x%02x][0x%02x] = NOCHAR;" % ( gb2312code >> 8, gb2312code & 0xFF), print >> omap print "Generating GB18030 Unicode BMP Mapping Ranges..." ranges = [[-1, -1, -1]] gblinnum = 0 print >> omap, """ static const struct _gb18030_to_unibmp_ranges { Py_UNICODE first, last; DBCHAR base; } gb18030_to_unibmp_ranges[] = {""" for uni in gb18030unilinear: if uni == ranges[-1][1] + 1: ranges[-1][1] = uni else: ranges.append([uni, uni, gblinnum]) gblinnum += 1 for first, last, base in ranges[1:]: print >> omap, " { 0x%04x, 0x%04x, 0x%04x }," % (first, last, base) print >> omap, """\ { 0x0000, 0x0000, 0x%04x }, };""" % (ranges[-1][2] + ranges[-1][1] - ranges[-1][0] + 1) print "\nDone!" # ex: ts=8 sts=4 et 1.1 cjkcodecs/tools/genmap_zh_TW_codecs.py Index: genmap_zh_TW_codecs.py =================================================================== # # genmap_zh_TW_codecs.py: Traditional Chinese Codecs Map Generator # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. 
Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # $Id: genmap_zh_TW_codecs.py,v 1.1 2003/04/20 17:35:32 perky Exp $ # from genmap_support import * BIG5_C1 = (0xa1, 0xfe) BIG5_C2 = (0x40, 0xfe) # big5 map doesn't have 0xA3E1 (EURO SIGN), but we ignore # that for forward compatiblilty. "Hey! we have the euro-big5!" :) CP950_C1 = BIG5_C1 CP950_C2 = BIG5_C2 try: big5map = open('BIG5.TXT') except IOError: print "=>> Please download mapping table from http://www.unicode." \ "org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT" raise SystemExit try: cp950map = open('CP950.TXT') except IOError: print "=>> Please download mapping table from http://www.unicode." \ "org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT" raise SystemExit omap = open('_zh_TW_codecs.h', 'w') print "Loading Mapping File..." 
datever, cp950decmap = loadmap(cp950map) big5_datever, big5decmap = loadmap(big5map) difpages = {} for c1, m in cp950decmap.items(): for c2, code in m.items(): if (not big5decmap.has_key(c1) or not big5decmap[c1].has_key(c2) or big5decmap[c1][c2] != code): difpages[c1] = True for c1, m in big5decmap.items(): for c2, code in m.items(): if not cp950decmap.has_key(c1) or not cp950decmap[c1].has_key(c2): difpages[c1] = True difpages = difpages.keys() print "Printing Copyright..." printcopyright(omap, filename='_zh_TW_codecs.h', encodingnames='Traditional Chinese Encodings', sourcename='CP950.TXT', sourceversion=datever) print "Generating BIG5 decode map..." genmap_decode(omap, "big5_decode", BIG5_C1, BIG5_C2, big5decmap) print "Generating BIG5 decode map index..." print_decmapindex(omap, "big5_decode", big5decmap) print "Generating CP950 decode map..." genmap_decode(omap, "cp950_decode", BIG5_C1, BIG5_C2, cp950decmap, difpages) print "Generating CP950 decode map index..." print_decmapindex(omap, "cp950_decode", cp950decmap, big5decmap, "big5_decode") print "Generating Constants..." for mnam in ('BIG5', 'CP950'): for c in ('C1', 'C2'): mappfx = mnam + '_' + c maprange = eval(mappfx) print >> omap, "#define %-19s 0x%02x" % ( mappfx+'_BOTTOM', maprange[0]) print >> omap, "#define %-19s 0x%02x" % ( mappfx+'_TOP', maprange[1]) print "\nDone!" # ex: ts=8 sts=4 et |