Thread: [KoCo-CVS] [Commit] cjkcodecs/tools genmap_japanese.py genmap_korean.py genmap_support.py
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-05-17 16:16:59
|
perky 03/05/17 09:16:57 Modified: tools genmap_japanese.py genmap_korean.py genmap_support.py Log: Generate encode map yeah! Revision Changes Path 1.4 +45 -47 cjkcodecs/tools/genmap_japanese.py Index: genmap_japanese.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_japanese.py,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- genmap_japanese.py 14 May 2003 08:15:22 -0000 1.3 +++ genmap_japanese.py 17 May 2003 16:16:57 -0000 1.4 @@ -26,7 +26,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: genmap_japanese.py,v 1.3 2003/05/14 08:15:22 perky Exp $ +# $Id: genmap_japanese.py,v 1.4 2003/05/17 16:16:57 perky Exp $ # from genmap_support import * @@ -68,67 +68,65 @@ cp932datever, cp932decmap = loadmap(cp932file) sjisencmap, cp932encmap = {}, {} -cp932diff = {} +jisx0208_0212encmap = {} +for c1, m in sjisdecmap.items(): + for c2, code in m.items(): + sjisencmap.setdefault(code >> 8, {}) + sjisencmap[code >> 8][code & 0xff] = c1 << 8 | c2 for c1, m in cp932decmap.items(): for c2, code in m.items(): - cp932encmap[code] = (c1, c2) - if sjisdecmap.has_key(c1) and sjisdecmap[c1].has_key(c2): - sjisencmap[sjisdecmap[c1][c2]] = (c1, c2) - if sjisdecmap[c1][c2] != code: - cp932diff[(c1, c2)] = (sjisdecmap[c1][c2], code) - else: - del cp932decmap[c1][c2] - if not cp932decmap[c1]: - del cp932decmap[c1] -difmap = [] -for uni, (c1, c2) in cp932encmap.iteritems(): - if sjisencmap.has_key(uni): - s1, s2 = sjisencmap[uni] - if (s1, s2) != (c1, c2): - difmap.append(uni) + if sjisencmap.has_key(c1) and sjisencmap[c1].has_key(c2) and \ + sjisencmap[c1][c2] != code: + cp932encmap.setdefault(code >> 8, {}) + cp932encmap[code >> 8][code & 0xff] = c1 << 8 | c2 +# Twinmap for both of JIS X 0208 (MSB unset) and JIS X 0212 (MSB set) +for c1, m in jisx0208decmap.items(): + for c2, code in m.items(): + jisx0208_0212encmap.setdefault(code >> 8, {}) + jisx0208_0212encmap[code >> 8][code & 0xff] = c1 << 8 | c2 +for c1, m in jisx0212decmap.items(): + for c2, code in m.items(): + jisx0208_0212encmap.setdefault(code >> 8, {}) + if jisx0208_0212encmap[code >> 8].has_key(code & 0xff): + print "OOPS!!!", (code) + jisx0208_0212encmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2 omap = open("map_jisx0208.h", "w") printcopyright(omap) print "Generating JIS X 0208 decode map..." -genmap_decode(omap, "jisx0208", JISX0208_C1, JISX0208_C2, jisx0208decmap) - -print "Generating JIS X 0208 decode map index..." -print_decmapindex(omap, "jisx0208", jisx0208decmap, rng=(0, 128)) +codebunch = [] +genmap_decode(codebunch, "jisx0208", JISX0208_C1, JISX0208_C2, jisx0208decmap) +print_decmap(omap, codebunch, "jisx0208", jisx0208decmap) omap = open("map_jisx0212.h", "w") printcopyright(omap) print "Generating JIS X 0212 decode map..." -genmap_decode(omap, "jisx0212", JISX0212_C1, JISX0212_C2, jisx0212decmap) +codebunch = [] +genmap_decode(codebunch, "jisx0212", JISX0212_C1, JISX0212_C2, jisx0212decmap) +print_decmap(omap, codebunch, "jisx0212", jisx0212decmap) -print "Generating JIS X 0212 decode map index..." -print_decmapindex(omap, "jisx0212", jisx0212decmap, rng=(0, 128)) +omap = open("map_jisxcommon.h", "w") +printcopyright(omap) +print "Generating JIS X 0208 && JIS X 0212 encode map..." +codebunch =[] +genmap_encode(codebunch, "jisx0208_0212", jisx0208_0212encmap) +print_encmap(omap, codebunch, "jisx0208_0212", jisx0208_0212encmap) -omap = open("map_cp932.h", "w") +omap = open("map_cp932ext.h", "w") printcopyright(omap) -print "Generating CP932 decode map..." -genmap_decode(omap, "cp932", CP932P0_C1, CP932P0_C2, cp932decmap) -genmap_decode(omap, "cp932", CP932P1_C1, CP932P1_C2, cp932decmap) -genmap_decode(omap, "cp932", CP932P2_C1, CP932P2_C2, cp932decmap) - -print "Generating CP932 decode map index..." -print_decmapindex(omap, "cp932", cp932decmap) - -print "Generating CP932 Tweaks..." -if difmap: - print >> omap, "#define CP932_TWEAKUNIMAP(umap)", - checked = {} - for uni in difmap: - if not checked.has_key(uni >> 8): - print >> omap, "\\\n\tif ((umap)[0x%02x] == NULL) " \ - "(umap)[0x%02x] = PyMem_Malloc(sizeof(DBCHAR) * 256);" % ( - uni >> 8, uni >> 8), - checked[uni >> 8] = True - print >> omap, "\\\n\t(umap)[0x%02x][0x%02x] = DBCINV;" % ( - uni >> 8, uni & 0xFF), - print >> omap +print "Generating CP932 Extension decode map..." +codebunch = [] +genmap_decode(codebunch, "cp932ext", CP932P0_C1, CP932P0_C2, cp932decmap) +genmap_decode(codebunch, "cp932ext", CP932P1_C1, CP932P1_C2, cp932decmap) +genmap_decode(codebunch, "cp932ext", CP932P2_C1, CP932P2_C2, cp932decmap) +print_decmap(omap, codebunch, "cp932ext", cp932decmap) + +print "Generating CP932 Extension encode map..." +codebunch =[] +genmap_encode(codebunch, "cp932ext", cp932encmap) +print_encmap(omap, codebunch, "cp932ext", cp932encmap) print "\nDone!" - # ex: ts=8 sts=4 et 1.3 +22 -14 cjkcodecs/tools/genmap_korean.py Index: genmap_korean.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_korean.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- genmap_korean.py 14 May 2003 06:58:05 -0000 1.2 +++ genmap_korean.py 17 May 2003 16:16:57 -0000 1.3 @@ -26,7 +26,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: genmap_korean.py,v 1.2 2003/05/14 06:58:05 perky Exp $ +# $Id: genmap_korean.py,v 1.3 2003/05/17 16:16:57 perky Exp $ # from genmap_support import * @@ -48,36 +48,44 @@ print "Loading Mapping File..." datever, decmap = loadmap(mapfile) uhcdecmap, ksx1001decmap = {}, {} +cp949encmap = {} for c1, c2map in decmap.iteritems(): - for c2 in c2map.iterkeys(): + for c2, code in c2map.iteritems(): if c1 >= 0xa1 and c2 >= 0xa1: ksx1001decmap.setdefault(c1&0x7f, {}) ksx1001decmap[c1&0x7f][c2&0x7f] = c2map[c2] + cp949encmap.setdefault(code >> 8, {}) + cp949encmap[code >> 8][code & 0xFF] = (c1<<8 | c2) & 0x7f7f else: # uhc uhcdecmap.setdefault(c1, {}) uhcdecmap[c1][c2] = c2map[c2] + cp949encmap.setdefault(code >> 8, {}) # MSB set + cp949encmap[code >> 8][code & 0xFF] = 0x1000 | (c1<<8 | c2) & 0x7f7f omap = open('map_ksx1001.h', 'w') printcopyright(omap) print "Generating KS X 1001 decode map..." -genmap_decode(omap, "ksx1001", KSX1001_C1, KSX1001_C2, ksx1001decmap) +codebunch = [] +genmap_decode(codebunch, "ksx1001", KSX1001_C1, KSX1001_C2, ksx1001decmap) +print_decmap(omap, codebunch, "ksx1001", ksx1001decmap) -print "Generating KS X 1001 decode map index..." -print_decmapindex(omap, "ksx1001", ksx1001decmap, rng=(0, 127)) - -omap = open('map_cp949.h', 'w') +omap = open('map_cp949ext.h', 'w') printcopyright(omap) -print "Generating UHC Level 1 decode map..." -genmap_decode(omap, "cp949", UHCL1_C1, UHCL1_C2, uhcdecmap) +print "Generating UHC decode map..." +codebunch = [] +genmap_decode(codebunch, "cp949ext", UHCL1_C1, UHCL1_C2, uhcdecmap) +genmap_decode(codebunch, "cp949ext", UHCL2_C1, UHCL2_C2, uhcdecmap) +print_decmap(omap, codebunch, "cp949ext", uhcdecmap) -print "Generating UHC Level 2 decode map..." -genmap_decode(omap, "cp949", UHCL2_C1, UHCL2_C2, uhcdecmap) +omap = open('map_cp949.h', 'w') +printcopyright(omap) -print "Generating UHC decode map index..." -print_decmapindex(omap, "cp949", uhcdecmap) +print "Generating CP949 (includes KS X 1001) encode map..." +codebunch =[] +genmap_encode(codebunch, "cp949", cp949encmap) +print_encmap(omap, codebunch, "cp949", cp949encmap) print "\nDone!" - # ex: ts=8 sts=4 et 1.4 +74 -25 cjkcodecs/tools/genmap_support.py Index: genmap_support.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_support.py,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- genmap_support.py 22 Apr 2003 21:04:36 -0000 1.3 +++ genmap_support.py 17 May 2003 16:16:57 -0000 1.4 @@ -26,14 +26,14 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: genmap_support.py,v 1.3 2003/04/22 21:04:36 perky Exp $ +# $Id: genmap_support.py,v 1.4 2003/05/17 16:16:57 perky Exp $ # import re COPYRIGHT_HEADER = """\ /* - * $Id: genmap_support.py,v 1.3 2003/04/22 21:04:36 perky Exp $ + * $Id: genmap_support.py,v 1.4 2003/05/17 16:16:57 perky Exp $ */ """ re_UNIMAPDATE = re.compile('Date:\s*([ a-zA-Z0-9/]*)') @@ -42,7 +42,56 @@ def printcopyright(fo): print >> fo, COPYRIGHT_HEADER -def genmap_decode(fo, prefix, c1range, c2range, dmap, onlymask=()): +def genmap_encode(codebunch, prefix, emap): + for c1 in range(0, 256): + if not emap.has_key(c1): + continue + c2map = emap[c1] + rc2values = c2map.keys() + rc2values.sort() + if not rc2values: + continue + + c2map[prefix] = True + c2map['min'] = rc2values[0] + c2map['max'] = rc2values[-1] + c2map['midx'] = len(codebunch) + + for v in range(rc2values[0], rc2values[-1] + 1): + if c2map.has_key(v): + codebunch.append('0x%04x,' % c2map[v]) + else: + codebunch.append('UNIINV,') + +def print_encmap(fo, codebunch, fmapprefix, fmap, f2map={}, f2mapprefix=''): + print >> fo, ("static const Py_UNICODE __%s_encmap[%d] = {" % ( + fmapprefix, len(codebunch))) + i = 0 + while i < len(codebunch): + dp = codebunch[i:i+8] + i += 8 + print >> fo, ' ', ' '.join(dp) + print >> fo, "};" + print >> fo + + print >> fo, "static const struct dbcs_index %s_encmap[256] = {" % (fmapprefix) + for i in range(256): + if fmap.has_key(i) and fmap[i].has_key(fmapprefix): + map = fmap + prefix = fmapprefix + elif f2map.has_key(i) and f2map[i].has_key(f2mapprefix): + map = f2map + prefix = f2mapprefix + else: + print >> fo, "/* 0x%02X */ {0, 0, 0}," % i + continue + + print >> fo, "/* 0x%02X */ {__%s_encmap+%d, 0x%02x, 0x%02x}," % ( + i, prefix, map[i]['midx'], map[i]['min'], map[i]['max']) + print >> fo, "};" + print >> fo + +def genmap_decode(codebunch, prefix, c1range, c2range, dmap, onlymask=()): c2width = c2range[1] - c2range[0] + 1 c2values = range(c2range[0], c2range[1] + 1) @@ -53,31 +102,31 @@ rc2values = [n for n in c2values if c2map.has_key(n)] if not rc2values: continue - rc2values = range(rc2values[0], rc2values[-1] + 1) - rc2width = len(rc2values) - print >> fo, ("static const Py_UNICODE __%s_decmap_%02X[%d] = {" - " /* %02X::%02X-%02X */" - % (prefix, c1, rc2width, c1, rc2values[0], rc2values[-1])) c2map[prefix] = True c2map['min'] = rc2values[0] c2map['max'] = rc2values[-1] + c2map['midx'] = len(codebunch) + + for v in range(rc2values[0], rc2values[-1] + 1): + if c2map.has_key(v): + codebunch.append('0x%04x,' % c2map[v]) + else: + codebunch.append('UNIINV,') + +def print_decmap(fo, codebunch, fmapprefix, fmap, f2map={}, f2mapprefix=''): + print >> fo, ("static const Py_UNICODE __%s_decmap[%d] = {" % ( + fmapprefix, len(codebunch))) + i = 0 + while i < len(codebunch): + dp = codebunch[i:i+8] + i += 8 + print >> fo, ' ', ' '.join(dp) + print >> fo, "};" + print >> fo - while rc2values: - dp = rc2values[:8] - del rc2values[:8] - print >> fo, ' ', ' '.join([ - c2map.has_key(i) and - ("0x%04x," % c2map[i]) or "UNIINV," - for i in dp - ]) - print >> fo, "};" - print >> fo - -def print_decmapindex(fo, fmapprefix, fmap, - f2map={}, f2mapprefix='', rng=(0x80, 0x100)): - print >> fo, "static const struct dbcs_index %s_decmap[128] = {" % (fmapprefix) - for i in range(*rng): + print >> fo, "static const struct dbcs_index %s_decmap[256] = {" % (fmapprefix) + for i in range(256): if fmap.has_key(i) and fmap[i].has_key(fmapprefix): map = fmap prefix = fmapprefix @@ -88,8 +137,8 @@ print >> fo, "/* 0x%02X */ {0, 0, 0}," % i continue - print >> fo, "/* 0x%02X */ {__%s_decmap_%02X, 0x%02x, 0x%02x}," % ( - i, prefix, i, map[i]['min'], map[i]['max']) + print >> fo, "/* 0x%02X */ {__%s_decmap+%d, 0x%02x, 0x%02x}," % ( + i, prefix, map[i]['midx'], map[i]['min'], map[i]['max']) print >> fo, "};" print >> fo |