Thread: [KoCo-CVS] [Commit] cjkcodecs/tools genmap_japanese.py genmap_support.py
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-04-22 21:04:39
|
perky 03/04/22 14:04:36 Modified: tools genmap_japanese.py genmap_support.py Log: Add dummy map providers for japanese. Revision Changes Path 1.2 +9 -9 cjkcodecs/tools/genmap_japanese.py Index: genmap_japanese.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_japanese.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- genmap_japanese.py 22 Apr 2003 19:24:24 -0000 1.1 +++ genmap_japanese.py 22 Apr 2003 21:04:36 -0000 1.2 @@ -26,7 +26,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: genmap_japanese.py,v 1.1 2003/04/22 19:24:24 perky Exp $ +# $Id: genmap_japanese.py,v 1.2 2003/04/22 21:04:36 perky Exp $ # from genmap_support import * @@ -92,28 +92,28 @@ printcopyright(omap) print "Generating JIS X 0208 decode map..." -genmap_decode(omap, "jisx0208_decode", JISX0208_C1, JISX0208_C2, jisx0208decmap) +genmap_decode(omap, "jisx0208", JISX0208_C1, JISX0208_C2, jisx0208decmap) print "Generating JIS X 0208 decode map index..." -print_decmapindex(omap, "jisx0208_decode", jisx0208decmap, rng=(0, 128)) +print_decmapindex(omap, "jisx0208", jisx0208decmap, rng=(0, 128)) omap = open("map_jisx0212.h", "w") printcopyright(omap) print "Generating JIS X 0212 decode map..." -genmap_decode(omap, "jisx0212_decode", JISX0212_C1, JISX0212_C2, jisx0212decmap) +genmap_decode(omap, "jisx0212", JISX0212_C1, JISX0212_C2, jisx0212decmap) print "Generating JIS X 0212 decode map index..." -print_decmapindex(omap, "jisx0212_decode", jisx0212decmap, rng=(0, 128)) +print_decmapindex(omap, "jisx0212", jisx0212decmap, rng=(0, 128)) omap = open("map_cp932.h", "w") printcopyright(omap) print "Generating CP932 decode map..." -genmap_decode(omap, "cp932_decode", CP932P0_C1, CP932P0_C2, cp932decmap) -genmap_decode(omap, "cp932_decode", CP932P1_C1, CP932P1_C2, cp932decmap) -genmap_decode(omap, "cp932_decode", CP932P2_C1, CP932P2_C2, cp932decmap) +genmap_decode(omap, "cp932", CP932P0_C1, CP932P0_C2, cp932decmap) +genmap_decode(omap, "cp932", CP932P1_C1, CP932P1_C2, cp932decmap) +genmap_decode(omap, "cp932", CP932P2_C1, CP932P2_C2, cp932decmap) print "Generating CP932 decode map index..." -print_decmapindex(omap, "cp932_decode", cp932decmap) +print_decmapindex(omap, "cp932", cp932decmap) print "Generating CP932 Tweaks..." if difmap: 1.3 +27 -18 cjkcodecs/tools/genmap_support.py Index: genmap_support.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_support.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- genmap_support.py 22 Apr 2003 19:24:25 -0000 1.2 +++ genmap_support.py 22 Apr 2003 21:04:36 -0000 1.3 @@ -26,14 +26,14 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: genmap_support.py,v 1.2 2003/04/22 19:24:25 perky Exp $ +# $Id: genmap_support.py,v 1.3 2003/04/22 21:04:36 perky Exp $ # import re COPYRIGHT_HEADER = """\ /* - * $Id: genmap_support.py,v 1.2 2003/04/22 19:24:25 perky Exp $ + * $Id: genmap_support.py,v 1.3 2003/04/22 21:04:36 perky Exp $ */ """ re_UNIMAPDATE = re.compile('Date:\s*([ a-zA-Z0-9/]*)') @@ -50,21 +50,22 @@ if not dmap.has_key(c1) or (onlymask and c1 not in onlymask): continue c2map = dmap[c1] - for c2 in c2values: - if c2map.has_key(c2): - break - else: + rc2values = [n for n in c2values if c2map.has_key(n)] + if not rc2values: continue + rc2values = range(rc2values[0], rc2values[-1] + 1) + rc2width = len(rc2values) - print >> fo, ("static const Py_UNICODE %s_%02X[%d] = {" + print >> fo, ("static const Py_UNICODE __%s_decmap_%02X[%d] = {" " /* %02X::%02X-%02X */" - % (prefix, c1, c2width, c1, c2range[0], c2range[1])) + % (prefix, c1, rc2width, c1, rc2values[0], rc2values[-1])) c2map[prefix] = True + c2map['min'] = rc2values[0] + c2map['max'] = rc2values[-1] - c2s = c2values[:] - while c2s: - dp = c2s[:8] - del c2s[:8] + while rc2values: + dp = rc2values[:8] + del rc2values[:8] print >> fo, ' ', ' '.join([ c2map.has_key(i) and ("0x%04x," % c2map[i]) or "UNIINV," @@ -73,16 +74,22 @@ print >> fo, "};" print >> fo -def print_decmapindex(fo, prefix, fmap, +def print_decmapindex(fo, fmapprefix, fmap, f2map={}, f2mapprefix='', rng=(0x80, 0x100)): - print >> fo, "static const Py_UNICODE *%s_map[128] = {" % (prefix) + print >> fo, "static const struct dbcs_index %s_decmap[128] = {" % (fmapprefix) for i in range(*rng): - if fmap.has_key(i) and fmap[i].has_key(prefix): - print >> fo, " %s_%02X, /* 0x%02X */" % (prefix, i, i) + if fmap.has_key(i) and fmap[i].has_key(fmapprefix): + map = fmap + prefix = fmapprefix elif f2map.has_key(i) and f2map[i].has_key(f2mapprefix): - print >> fo, " %s_%02X, /* 0x%02X */" % (f2mapprefix, i, i) + map = f2map + prefix = f2mapprefix else: - print >> fo, " 0, /* 0x%02X */" % i + print >> fo, "/* 0x%02X */ {0, 0, 0}," % i + continue + + print >> fo, "/* 0x%02X */ {__%s_decmap_%02X, 0x%02x, 0x%02x}," % ( + i, prefix, i, map[i]['min'], map[i]['max']) print >> fo, "};" print >> fo @@ -109,3 +116,5 @@ decmap[(loc >> 8)|sethighbit][(loc & 0xff)|sethighbit] = uni return mapdatever, decmap + +# ex: ts=8 sts=4 et |
From: Hye-Shik C. <pe...@us...> - 2003-05-17 21:17:40
|
perky 03/05/17 14:17:39 Modified: tools genmap_japanese.py genmap_support.py Log: Fix unmatched naming Revision Changes Path 1.5 +3 -3 cjkcodecs/tools/genmap_japanese.py Index: genmap_japanese.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_japanese.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- genmap_japanese.py 17 May 2003 16:16:57 -0000 1.4 +++ genmap_japanese.py 17 May 2003 21:17:39 -0000 1.5 @@ -26,7 +26,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: genmap_japanese.py,v 1.4 2003/05/17 16:16:57 perky Exp $ +# $Id: genmap_japanese.py,v 1.5 2003/05/17 21:17:39 perky Exp $ # from genmap_support import * @@ -111,8 +111,8 @@ printcopyright(omap) print "Generating JIS X 0208 && JIS X 0212 encode map..." codebunch =[] -genmap_encode(codebunch, "jisx0208_0212", jisx0208_0212encmap) -print_encmap(omap, codebunch, "jisx0208_0212", jisx0208_0212encmap) +genmap_encode(codebunch, "jisxcommon", jisx0208_0212encmap) +print_encmap(omap, codebunch, "jisxcommon", jisx0208_0212encmap) omap = open("map_cp932ext.h", "w") printcopyright(omap) 1.5 +5 -5 cjkcodecs/tools/genmap_support.py Index: genmap_support.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_support.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- genmap_support.py 17 May 2003 16:16:57 -0000 1.4 +++ genmap_support.py 17 May 2003 21:17:39 -0000 1.5 @@ -26,14 +26,14 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: genmap_support.py,v 1.4 2003/05/17 16:16:57 perky Exp $ +# $Id: genmap_support.py,v 1.5 2003/05/17 21:17:39 perky Exp $ # import re COPYRIGHT_HEADER = """\ /* - * $Id: genmap_support.py,v 1.4 2003/05/17 16:16:57 perky Exp $ + * $Id: genmap_support.py,v 1.5 2003/05/17 21:17:39 perky Exp $ */ """ re_UNIMAPDATE = re.compile('Date:\s*([ a-zA-Z0-9/]*)') @@ -61,10 +61,10 @@ if c2map.has_key(v): codebunch.append('0x%04x,' % c2map[v]) else: - codebunch.append('UNIINV,') + codebunch.append('NOCHAR,') def print_encmap(fo, codebunch, fmapprefix, fmap, f2map={}, f2mapprefix=''): - print >> fo, ("static const Py_UNICODE __%s_encmap[%d] = {" % ( + print >> fo, ("static const DBCHAR __%s_encmap[%d] = {" % ( fmapprefix, len(codebunch))) i = 0 while i < len(codebunch): @@ -74,7 +74,7 @@ print >> fo, "};" print >> fo - print >> fo, "static const struct dbcs_index %s_encmap[256] = {" % (fmapprefix) + print >> fo, "static const struct unim_index %s_encmap[256] = {" % (fmapprefix) for i in range(256): if fmap.has_key(i) and fmap[i].has_key(fmapprefix): map = fmap |
From: Hye-Shik C. <pe...@us...> - 2003-07-07 04:47:25
|
perky 03/07/06 21:47:24 Modified: tools genmap_japanese.py genmap_support.py Log: Add jisx0213 generators Revision Changes Path 1.8 +157 -0 cjkcodecs/tools/genmap_japanese.py Index: genmap_japanese.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_japanese.py,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- genmap_japanese.py 10 Jun 2003 11:12:12 -0000 1.7 +++ genmap_japanese.py 7 Jul 2003 04:47:24 -0000 1.8 @@ -26,7 +26,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: genmap_japanese.py,v 1.7 2003/06/10 11:12:12 perky Exp $ +# $Id: genmap_japanese.py,v 1.8 2003/07/07 04:47:24 perky Exp $ # from genmap_support import * @@ -35,6 +35,8 @@ JISX0208_C2 = (0x21, 0x7e) JISX0212_C1 = (0x22, 0x6d) JISX0212_C2 = (0x21, 0x7e) +JISX0213_C1 = (0x21, 0x7e) +JISX0213_C2 = (0x21, 0x7e) CP932P0_C1 = (0x81, 0x81) # patches between shift-jis and cp932 CP932P0_C2 = (0x5f, 0xca) CP932P1_C1 = (0x87, 0x87) # CP932 P1 @@ -60,12 +62,66 @@ print "=>> Please download mapping table from http://www.unicode." \ "org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT" raise SystemExit +try: + jisx0213file = open('jisx0213-2000-std.txt') +except IOError: + print "=>> Please download mapping table from http://wakaba-web." \ + "hp.infoseek.co.jp/table/jisx0213-2000-std.txt" + raise SystemExit + +def loadmap_jisx0213(fo): + decmap3, decmap4 = {}, {} # maps to BMP for level 3 and 4 + decmap3_2, decmap4_2 = {}, {} # maps to U+2xxxx for level 3 and 4 + decmap3_pair = {} # maps to BMP-pair for level 3 + for line in fo: + line = line.split('#', 1)[0].strip() + if not line or len(line.split()) < 2: continue + + row = line.split() + loc = eval('0x' + row[0][2:]) + level = eval(row[0][0]) + m = None + if len(row[1].split('+')) == 2: # single unicode + uni = eval('0x' + row[1][2:]) + if level == 3: + if uni < 0x10000: + m = decmap3 + elif 0x20000 <= uni < 0x30000: + uni -= 0x20000 + m = decmap3_2 + elif level == 4: + if uni < 0x10000: + m = decmap4 + elif 0x20000 <= uni < 0x30000: + uni -= 0x20000 + m = decmap4_2 + m.setdefault((loc >> 8), {}) + m[(loc >> 8)][(loc & 0xff)] = uni + else: # pair + uniprefix = eval('0x' + row[1][2:6]) # body + uni = eval('0x' + row[1][7:11]) # modifier + if level != 3: + raise ValueError, "invalid map" + decmap3_pair.setdefault(uniprefix, {}) + m = decmap3_pair[uniprefix] + + if m is None: + raise ValueError, "invalid map" + m.setdefault((loc >> 8), {}) + m[(loc >> 8)][(loc & 0xff)] = uni + + return decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair print "Loading Mapping File..." sjisdecmap = loadmap(jisx0208file, natcol=0, unicol=2) jisx0208decmap = loadmap(jisx0208file, natcol=1, unicol=2) jisx0212decmap = loadmap(jisx0212file) cp932decmap = loadmap(cp932file) +jis3decmap, jis4decmap, jis3_2_decmap, jis4_2_decmap, jis3_pairdecmap \ += loadmap_jisx0213(jisx0213file) +if jis3decmap[0x21][0x24] != 0xff0c: + print "Please adjust your JIS X 0213 map using jisx0213-2000-std.txt.diff" + raise SystemExit sjisencmap, cp932encmap = {}, {} jisx0208_0212encmap = {} @@ -86,6 +142,15 @@ if not cp932encmap[c1]: del cp932encmap[c1] +jisx0213pairdecmap = {} +jisx0213pairencmap = [] +for unibody, m1 in jis3_pairdecmap.iteritems(): + for c1, m2 in m1.iteritems(): + for c2, modifier in m2.iteritems(): + jisx0213pairencmap.append((unibody, modifier, c1 << 8 | c2)) + jisx0213pairdecmap.setdefault(c1, {}) + jisx0213pairdecmap[c1][c2] = unibody << 16 | modifier + # Twinmap for both of JIS X 0208 (MSB unset) and JIS X 0212 (MSB set) for c1, m in jisx0208decmap.items(): for c2, code in m.items(): @@ -98,6 +163,43 @@ print "OOPS!!!", (code) jisx0208_0212encmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2 +jisx0213bmpencmap = {} +for c1, m in jis3decmap.items(): + for c2, code in m.items(): + if jisx0208decmap.has_key(c1) and jisx0208decmap[c1].has_key(c2): + if jis3_pairdecmap.has_key(code): + jisx0213bmpencmap[code >> 8][code & 0xff] = (0,) # pair + jisx0213pairencmap.append((code, 0, c1 << 8 | c2)) + elif jisx0208decmap[c1][c2] == code: + del jis3decmap[c1][c2] + if not jis3decmap[c1]: + del jis3decmap[c1] + else: + raise ValueError, "Difference between JIS X 0208 and " \ + "JIS X 0213 Plane 1 is found." + else: + jisx0213bmpencmap.setdefault(code >> 8, {}) + if not jis3_pairdecmap.has_key(code): + jisx0213bmpencmap[code >> 8][code & 0xff] = c1 << 8 | c2 + else: + jisx0213bmpencmap[code >> 8][code & 0xff] = (0,) # pair + jisx0213pairencmap.append((code, 0, c1 << 8 | c2)) + +for c1, m in jis4decmap.iteritems(): + for c2, code in m.iteritems(): + jisx0213bmpencmap.setdefault(code >> 8, {}) + jisx0213bmpencmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2 + +jisx0213empencmap = {} +for c1, m in jis3_2_decmap.iteritems(): + for c2, code in m.iteritems(): + jisx0213empencmap.setdefault(code >> 8, {}) + jisx0213empencmap[code >> 8][code & 0xff] = c1 << 8 | c2 +for c1, m in jis4_2_decmap.iteritems(): + for c2, code in m.iteritems(): + jisx0213empencmap.setdefault(code >> 8, {}) + jisx0213empencmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2 + omap = open("map_jisx0208.h", "w") printcopyright(omap) @@ -133,6 +235,61 @@ codebunch =[] genmap_encode(codebunch, "cp932ext", cp932encmap) print_encmap(omap, codebunch, "cp932ext", cp932encmap) + +omap = open("map_jisx0213.h", "w") +printcopyright(omap) + +print "Generating JIS X 0213 Plane 1 BMP decode map..." +codebunch =[] +genmap_decode(codebunch, "jisx0213_1_bmp", JISX0213_C1, JISX0213_C2, jis3decmap) +print_decmap(omap, codebunch, "jisx0213_1_bmp", jis3decmap) + +print "Generating JIS X 0213 Plane 2 BMP decode map..." +codebunch =[] +genmap_decode(codebunch, "jisx0213_2_bmp", JISX0213_C1, JISX0213_C2, jis4decmap) +print_decmap(omap, codebunch, "jisx0213_2_bmp", jis4decmap) + +print "Generating JIS X 0213 BMP encode map..." +codebunch =[] +genmap_encode(codebunch, "jisx0213_bmp", jisx0213bmpencmap) +print_encmap(omap, codebunch, "jisx0213_bmp", jisx0213bmpencmap) + +print "Generating JIS X 0213 Plane 1 EMP decode map..." +codebunch =[] +genmap_decode(codebunch, "jisx0213_1_emp", + JISX0213_C1, JISX0213_C2, jis3_2_decmap) +print_decmap(omap, codebunch, "jisx0213_1_emp", jis3_2_decmap) + +print "Generating JIS X 0213 Plane 2 EMP decode map..." +codebunch =[] +genmap_decode(codebunch, "jisx0213_2_emp", + JISX0213_C1, JISX0213_C2, jis4_2_decmap) +print_decmap(omap, codebunch, "jisx0213_2_emp", jis4_2_decmap) + +print "Generating JIS X 0213 EMP encode map..." +codebunch =[] +genmap_encode(codebunch, "jisx0213_emp", jisx0213empencmap) +print_encmap(omap, codebunch, "jisx0213_emp", jisx0213empencmap) + +omap = open("map_jisx0213_pairs.h", "w") +printcopyright(omap) + +print "Generating JIS X 0213 unicode-pair decode map..." +codebunch =[] +genmap_decode(codebunch, "jisx0213_pairdecmap", JISX0213_C1, JISX0213_C2, + jisx0213pairdecmap, wide=1) +print_decmap(omap, codebunch, "jisx0213_pairdecmap", + jisx0213pairdecmap, wide=1) + +print "Generating JIS X 0213 unicode-pair encode map..." +jisx0213pairencmap.sort() +print >> omap, "#define JISX0213_ENCPAIRS %d" % len(jisx0213pairencmap) +print >> omap, "struct pair_encodemap jisx0213_pairencmap" \ + "[JISX0213_ENCPAIRS] = {" +for body, modifier, jis in jisx0213pairencmap: + print >> omap, " { 0x%04x, 0x%04x, 0x%04x }," % ( + body, modifier, jis) +print >> omap, "};" print "\nDone!" # ex: ts=8 sts=4 et 1.10 +37 -13 cjkcodecs/tools/genmap_support.py Index: genmap_support.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_support.py,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- genmap_support.py 6 Jul 2003 23:19:26 -0000 1.9 +++ genmap_support.py 7 Jul 2003 04:47:24 -0000 1.10 @@ -26,14 +26,14 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: genmap_support.py,v 1.9 2003/07/06 23:19:26 perky Exp $ +# $Id: genmap_support.py,v 1.10 2003/07/07 04:47:24 perky Exp $ # import re COPYRIGHT_HEADER = """\ /* - * $Id: genmap_support.py,v 1.9 2003/07/06 23:19:26 perky Exp $ + * $Id: genmap_support.py,v 1.10 2003/07/07 04:47:24 perky Exp $ */ """ @@ -56,10 +56,14 @@ c2map['midx'] = len(codebunch) for v in range(rc2values[0], rc2values[-1] + 1): - if c2map.has_key(v): + if not c2map.has_key(v): + codebunch.append('NOCHAR,') + elif isinstance(c2map[v], int): codebunch.append('0x%04x,' % c2map[v]) + elif isinstance(c2map[v], tuple): + codebunch.append('MULTIC,') else: - codebunch.append('NOCHAR,') + raise ValueError def print_encmap(fo, codebunch, fmapprefix, fmap, f2map={}, f2mapprefix=''): print >> fo, ("static const DBCHAR __%s_encmap[%d] = {" % ( @@ -89,7 +93,8 @@ print >> fo, "};" print >> fo -def genmap_decode(codebunch, prefix, c1range, c2range, dmap, onlymask=()): +def genmap_decode(codebunch, prefix, c1range, c2range, + dmap, onlymask=(), wide=0): c2width = c2range[1] - c2range[0] + 1 c2values = range(c2range[0], c2range[1] + 1) @@ -108,22 +113,41 @@ for v in range(rc2values[0], rc2values[-1] + 1): if c2map.has_key(v): - codebunch.append('0x%04x,' % c2map[v]) + if not wide: + codebunch.append('0x%04x,' % c2map[v]) + else: + codebunch.append('0x%08x,' % c2map[v]) else: - codebunch.append('UNIINV,') - -def print_decmap(fo, codebunch, fmapprefix, fmap, f2map={}, f2mapprefix=''): - print >> fo, ("static const ucs2_t __%s_decmap[%d] = {" % ( + if not wide: + codebunch.append('UNIINV,') + else: + codebunch.append(' UNIINV,') + +def print_decmap(fo, codebunch, fmapprefix, fmap, f2map={}, + f2mapprefix='', wide=0): + if not wide: + print >> fo, ("static const ucs2_t __%s_decmap[%d] = {" % ( fmapprefix, len(codebunch))) + width = 8 + else: + print >> fo, ("static const ucs4_t __%s_decmap[%d] = {" % ( + fmapprefix, len(codebunch))) + width = 4 i = 0 while i < len(codebunch): - dp = codebunch[i:i+8] - i += 8 + dp = codebunch[i:i+width] + i += width print >> fo, ' ', ' '.join(dp) print >> fo, "};" print >> fo - print >> fo, "static const struct dbcs_index %s_decmap[256] = {" % (fmapprefix) + if not wide: + print >> fo, "static const struct dbcs_index %s_decmap[256] = {" % ( + fmapprefix) + else: + print >> fo, "static const struct widedbcs_index %s_decmap[256] = {" % ( + fmapprefix) + for i in range(256): if fmap.has_key(i) and fmap[i].has_key(fmapprefix): map = fmap |