[KoCo-CVS] [Commit] cjkcodecs/tools genmap_japanese.py genmap_support.py
Brought to you by: perky
From: Hye-Shik C. <pe...@us...> - 2003-07-07 04:47:25
perky 03/07/06 21:47:24 Modified: tools genmap_japanese.py genmap_support.py Log: Add jisx0213 generators Revision Changes Path 1.8 +157 -0 cjkcodecs/tools/genmap_japanese.py Index: genmap_japanese.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_japanese.py,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- genmap_japanese.py 10 Jun 2003 11:12:12 -0000 1.7 +++ genmap_japanese.py 7 Jul 2003 04:47:24 -0000 1.8 @@ -26,7 +26,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: genmap_japanese.py,v 1.7 2003/06/10 11:12:12 perky Exp $ +# $Id: genmap_japanese.py,v 1.8 2003/07/07 04:47:24 perky Exp $ # from genmap_support import * @@ -35,6 +35,8 @@ JISX0208_C2 = (0x21, 0x7e) JISX0212_C1 = (0x22, 0x6d) JISX0212_C2 = (0x21, 0x7e) +JISX0213_C1 = (0x21, 0x7e) +JISX0213_C2 = (0x21, 0x7e) CP932P0_C1 = (0x81, 0x81) # patches between shift-jis and cp932 CP932P0_C2 = (0x5f, 0xca) CP932P1_C1 = (0x87, 0x87) # CP932 P1 @@ -60,12 +62,66 @@ print "=>> Please download mapping table from http://www.unicode." \ "org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT" raise SystemExit +try: + jisx0213file = open('jisx0213-2000-std.txt') +except IOError: + print "=>> Please download mapping table from http://wakaba-web." 
\ + "hp.infoseek.co.jp/table/jisx0213-2000-std.txt" + raise SystemExit + +def loadmap_jisx0213(fo): + decmap3, decmap4 = {}, {} # maps to BMP for level 3 and 4 + decmap3_2, decmap4_2 = {}, {} # maps to U+2xxxx for level 3 and 4 + decmap3_pair = {} # maps to BMP-pair for level 3 + for line in fo: + line = line.split('#', 1)[0].strip() + if not line or len(line.split()) < 2: continue + + row = line.split() + loc = eval('0x' + row[0][2:]) + level = eval(row[0][0]) + m = None + if len(row[1].split('+')) == 2: # single unicode + uni = eval('0x' + row[1][2:]) + if level == 3: + if uni < 0x10000: + m = decmap3 + elif 0x20000 <= uni < 0x30000: + uni -= 0x20000 + m = decmap3_2 + elif level == 4: + if uni < 0x10000: + m = decmap4 + elif 0x20000 <= uni < 0x30000: + uni -= 0x20000 + m = decmap4_2 + m.setdefault((loc >> 8), {}) + m[(loc >> 8)][(loc & 0xff)] = uni + else: # pair + uniprefix = eval('0x' + row[1][2:6]) # body + uni = eval('0x' + row[1][7:11]) # modifier + if level != 3: + raise ValueError, "invalid map" + decmap3_pair.setdefault(uniprefix, {}) + m = decmap3_pair[uniprefix] + + if m is None: + raise ValueError, "invalid map" + m.setdefault((loc >> 8), {}) + m[(loc >> 8)][(loc & 0xff)] = uni + + return decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair print "Loading Mapping File..." 
sjisdecmap = loadmap(jisx0208file, natcol=0, unicol=2) jisx0208decmap = loadmap(jisx0208file, natcol=1, unicol=2) jisx0212decmap = loadmap(jisx0212file) cp932decmap = loadmap(cp932file) +jis3decmap, jis4decmap, jis3_2_decmap, jis4_2_decmap, jis3_pairdecmap \ += loadmap_jisx0213(jisx0213file) +if jis3decmap[0x21][0x24] != 0xff0c: + print "Please adjust your JIS X 0213 map using jisx0213-2000-std.txt.diff" + raise SystemExit sjisencmap, cp932encmap = {}, {} jisx0208_0212encmap = {} @@ -86,6 +142,15 @@ if not cp932encmap[c1]: del cp932encmap[c1] +jisx0213pairdecmap = {} +jisx0213pairencmap = [] +for unibody, m1 in jis3_pairdecmap.iteritems(): + for c1, m2 in m1.iteritems(): + for c2, modifier in m2.iteritems(): + jisx0213pairencmap.append((unibody, modifier, c1 << 8 | c2)) + jisx0213pairdecmap.setdefault(c1, {}) + jisx0213pairdecmap[c1][c2] = unibody << 16 | modifier + # Twinmap for both of JIS X 0208 (MSB unset) and JIS X 0212 (MSB set) for c1, m in jisx0208decmap.items(): for c2, code in m.items(): @@ -98,6 +163,43 @@ print "OOPS!!!", (code) jisx0208_0212encmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2 +jisx0213bmpencmap = {} +for c1, m in jis3decmap.items(): + for c2, code in m.items(): + if jisx0208decmap.has_key(c1) and jisx0208decmap[c1].has_key(c2): + if jis3_pairdecmap.has_key(code): + jisx0213bmpencmap[code >> 8][code & 0xff] = (0,) # pair + jisx0213pairencmap.append((code, 0, c1 << 8 | c2)) + elif jisx0208decmap[c1][c2] == code: + del jis3decmap[c1][c2] + if not jis3decmap[c1]: + del jis3decmap[c1] + else: + raise ValueError, "Difference between JIS X 0208 and " \ + "JIS X 0213 Plane 1 is found." 
+ else: + jisx0213bmpencmap.setdefault(code >> 8, {}) + if not jis3_pairdecmap.has_key(code): + jisx0213bmpencmap[code >> 8][code & 0xff] = c1 << 8 | c2 + else: + jisx0213bmpencmap[code >> 8][code & 0xff] = (0,) # pair + jisx0213pairencmap.append((code, 0, c1 << 8 | c2)) + +for c1, m in jis4decmap.iteritems(): + for c2, code in m.iteritems(): + jisx0213bmpencmap.setdefault(code >> 8, {}) + jisx0213bmpencmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2 + +jisx0213empencmap = {} +for c1, m in jis3_2_decmap.iteritems(): + for c2, code in m.iteritems(): + jisx0213empencmap.setdefault(code >> 8, {}) + jisx0213empencmap[code >> 8][code & 0xff] = c1 << 8 | c2 +for c1, m in jis4_2_decmap.iteritems(): + for c2, code in m.iteritems(): + jisx0213empencmap.setdefault(code >> 8, {}) + jisx0213empencmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2 + omap = open("map_jisx0208.h", "w") printcopyright(omap) @@ -133,6 +235,61 @@ codebunch =[] genmap_encode(codebunch, "cp932ext", cp932encmap) print_encmap(omap, codebunch, "cp932ext", cp932encmap) + +omap = open("map_jisx0213.h", "w") +printcopyright(omap) + +print "Generating JIS X 0213 Plane 1 BMP decode map..." +codebunch =[] +genmap_decode(codebunch, "jisx0213_1_bmp", JISX0213_C1, JISX0213_C2, jis3decmap) +print_decmap(omap, codebunch, "jisx0213_1_bmp", jis3decmap) + +print "Generating JIS X 0213 Plane 2 BMP decode map..." +codebunch =[] +genmap_decode(codebunch, "jisx0213_2_bmp", JISX0213_C1, JISX0213_C2, jis4decmap) +print_decmap(omap, codebunch, "jisx0213_2_bmp", jis4decmap) + +print "Generating JIS X 0213 BMP encode map..." +codebunch =[] +genmap_encode(codebunch, "jisx0213_bmp", jisx0213bmpencmap) +print_encmap(omap, codebunch, "jisx0213_bmp", jisx0213bmpencmap) + +print "Generating JIS X 0213 Plane 1 EMP decode map..." 
+codebunch =[] +genmap_decode(codebunch, "jisx0213_1_emp", + JISX0213_C1, JISX0213_C2, jis3_2_decmap) +print_decmap(omap, codebunch, "jisx0213_1_emp", jis3_2_decmap) + +print "Generating JIS X 0213 Plane 2 EMP decode map..." +codebunch =[] +genmap_decode(codebunch, "jisx0213_2_emp", + JISX0213_C1, JISX0213_C2, jis4_2_decmap) +print_decmap(omap, codebunch, "jisx0213_2_emp", jis4_2_decmap) + +print "Generating JIS X 0213 EMP encode map..." +codebunch =[] +genmap_encode(codebunch, "jisx0213_emp", jisx0213empencmap) +print_encmap(omap, codebunch, "jisx0213_emp", jisx0213empencmap) + +omap = open("map_jisx0213_pairs.h", "w") +printcopyright(omap) + +print "Generating JIS X 0213 unicode-pair decode map..." +codebunch =[] +genmap_decode(codebunch, "jisx0213_pairdecmap", JISX0213_C1, JISX0213_C2, + jisx0213pairdecmap, wide=1) +print_decmap(omap, codebunch, "jisx0213_pairdecmap", + jisx0213pairdecmap, wide=1) + +print "Generating JIS X 0213 unicode-pair encode map..." +jisx0213pairencmap.sort() +print >> omap, "#define JISX0213_ENCPAIRS %d" % len(jisx0213pairencmap) +print >> omap, "struct pair_encodemap jisx0213_pairencmap" \ + "[JISX0213_ENCPAIRS] = {" +for body, modifier, jis in jisx0213pairencmap: + print >> omap, " { 0x%04x, 0x%04x, 0x%04x }," % ( + body, modifier, jis) +print >> omap, "};" print "\nDone!" # ex: ts=8 sts=4 et 1.10 +37 -13 cjkcodecs/tools/genmap_support.py Index: genmap_support.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_support.py,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- genmap_support.py 6 Jul 2003 23:19:26 -0000 1.9 +++ genmap_support.py 7 Jul 2003 04:47:24 -0000 1.10 @@ -26,14 +26,14 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# -# $Id: genmap_support.py,v 1.9 2003/07/06 23:19:26 perky Exp $ +# $Id: genmap_support.py,v 1.10 2003/07/07 04:47:24 perky Exp $ # import re COPYRIGHT_HEADER = """\ /* - * $Id: genmap_support.py,v 1.9 2003/07/06 23:19:26 perky Exp $ + * $Id: genmap_support.py,v 1.10 2003/07/07 04:47:24 perky Exp $ */ """ @@ -56,10 +56,14 @@ c2map['midx'] = len(codebunch) for v in range(rc2values[0], rc2values[-1] + 1): - if c2map.has_key(v): + if not c2map.has_key(v): + codebunch.append('NOCHAR,') + elif isinstance(c2map[v], int): codebunch.append('0x%04x,' % c2map[v]) + elif isinstance(c2map[v], tuple): + codebunch.append('MULTIC,') else: - codebunch.append('NOCHAR,') + raise ValueError def print_encmap(fo, codebunch, fmapprefix, fmap, f2map={}, f2mapprefix=''): print >> fo, ("static const DBCHAR __%s_encmap[%d] = {" % ( @@ -89,7 +93,8 @@ print >> fo, "};" print >> fo -def genmap_decode(codebunch, prefix, c1range, c2range, dmap, onlymask=()): +def genmap_decode(codebunch, prefix, c1range, c2range, + dmap, onlymask=(), wide=0): c2width = c2range[1] - c2range[0] + 1 c2values = range(c2range[0], c2range[1] + 1) @@ -108,22 +113,41 @@ for v in range(rc2values[0], rc2values[-1] + 1): if c2map.has_key(v): - codebunch.append('0x%04x,' % c2map[v]) + if not wide: + codebunch.append('0x%04x,' % c2map[v]) + else: + codebunch.append('0x%08x,' % c2map[v]) else: - codebunch.append('UNIINV,') - -def print_decmap(fo, codebunch, fmapprefix, fmap, f2map={}, f2mapprefix=''): - print >> fo, ("static const ucs2_t __%s_decmap[%d] = {" % ( + if not wide: + codebunch.append('UNIINV,') + else: + codebunch.append(' UNIINV,') + +def print_decmap(fo, codebunch, fmapprefix, fmap, f2map={}, + f2mapprefix='', wide=0): + if not wide: + print >> fo, ("static const ucs2_t __%s_decmap[%d] = {" % ( fmapprefix, len(codebunch))) + width = 8 + else: + print >> fo, ("static const ucs4_t __%s_decmap[%d] = {" % ( + fmapprefix, len(codebunch))) + width = 4 i = 0 while i < len(codebunch): - dp = codebunch[i:i+8] - i += 8 + 
dp = codebunch[i:i+width] + i += width print >> fo, ' ', ' '.join(dp) print >> fo, "};" print >> fo - print >> fo, "static const struct dbcs_index %s_decmap[256] = {" % (fmapprefix) + if not wide: + print >> fo, "static const struct dbcs_index %s_decmap[256] = {" % ( + fmapprefix) + else: + print >> fo, "static const struct widedbcs_index %s_decmap[256] = {" % ( + fmapprefix) + for i in range(256): if fmap.has_key(i) and fmap[i].has_key(fmapprefix): map = fmap |