Thread: [KoCo-CVS] [Commit] cjkcodecs/tools genmap_japanese.py genmap_support.py genmap_ja_codecs.py
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-04-22 19:24:28
|
perky 03/04/22 12:24:25 Modified: tools genmap_support.py Added: tools genmap_japanese.py Removed: tools genmap_ja_codecs.py Log: Change to new framework Revision Changes Path 1.2 +4 -8 cjkcodecs/tools/genmap_support.py Index: genmap_support.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_support.py,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- genmap_support.py 20 Apr 2003 17:35:32 -0000 1.1 +++ genmap_support.py 22 Apr 2003 19:24:25 -0000 1.2 @@ -26,25 +26,21 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: genmap_support.py,v 1.1 2003/04/20 17:35:32 perky Exp $ +# $Id: genmap_support.py,v 1.2 2003/04/22 19:24:25 perky Exp $ # import re COPYRIGHT_HEADER = """\ /* - * %(filename)s - * Mapping Tables for %(encodingnames)s - * - * Generated from %(sourcename)s as of %(sourceversion)s - * $Id: genmap_support.py,v 1.1 2003/04/20 17:35:32 perky Exp $ + * $Id: genmap_support.py,v 1.2 2003/04/22 19:24:25 perky Exp $ */ """ re_UNIMAPDATE = re.compile('Date:\s*([ a-zA-Z0-9/]*)') re_UNIMAPVERSION= re.compile('Table version:\s*([0-9.]+)') -def printcopyright(fo, **data): - print >> fo, COPYRIGHT_HEADER % data +def printcopyright(fo): + print >> fo, COPYRIGHT_HEADER def genmap_decode(fo, prefix, c1range, c2range, dmap, onlymask=()): c2width = c2range[1] - c2range[0] + 1 1.1 cjkcodecs/tools/genmap_japanese.py Index: genmap_japanese.py =================================================================== # # genmap_ja_codecs.py: Japanese Codecs Map Generator # # Copyright (C) 2003 Hye-Shik Chang <pe...@Fr...>. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # $Id: genmap_japanese.py,v 1.1 2003/04/22 19:24:24 perky Exp $ # from genmap_support import * JISX0208_C1 = (0x21, 0x74) JISX0208_C2 = (0x21, 0x7e) JISX0212_C1 = (0x22, 0x6d) JISX0212_C2 = (0x21, 0x7e) CP932P0_C1 = (0x81, 0x81) # patches between shift-jis and cp932 CP932P0_C2 = (0x5f, 0xca) CP932P1_C1 = (0x87, 0x87) # CP932 P1 CP932P1_C2 = (0x40, 0x9c) CP932P2_C1 = (0xed, 0xfc) # CP932 P2 CP932P2_C2 = (0x40, 0xfc) try: jisx0208file = open('JIS0208.TXT') except IOError: print "=>> Please download mapping table from http://www.unicode." \ "org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT" raise SystemExit try: jisx0212file = open('JIS0212.TXT') except IOError: print "=>> Please download mapping table from http://www.unicode." \ "org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT" raise SystemExit try: cp932file = open('CP932.TXT') except IOError: print "=>> Please download mapping table from http://www.unicode." \ "org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT" raise SystemExit print "Loading Mapping File..." jisx0208datever, sjisdecmap = loadmap(jisx0208file, natcol=0, unicol=2) jisx0208datever, jisx0208decmap = loadmap(jisx0208file, natcol=1, unicol=2) jisx0212datever, jisx0212decmap = loadmap(jisx0212file) cp932datever, cp932decmap = loadmap(cp932file) sjisencmap, cp932encmap = {}, {} cp932diff = {} for c1, m in cp932decmap.items(): for c2, code in m.items(): cp932encmap[code] = (c1, c2) if sjisdecmap.has_key(c1) and sjisdecmap[c1].has_key(c2): sjisencmap[sjisdecmap[c1][c2]] = (c1, c2) if sjisdecmap[c1][c2] != code: cp932diff[(c1, c2)] = (sjisdecmap[c1][c2], code) else: del cp932decmap[c1][c2] if not cp932decmap[c1]: del cp932decmap[c1] difmap = [] for uni, (c1, c2) in cp932encmap.iteritems(): if sjisencmap.has_key(uni): s1, s2 = sjisencmap[uni] if (s1, s2) != (c1, c2): difmap.append(uni) omap = open("map_jisx0208.h", "w") printcopyright(omap) print "Generating JIS X 0208 decode map..." genmap_decode(omap, "jisx0208_decode", JISX0208_C1, JISX0208_C2, jisx0208decmap) print "Generating JIS X 0208 decode map index..." print_decmapindex(omap, "jisx0208_decode", jisx0208decmap, rng=(0, 128)) omap = open("map_jisx0212.h", "w") printcopyright(omap) print "Generating JIS X 0212 decode map..." genmap_decode(omap, "jisx0212_decode", JISX0212_C1, JISX0212_C2, jisx0212decmap) print "Generating JIS X 0212 decode map index..." print_decmapindex(omap, "jisx0212_decode", jisx0212decmap, rng=(0, 128)) omap = open("map_cp932.h", "w") printcopyright(omap) print "Generating CP932 decode map..." genmap_decode(omap, "cp932_decode", CP932P0_C1, CP932P0_C2, cp932decmap) genmap_decode(omap, "cp932_decode", CP932P1_C1, CP932P1_C2, cp932decmap) genmap_decode(omap, "cp932_decode", CP932P2_C1, CP932P2_C2, cp932decmap) print "Generating CP932 decode map index..." print_decmapindex(omap, "cp932_decode", cp932decmap) print "Generating CP932 Tweaks..." if difmap: print >> omap, "#define CP932_TWEAKUNIMAP(umap)", for uni in difmap: print >> omap, "\\" print >> omap, "\t(umap)[0x%02x][0x%02x] = NOCHAR;" % ( uni >> 8, uni & 0xFF), print >> omap print "\nDone!" # ex: ts=8 sts=4 et |