[KoCo-CVS] [Commit] cjkcodecs/tools genmap_tchinese.py
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-06-20 09:04:54
|
perky       03/06/20 02:04:53

  Modified:    tools    genmap_tchinese.py
  Log:
  - Tweaked some mappings for cp932 and cp950 to be more consistent
    with MS Windows.
  - CP932: Added single-byte "UNDEFINED" characters 0x80, 0xa0, 0xfd,
    0xfe, 0xff (documented in NOTES.cp932)
  - CP950: Changed the encode mappings for duplicated Unicode code points
    to the more popular alternatives: 5341 -> A451, 5345 -> A4CA
  - Added a unit test for the big5 mapping.
  - Fixed a bug where the cp932 codec couldn't decode half-width katakana.

  Revision  Changes    Path
  1.5       +21 -14    cjkcodecs/tools/genmap_tchinese.py

Index: genmap_tchinese.py
===================================================================
RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_tchinese.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- genmap_tchinese.py	19 Jun 2003 17:49:01 -0000	1.4
+++ genmap_tchinese.py	20 Jun 2003 09:04:53 -0000	1.5
@@ -26,7 +26,7 @@
 # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 #
-# $Id: genmap_tchinese.py,v 1.4 2003/06/19 17:49:01 perky Exp $
+# $Id: genmap_tchinese.py,v 1.5 2003/06/20 09:04:53 perky Exp $
 #
 
 from genmap_support import *
@@ -68,28 +68,35 @@
     big5decmap[bcode >> 8][bcode & 0xff] = ucode
 
 big5encmap, cp950encmap = {}, {}
+for c1, m in big5decmap.items():
+    for c2, code in m.items():
+        big5encmap.setdefault(code >> 8, {})
+        if not big5encmap[code >> 8].has_key(code & 0xff):
+            big5encmap[code >> 8][code & 0xff] = c1 << 8 | c2
 for c1, m in cp950decmap.items():
     for c2, code in m.items():
-        if not (not big5decmap.has_key(c1) or not big5decmap[c1].has_key(c2)
-                or big5decmap[c1][c2] != code):
-            del cp950decmap[c1][c2]
         cp950encmap.setdefault(code >> 8, {})
         if not cp950encmap[code >> 8].has_key(code & 0xff):
             cp950encmap[code >> 8][code & 0xff] = c1 << 8 | c2
-for c1, m in big5decmap.items():
-    for c2, code in m.items():
-        big5encmap.setdefault(code >> 8, {})
-        big5encmap[code >> 8][code & 0xff] = c1 << 8 | c2
-        if (cp950encmap.has_key(code >> 8) and
-            cp950encmap[code >> 8].has_key(code & 0xff) and
-            cp950encmap[code >> 8][code & 0xff] == c1 << 8 | c2):
-            del cp950encmap[code >> 8][code & 0xff]
-            if not cp950encmap[code >> 8]:
-                del cp950encmap[code >>8]
 
 # fix unicode->big5 duplicated mapping priority
+big5encmap[0xFF][0x0F] = 0xA241
+big5encmap[0xFF][0x3C] = 0xA242
 big5encmap[0x53][0x41] = 0xA451
 big5encmap[0x53][0x45] = 0xA4CA
+cp950encmap[0x53][0x41] = 0xA451
+cp950encmap[0x53][0x45] = 0xA4CA
+
+for c1, m in cp950encmap.items():
+    for c2, code in m.items():
+        if (big5encmap.has_key(c1) and big5encmap[c1].has_key(c2)
+            and big5encmap[c1][c2] == code):
+            del cp950encmap[c1][c2]
+for c1, m in cp950decmap.items():
+    for c2, code in m.items():
+        if (big5decmap.has_key(c1) and big5decmap[c1].has_key(c2)
+            and big5decmap[c1][c2] == code):
+            del cp950decmap[c1][c2]
 
 omap = open('map_big5.h', 'w')
 printcopyright(omap)
|