Thread: [KoCo-CVS] [Commit] cjkcodecs/tools genmap_tchinese.py
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-05-28 06:51:35
|
perky 03/05/27 23:51:34

Modified: tools genmap_tchinese.py
Log:
Fix wrong overriding cp950 encmap over big5's.

Revision Changes Path
1.2 +9 -3 cjkcodecs/tools/genmap_tchinese.py

Index: genmap_tchinese.py
===================================================================
RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_tchinese.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- genmap_tchinese.py 17 May 2003 20:33:06 -0000 1.1
+++ genmap_tchinese.py 28 May 2003 06:51:33 -0000 1.2
@@ -26,7 +26,7 @@
 # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 #
-# $Id: genmap_tchinese.py,v 1.1 2003/05/17 20:33:06 perky Exp $
+# $Id: genmap_tchinese.py,v 1.2 2003/05/28 06:51:33 perky Exp $
 #
 
 from genmap_support import *
@@ -60,13 +60,19 @@
         if not (not big5decmap.has_key(c1) or not big5decmap[c1].has_key(c2)
                 or big5decmap[c1][c2] != code):
             del cp950decmap[c1][c2]
-        else:
-            cp950encmap.setdefault(code >> 8, {})
+        cp950encmap.setdefault(code >> 8, {})
+        if not cp950encmap[code >> 8].has_key(code & 0xff):
             cp950encmap[code >> 8][code & 0xff] = c1 << 8 | c2
 for c1, m in big5decmap.items():
     for c2, code in m.items():
         big5encmap.setdefault(code >> 8, {})
         big5encmap[code >> 8][code & 0xff] = c1 << 8 | c2
+        if (cp950encmap.has_key(code >> 8) and
+            cp950encmap[code >> 8].has_key(code & 0xff) and
+            cp950encmap[code >> 8][code & 0xff] == c1 << 8 | c2):
+            del cp950encmap[code >> 8][code & 0xff]
+            if not cp950encmap[code >> 8]:
+                del cp950encmap[code >>8]
 
 omap = open('map_big5.h', 'w')
 printcopyright(omap)
|
From: Hye-Shik C. <pe...@us...> - 2003-06-19 17:49:07
|
perky 03/06/19 10:49:01

Modified: tools genmap_tchinese.py
Log:
Fix some big5 mappings to give more roundtrip compatibility.

Revision Changes Path
1.4 +18 -1 cjkcodecs/tools/genmap_tchinese.py

Index: genmap_tchinese.py
===================================================================
RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_tchinese.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- genmap_tchinese.py 10 Jun 2003 11:12:12 -0000 1.3
+++ genmap_tchinese.py 19 Jun 2003 17:49:01 -0000 1.4
@@ -26,7 +26,7 @@
 # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 #
-# $Id: genmap_tchinese.py,v 1.3 2003/06/10 11:12:12 perky Exp $
+# $Id: genmap_tchinese.py,v 1.4 2003/06/19 17:49:01 perky Exp $
 #
 
 from genmap_support import *
@@ -54,6 +54,19 @@
 print "Loading Mapping File..."
 cp950decmap = loadmap(cp950map)
 big5decmap = loadmap(big5map)
+
+# big5 mapping fix (see doc/NOTES.big5)
+for m in """\
+0xA15A 0x2574
+0xA1C3 0xFFE3
+0xA1C5 0x02CD
+0xA1FE 0xFF0F
+0xA240 0xFF3C
+0xA2CC 0x5341
+0xA2CE 0x5345""".splitlines():
+    bcode, ucode = map(eval, m.split())
+    big5decmap[bcode >> 8][bcode & 0xff] = ucode
+
 big5encmap, cp950encmap = {}, {}
 for c1, m in cp950decmap.items():
     for c2, code in m.items():
@@ -73,6 +86,10 @@
             del cp950encmap[code >> 8][code & 0xff]
             if not cp950encmap[code >> 8]:
                 del cp950encmap[code >>8]
+
+# fix unicode->big5 duplicated mapping priority
+big5encmap[0x53][0x41] = 0xA451
+big5encmap[0x53][0x45] = 0xA4CA
 
 omap = open('map_big5.h', 'w')
 printcopyright(omap)
|
From: Hye-Shik C. <pe...@us...> - 2003-06-20 09:04:54
|
perky 03/06/20 02:04:53

Modified: tools genmap_tchinese.py
Log:
- Tweaked some mapping for cp932 and cp950 to make more consistency with MS Windows.
- CP932: Added single byte "UNDEFINED" characters 0x80, 0xa0, 0xfd, 0xfe, 0xff (documented on NOTES.cp932)
- CP950: Changed encode mappings to another more popular for duplicated unicode points: 5341 -> A451, 5345 -> A4CA
- A unittest for big5 mapping is added.
- Fixed a bug that cp932 codec couldn't decode half-width katakana.

Revision Changes Path
1.5 +21 -14 cjkcodecs/tools/genmap_tchinese.py

Index: genmap_tchinese.py
===================================================================
RCS file: /cvsroot/koco/cjkcodecs/tools/genmap_tchinese.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- genmap_tchinese.py 19 Jun 2003 17:49:01 -0000 1.4
+++ genmap_tchinese.py 20 Jun 2003 09:04:53 -0000 1.5
@@ -26,7 +26,7 @@
 # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 #
-# $Id: genmap_tchinese.py,v 1.4 2003/06/19 17:49:01 perky Exp $
+# $Id: genmap_tchinese.py,v 1.5 2003/06/20 09:04:53 perky Exp $
 #
 
 from genmap_support import *
@@ -68,28 +68,35 @@
     big5decmap[bcode >> 8][bcode & 0xff] = ucode
 
 big5encmap, cp950encmap = {}, {}
+for c1, m in big5decmap.items():
+    for c2, code in m.items():
+        big5encmap.setdefault(code >> 8, {})
+        if not big5encmap[code >> 8].has_key(code & 0xff):
+            big5encmap[code >> 8][code & 0xff] = c1 << 8 | c2
 for c1, m in cp950decmap.items():
     for c2, code in m.items():
-        if not (not big5decmap.has_key(c1) or not big5decmap[c1].has_key(c2)
-                or big5decmap[c1][c2] != code):
-            del cp950decmap[c1][c2]
         cp950encmap.setdefault(code >> 8, {})
         if not cp950encmap[code >> 8].has_key(code & 0xff):
             cp950encmap[code >> 8][code & 0xff] = c1 << 8 | c2
-for c1, m in big5decmap.items():
-    for c2, code in m.items():
-        big5encmap.setdefault(code >> 8, {})
-        big5encmap[code >> 8][code & 0xff] = c1 << 8 | c2
-        if (cp950encmap.has_key(code >> 8) and
-            cp950encmap[code >> 8].has_key(code & 0xff) and
-            cp950encmap[code >> 8][code & 0xff] == c1 << 8 | c2):
-            del cp950encmap[code >> 8][code & 0xff]
-            if not cp950encmap[code >> 8]:
-                del cp950encmap[code >>8]
 
 # fix unicode->big5 duplicated mapping priority
+big5encmap[0xFF][0x0F] = 0xA241
+big5encmap[0xFF][0x3C] = 0xA242
 big5encmap[0x53][0x41] = 0xA451
 big5encmap[0x53][0x45] = 0xA4CA
+cp950encmap[0x53][0x41] = 0xA451
+cp950encmap[0x53][0x45] = 0xA4CA
+
+for c1, m in cp950encmap.items():
+    for c2, code in m.items():
+        if (big5encmap.has_key(c1) and big5encmap[c1].has_key(c2)
+            and big5encmap[c1][c2] == code):
+            del cp950encmap[c1][c2]
+for c1, m in cp950decmap.items():
+    for c2, code in m.items():
+        if (big5decmap.has_key(c1) and big5decmap[c1].has_key(c2)
+            and big5decmap[c1][c2] == code):
+            del cp950decmap[c1][c2]
 
 omap = open('map_big5.h', 'w')
 printcopyright(omap)
|