perky 03/01/09 17:56:48
Added: tools generate_mackorean_mapping.py
Log:
Add generator utility for MacKorean extension mapping.
Revision Changes Path
1.1 KoreanCodecs/tools/generate_mackorean_mapping.py
Index: generate_mackorean_mapping.py
===================================================================
#!/usr/bin/env python
#
# This file is part of KoreanCodecs.
#
# Copyright(C) 2002-2003 Hye-Shik Chang <pe...@Fr...>.
#
# KoreanCodecs is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# KoreanCodecs is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with KoreanCodecs; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id: generate_mackorean_mapping.py,v 1.1 2003/01/10 01:56:48 perky Exp $
#
# Decoding table: MacKorean code (as a one- or two-character byte string)
# -> the Unicode string it maps to.
decmap = {}
# Nested dicts for Unicode sequences longer than one character: each level
# is keyed by the next character of the sequence, and the None key at the
# end of a path holds the MacKorean code for that whole sequence.
multi_encmap = {}
def unichrs(s):
    """Convert a '+'-separated list of code points into a unicode string.

    s -- text such as "0x300C+0xF87F"; every piece is an integer literal.

    Returns the unicode string made of those code points, in order.
    Raises ValueError if a piece is not a valid integer literal.
    """
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3: chr() already returns text
    # int(piece, 0) honours the literal's own base prefix, as the eval() it
    # replaces did, but cannot execute arbitrary expressions from the file.
    return u''.join([to_char(int(piece, 0)) for piece in s.split('+')])
# Read the mapping table.  Text after '#' on a line is ignored; a usable line
# has exactly two whitespace-separated fields: the MacKorean code and one or
# more '+'-joined Unicode code points.
mapfile = open("MacKorean.txt")
try:
    for line in mapfile:
        data = line.split('#')[0].strip().split()
        if len(data) != 2:
            # Blank lines, comment-only lines and anything with a different
            # number of fields are skipped.
            continue
        # int(..., 0) parses the numeric literal without eval()'s ability
        # to run arbitrary expressions taken from the data file.
        macv = int(data[0], 0)
        if macv >= 0x100:
            # Two-byte code: high byte followed by low byte.
            macv = chr(macv >> 8) + chr(macv & 0xff)
        else:
            macv = chr(macv)
        univ = unichrs(data[1])
        decmap[macv] = univ
        if len(univ) > 1:
            # Walk (creating as needed) one nested dict per character of the
            # sequence, then store the code under None at the final level.
            em = multi_encmap
            for mc in univ:
                em = em.setdefault(mc, {})
            em[None] = macv
finally:
    # Release the file handle explicitly instead of relying on garbage
    # collection.
    mapfile.close()
# Write the decoding table to standard output as a dict literal, one entry
# per line, ordered by MacKorean code.
decmapkeys = list(decmap.keys())
decmapkeys.sort()
print("decoding_map = {")
for dk in decmapkeys:
    print("%s: %s," % (repr(dk), repr(decmap[dk])))
print("}")
def printencmap(depth, label, em):
    """Print one level of the nested encoding map as dict-literal text.

    depth -- nesting level; it determines the leading indentation.
    label -- key under which this level is printed (shown via repr()).
    em    -- dict for this level.  A None key, if present, holds the value
             for the sequence that ends here; every other key maps to a
             deeper dict, which is printed recursively.

    Output goes to standard output; nothing is returned.
    """
    # One character is trimmed from the indent because every line below puts
    # a single separating space between the indent and its text.
    indent = (" " * depth)[:-1]
    lead = indent + " "
    print(lead + "%s: {" % repr(label))
    if None in em:
        print(lead + " None: %s," % repr(em[None]))
    for ek in em:
        if ek is not None:
            printencmap(depth + 1, ek, em[ek])
    print(lead + "},")
# Print the whole nested table of multi-character sequences, starting at
# depth 0 under an empty-string label.
printencmap(0, '', multi_encmap)
# ex: ts=8 sts=4 et
|