[KoCo-CVS] [Commit] KoreanCodecs/test CodecTestBase.py test_all.py test_cp949.py test_euc_kr.py test
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-01-12 22:54:14
|
perky 03/01/12 14:54:13 Modified: test CodecTestBase.py test_all.py test_cp949.py test_euc_kr.py test_hangul.py test_iso_2022_kr.py test_johab.py test_mackorean.py test_qwerty2bul.py test_unijohab.py Log: Remove selective framework for two implementations, 'C' and 'Python'. We'll maintain only 1 implementation from now on. Accordingly, --with[out]-extension options are removed, too. Revision Changes Path 1.10 +6 -3 KoreanCodecs/test/CodecTestBase.py Index: CodecTestBase.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/test/CodecTestBase.py,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- CodecTestBase.py 10 Jan 2003 06:08:21 -0000 1.9 +++ CodecTestBase.py 12 Jan 2003 22:54:13 -0000 1.10 @@ -16,7 +16,7 @@ # along with KoreanCodecs; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -# $Id: CodecTestBase.py,v 1.9 2003/01/10 06:08:21 perky Exp $ +# $Id: CodecTestBase.py,v 1.10 2003/01/12 22:54:13 perky Exp $ # import StringIO @@ -46,8 +46,11 @@ def setUp(self): if not self.textfile_chunk: - self.textfile_chunk = ('texts/' + self.encoding, - 'texts/%s.utf-8' % self.encoding) or self.textfile_stream + self.textfile_chunk = ('texts/' + + self.encoding.replace('korean.', ''), + 'texts/%s.utf-8' % + self.encoding.replace('korean.', '') + ) or self.textfile_stream if not self.textfile_stream: self.textfile_stream = self.textfile_chunk # checked above. 
:) 1.8 +5 -5 KoreanCodecs/test/test_all.py Index: test_all.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/test/test_all.py,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- test_all.py 10 Jan 2003 03:15:25 -0000 1.7 +++ test_all.py 12 Jan 2003 22:54:13 -0000 1.8 @@ -16,20 +16,20 @@ # along with KoreanCodecs; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -# $Id: test_all.py,v 1.7 2003/01/10 03:15:25 perky Exp $ +# $Id: test_all.py,v 1.8 2003/01/12 22:54:13 perky Exp $ # import CodecTestBase -from test_cp949 import TestCP949_CExtension, TestCP949_PurePython -from test_euc_kr import TestEUCKR_CExtension, TestEUCKR_PurePython -from test_mackorean import TestMacKorean_PurePython +from test_cp949 import TestCP949 +from test_euc_kr import TestEUCKR +from test_mackorean import TestMacKorean from test_iso_2022_kr import TestISO_2022_KR from test_johab import TestJOHAB from test_qwerty2bul import TestQWERTY2BUL from test_unijohab import TestUNIJOHAB -from test_hangul import TestHangul_CExtension, TestHangul_PurePython +from test_hangul import TestHangul if __name__ == '__main__': CodecTestBase.main() 1.12 +40 -47 KoreanCodecs/test/test_cp949.py Index: test_cp949.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/test/test_cp949.py,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- test_cp949.py 11 Jan 2003 15:01:56 -0000 1.11 +++ test_cp949.py 12 Jan 2003 22:54:13 -0000 1.12 @@ -16,7 +16,7 @@ # along with KoreanCodecs; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -# $Id: test_cp949.py,v 1.11 2003/01/11 15:01:56 perky Exp $ +# $Id: test_cp949.py,v 1.12 2003/01/12 22:54:13 perky Exp $ # import CodecTestBase @@ -24,53 +24,46 @@ def unichrs(s): return u''.join(map(unichr, map(eval, 
s.split('+')))) -class Shield: - class TestCP949Base(CodecTestBase.TestStreamReader, CodecTestBase.CodecTestBase): - encoding = 'cp949' - textfile_chunk = ('texts/cp949', 'texts/cp949.utf-8') - errortests = ( - # invalid bytes - ("abc\x80\x80\xc1\xc4", "strict", None), - ("abc\xc8", "strict", None), - ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"), - ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"), - ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"), - ) - - def test_mapping(self): - import sys, os - - if not os.access('CP949.TXT', os.R_OK): - sys.stdout.write('skipped -- CP949.TXT not found, download from' - ' http://www.unicode.org/Public/MAPPINGS' - '/VENDORS/MICSFT/WINDOWS/CP949.TXT ') - sys.stdout.flush() - return - - for line in open('CP949.TXT'): - if not line: - break - data = line.split('#')[0].strip().split() - if len(data) != 2: - continue - - cp949val = eval(data[0]) - if cp949val <= 0x7F: - cp949ch = chr(cp949val & 0xff) - elif cp949val >= 0x100: - cp949ch = chr(cp949val >> 8) + chr(cp949val & 0xff) - else: - continue - unich = unichrs(data[1]) +class TestCP949(CodecTestBase.TestStreamReader, CodecTestBase.CodecTestBase): + encoding = 'korean.cp949' + textfile_chunk = ('texts/cp949', 'texts/cp949.utf-8') + errortests = ( + # invalid bytes + ("abc\x80\x80\xc1\xc4", "strict", None), + ("abc\xc8", "strict", None), + ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"), + ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"), + ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"), + ) + + def test_mapping(self): + import sys, os + + if not os.access('CP949.TXT', os.R_OK): + sys.stdout.write('skipped -- CP949.TXT not found, download from' + ' http://www.unicode.org/Public/MAPPINGS' + '/VENDORS/MICSFT/WINDOWS/CP949.TXT ') + sys.stdout.flush() + return + + for line in open('CP949.TXT'): + if not line: + break + data = line.split('#')[0].strip().split() + if len(data) != 2: + continue + + cp949val = eval(data[0]) + if cp949val <= 
0x7F: + cp949ch = chr(cp949val & 0xff) + elif cp949val >= 0x100: + cp949ch = chr(cp949val >> 8) + chr(cp949val & 0xff) + else: + continue + unich = unichrs(data[1]) - self.assertEqual(unich.encode(self.encoding), cp949ch) - self.assertEqual(unicode(cp949ch, self.encoding), unich) - -class TestCP949_CExtension(Shield.TestCP949Base): - encoding = 'korean.c.cp949' - -class TestCP949_PurePython(Shield.TestCP949Base): - encoding = 'korean.python.cp949' + self.assertEqual(unich.encode(self.encoding), cp949ch) + self.assertEqual(unicode(cp949ch, self.encoding), unich) if __name__ == '__main__': CodecTestBase.main() 1.9 +18 -25 KoreanCodecs/test/test_euc_kr.py Index: test_euc_kr.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/test/test_euc_kr.py,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- test_euc_kr.py 9 Jan 2003 23:23:31 -0000 1.8 +++ test_euc_kr.py 12 Jan 2003 22:54:13 -0000 1.9 @@ -16,37 +16,30 @@ # along with KoreanCodecs; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -# $Id: test_euc_kr.py,v 1.8 2003/01/09 23:23:31 perky Exp $ +# $Id: test_euc_kr.py,v 1.9 2003/01/12 22:54:13 perky Exp $ # import CodecTestBase -class Shield: - class TestEUCKR_Base(CodecTestBase.TestStreamReader, CodecTestBase.CodecTestBase): - encoding = 'euc-kr' - textfile_chunk = ('texts/euc-kr', 'texts/euc-kr.utf-8') - errortests = ( - # invalid bytes - ("abc\x80\x80\xc1\xc4", "strict", None), - ("abc\xc8", "strict", None), - ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"), - ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"), - ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"), - ("\xc1\x64", "strict", None), # cp949 code H-AE-H - ) +class TestEUCKR(CodecTestBase.TestStreamReader, CodecTestBase.CodecTestBase): + encoding = 'korean.euc-kr' + textfile_chunk = ('texts/euc-kr', 'texts/euc-kr.utf-8') + errortests = ( + # invalid 
bytes + ("abc\x80\x80\xc1\xc4", "strict", None), + ("abc\xc8", "strict", None), + ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"), + ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"), + ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"), + ("\xc1\x64", "strict", None), # cp949 code H-AE-H + ) - def test_ksx1001_1998(self): - self.assertEqual(unicode('\xa2\xe6', self.encoding), u'\u20ac') - self.assertEqual(unicode('\xa2\xe7', self.encoding), u'\u00ae') - self.assertEqual(u'\u20ac'.encode(self.encoding), '\xa2\xe6') - self.assertEqual(u'\u00ae'.encode(self.encoding), '\xa2\xe7') + def test_ksx1001_1998(self): + self.assertEqual(unicode('\xa2\xe6', self.encoding), u'\u20ac') + self.assertEqual(unicode('\xa2\xe7', self.encoding), u'\u00ae') + self.assertEqual(u'\u20ac'.encode(self.encoding), '\xa2\xe6') + self.assertEqual(u'\u00ae'.encode(self.encoding), '\xa2\xe7') - -class TestEUCKR_CExtension(Shield.TestEUCKR_Base): - encoding = 'korean.c.euc-kr' - -class TestEUCKR_PurePython(Shield.TestEUCKR_Base): - encoding = 'korean.python.euc-kr' if __name__ == '__main__': CodecTestBase.main() 1.11 +38 -44 KoreanCodecs/test/test_hangul.py Index: test_hangul.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/test/test_hangul.py,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- test_hangul.py 9 Jan 2003 21:31:44 -0000 1.10 +++ test_hangul.py 12 Jan 2003 22:54:13 -0000 1.11 @@ -16,94 +16,88 @@ # along with KoreanCodecs; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -# $Id: test_hangul.py,v 1.10 2003/01/09 21:31:44 perky Exp $ +# $Id: test_hangul.py,v 1.11 2003/01/12 22:54:13 perky Exp $ # import unittest +from korean import hangul -class Shield: - class TestHangul(unittest.TestCase): +class TestHangul(unittest.TestCase): def test_joinsplit(self): - self.assertEqual(self.h.join([self.h.J, self.h.WA, self.h.L]), 
u'\uc894') - self.assertEqual(self.h.join([self.h.JJ, self.h.Null, self.h.Null]), u'\u3149') - self.assertEqual(self.h.join((self.h.Null, self.h.YI, self.h.Null)), u'\u3162') - - self.assertEqual(self.h.split(u'\uc894'), (self.h.J, self.h.WA, self.h.L)) - self.assertEqual(self.h.split(u'\u3149'), (self.h.JJ, self.h.Null, self.h.Null)) - self.assertEqual(self.h.split(u'\u3162'), (self.h.Null, self.h.YI, self.h.Null)) + self.assertEqual(hangul.join([hangul.J, hangul.WA, hangul.L]), u'\uc894') + self.assertEqual(hangul.join([hangul.JJ, hangul.Null, hangul.Null]), u'\u3149') + self.assertEqual(hangul.join((hangul.Null, hangul.YI, hangul.Null)), u'\u3162') + + self.assertEqual(hangul.split(u'\uc894'), (hangul.J, hangul.WA, hangul.L)) + self.assertEqual(hangul.split(u'\u3149'), (hangul.JJ, hangul.Null, hangul.Null)) + self.assertEqual(hangul.split(u'\u3162'), (hangul.Null, hangul.YI, hangul.Null)) def test_basicspec(self): - self.assertEqual(self.h.isJaeum(self.h.J), 1) - self.assertEqual(self.h.isJaeum(self.h.E), 0) - self.assertEqual(self.h.isMoeum(self.h.L), 0) - self.assertEqual(self.h.isMoeum(self.h.O), 1) - self.assertEqual(self.h.ishangul(u'\uc870'), 1) - self.assertEqual(self.h.ishangul(u'\u382c'), 0) + self.assertEqual(hangul.isJaeum(hangul.J), 1) + self.assertEqual(hangul.isJaeum(hangul.E), 0) + self.assertEqual(hangul.isMoeum(hangul.L), 0) + self.assertEqual(hangul.isMoeum(hangul.O), 1) + self.assertEqual(hangul.ishangul(u'\uc870'), 1) + self.assertEqual(hangul.ishangul(u'\u382c'), 0) def test_testlong(self): - self.assertEqual(self.h.isJaeum(u'\u3131\u3134\u3137\u3139'), 1) - self.assertEqual(self.h.isJaeum(u'\u3131\u314f\u3134\u314f'), 0) - self.assertEqual(self.h.isJaeum(u''), 0) - - self.assertEqual(self.h.isMoeum(u'\u314f\u3151\u3153\u3155'), 1) - self.assertEqual(self.h.isMoeum(u'\u3131\u314f\u3134\u314f'), 0) - self.assertEqual(self.h.isMoeum(u''), 0) - - self.assertEqual(self.h.ishangul(u'\ud2f0\ud2f0\ub9c8\uc18c\uc774'), 1) - 
self.assertEqual(self.h.ishangul(u'\ud2f0\ud2f0\ub9c8 \uc18c\uc774'), 0) - self.assertEqual(self.h.ishangul(u''), 0) + self.assertEqual(hangul.isJaeum(u'\u3131\u3134\u3137\u3139'), 1) + self.assertEqual(hangul.isJaeum(u'\u3131\u314f\u3134\u314f'), 0) + self.assertEqual(hangul.isJaeum(u''), 0) + + self.assertEqual(hangul.isMoeum(u'\u314f\u3151\u3153\u3155'), 1) + self.assertEqual(hangul.isMoeum(u'\u3131\u314f\u3134\u314f'), 0) + self.assertEqual(hangul.isMoeum(u''), 0) + + self.assertEqual(hangul.ishangul(u'\ud2f0\ud2f0\ub9c8\uc18c\uc774'), 1) + self.assertEqual(hangul.ishangul(u'\ud2f0\ud2f0\ub9c8 \uc18c\uc774'), 0) + self.assertEqual(hangul.ishangul(u''), 0) def test_format_altsuffix(self): fmt = u'%s\ub294 %s\ub97c %s\ud55c\ub2e4.' obj1, obj2 = u'\ud61c\uc2dd', u'\uc544\ub77c' - self.assertEqual(self.h.format(fmt, obj1, obj2, u'\u2661'), + self.assertEqual(hangul.format(fmt, obj1, obj2, u'\u2661'), u'\ud61c\uc2dd\uc740 \uc544\ub77c\ub97c \u2661\ud55c\ub2e4.') - self.assertEqual(self.h.format(fmt, obj2, obj1, u'\uc2eb\uc5b4'), + self.assertEqual(hangul.format(fmt, obj2, obj1, u'\uc2eb\uc5b4'), u'\uc544\ub77c\ub294 \ud61c\uc2dd\uc744 \uc2eb\uc5b4\ud55c\ub2e4.') fmt = u'\ud0dc\ucd08\uc5d0 %s\uc640 %s\uac00 \uc788\uc5c8\ub2e4.' 
- self.assertEqual(self.h.format(fmt, obj1, obj2), + self.assertEqual(hangul.format(fmt, obj1, obj2), u'\ud0dc\ucd08\uc5d0 \ud61c\uc2dd\uacfc \uc544\ub77c\uac00' u' \uc788\uc5c8\ub2e4.') - self.assertEqual(self.h.format(fmt, obj2, obj1), + self.assertEqual(hangul.format(fmt, obj2, obj1), u'\ud0dc\ucd08\uc5d0 \uc544\ub77c\uc640 \ud61c\uc2dd\uc774' u' \uc788\uc5c8\ub2e4.') obj1, obj2 = u'Julian', u'Julie' - self.assertEqual(self.h.format(fmt, obj1, obj2), + self.assertEqual(hangul.format(fmt, obj1, obj2), u'\ud0dc\ucd08\uc5d0 Julian\uacfc Julie\uac00 \uc788\uc5c8\ub2e4.') - self.assertEqual(self.h.format(fmt, obj2, obj1), + self.assertEqual(hangul.format(fmt, obj2, obj1), u'\ud0dc\ucd08\uc5d0 Julie\uc640 Julian\uc774 \uc788\uc5c8\ub2e4.') def test_format_idasuffix(self): fmt = u'%s(\uc785)\ub2c8\ub2e4, %s(\uc778)\ub370, %s(\uc774)\ub2e4' - self.assertEqual(self.h.format(fmt, *(u'\uc18c\uc774',)*3), + self.assertEqual(hangul.format(fmt, *(u'\uc18c\uc774',)*3), u'\uc18c\uc785\ub2c8\ub2e4, \uc18c\uc778\ub370, \uc18c\uc774\ub2e4') - self.assertEqual(self.h.format(fmt, *(u'\ub2e4\ub155',)*3), + self.assertEqual(hangul.format(fmt, *(u'\ub2e4\ub155',)*3), u'\ub2e4\ub155\uc785\ub2c8\ub2e4, \ub2e4\ub155\uc778\ub370,' u' \ub2e4\ub155\uc774\ub2e4') def test_format_argtypes(self): fmt = u'%(int)d(\uc785)\ub2c8\ub2e4. %(str)s\uc740 %(str)s\uc5d0' \ u'%(float).2f\uc640' - self.assertEqual(self.h.format(fmt, int=1, str=u'hmm', float=3.14), + self.assertEqual(hangul.format(fmt, int=1, str=u'hmm', float=3.14), u'1\uc785\ub2c8\ub2e4. 
hmm\uc740 hmm\uc5d03.14\uc640') def test_conjoin(self): - self.assertEqual(self.h.conjoin(u'\u1112\u1161\u11ab\u1100\u1173\u11af\u110b\u1175' + self.assertEqual(hangul.conjoin(u'\u1112\u1161\u11ab\u1100\u1173\u11af\u110b\u1175' u' \u110c\u1169\u11c2\u110b\u1161\u110b\u116d.'), u'\ud55c\uae00\uc774 \uc88b\uc544\uc694.') def test_disjoint(self): - self.assertEqual(self.h.disjoint(u'\ub9c8\ub140\ubc30\ub2ec\ubd80 \ud0a4\ud0a4'), + self.assertEqual(hangul.disjoint(u'\ub9c8\ub140\ubc30\ub2ec\ubd80 \ud0a4\ud0a4'), u'\u1106\u1161\u1102\u1167\u1107\u1162\u1103\u1161\u11af\u1107\u116e' u' \u110f\u1175\u110f\u1175') - -class TestHangul_CExtension(Shield.TestHangul): - from korean.c import hangul as h - -class TestHangul_PurePython(Shield.TestHangul): - from korean.python import hangul as h if __name__ == '__main__': import sys 1.7 +6 -4 KoreanCodecs/test/test_iso_2022_kr.py Index: test_iso_2022_kr.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/test/test_iso_2022_kr.py,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- test_iso_2022_kr.py 10 Jan 2003 06:08:22 -0000 1.6 +++ test_iso_2022_kr.py 12 Jan 2003 22:54:13 -0000 1.7 @@ -16,15 +16,17 @@ # along with KoreanCodecs; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -# $Id: test_iso_2022_kr.py,v 1.6 2003/01/10 06:08:22 perky Exp $ +# $Id: test_iso_2022_kr.py,v 1.7 2003/01/12 22:54:13 perky Exp $ # import CodecTestBase class TestISO_2022_KR(CodecTestBase.CodecTestBase): - encoding = 'iso-2022-kr' - textfile_chunk = ('texts/%s.roundrobin' % encoding, 'texts/%s.utf-8' % encoding) - textfile_stream = ('texts/%s.stream' % encoding, 'texts/%s.utf-8' % encoding) + encoding = 'korean.iso-2022-kr' + textfile_chunk = ('texts/iso-2022-kr.roundrobin', + 'texts/iso-2022-kr.utf-8') + textfile_stream = ('texts/iso-2022-kr.stream', + 'texts/iso-2022-kr.utf-8') roundtriptest = 0 errortests = ( 1.4 
+2 -2 KoreanCodecs/test/test_johab.py Index: test_johab.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/test/test_johab.py,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- test_johab.py 9 Jan 2003 21:31:44 -0000 1.3 +++ test_johab.py 12 Jan 2003 22:54:13 -0000 1.4 @@ -16,13 +16,13 @@ # along with KoreanCodecs; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -# $Id: test_johab.py,v 1.3 2003/01/09 21:31:44 perky Exp $ +# $Id: test_johab.py,v 1.4 2003/01/12 22:54:13 perky Exp $ # import CodecTestBase class TestJOHAB(CodecTestBase.TestStreamReader, CodecTestBase.CodecTestBase): - encoding = 'johab' + encoding = 'korean.johab' errortests = ( # invalid bytes ("abc\x80\x80\xc1\xc4", "strict", None), 1.5 +43 -49 KoreanCodecs/test/test_mackorean.py Index: test_mackorean.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/test/test_mackorean.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- test_mackorean.py 11 Jan 2003 15:01:56 -0000 1.4 +++ test_mackorean.py 12 Jan 2003 22:54:13 -0000 1.5 @@ -16,7 +16,7 @@ # along with KoreanCodecs; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -# $Id: test_mackorean.py,v 1.4 2003/01/11 15:01:56 perky Exp $ +# $Id: test_mackorean.py,v 1.5 2003/01/12 22:54:13 perky Exp $ # import CodecTestBase @@ -24,56 +24,50 @@ def unichrs(s): return u''.join(map(unichr, map(eval, s.split('+')))) -class Shield: - class TestMacKorean_Base(CodecTestBase.TestStreamReader, CodecTestBase.CodecTestBase): - encoding = 'mackorean' - textfile_chunk = ('texts/mackorean', 'texts/mackorean.utf-8') - errortests = ( - # invalid bytes - # \x90 is expected as one byte character in MacKorean. 
- ("abc\x90\x90\xc1\xc4", "strict", None), - ("abc\xc8", "strict", None), - ("abc\x90\x90\xc1\xc4", "replace", u"abc\ufffd\ufffd\uc894"), - ("abc\x90\x90\xc1\xc4\xc8", "replace", u"abc\ufffd\ufffd\uc894\ufffd"), - ("abc\x90\x90\xc1\xc4", "ignore", u"abc\uc894"), - ("\xc1\x64", "strict", None), # cp949 code H-AE-H - ) - - def test_mapping(self): - import os, sys - - if not os.access('KOREAN.TXT', os.R_OK): - sys.stdout.write('skipped -- KOREAN.TXT not found, download from' - ' http://www.unicode.org/Public/MAPPINGS' - '/VENDORS/APPLE/KOREAN.TXT ') - sys.stdout.flush() - return - - for line in open('KOREAN.TXT'): - if not line: - break - data = line.split('#')[0].strip().split() - if len(data) != 2: - continue - - macval = eval(data[0]) - - if macval <= 0x7F: - macch = chr(macval & 0xff) - elif macval >= 0x100: - macch = chr(macval >> 8) + chr(macval & 0xff) - else: - continue - unich = unichrs(data[1]) +class TestMacKorean(CodecTestBase.TestStreamReader, CodecTestBase.CodecTestBase): + encoding = 'korean.mackorean' + textfile_chunk = ('texts/mackorean', 'texts/mackorean.utf-8') + errortests = ( + # invalid bytes + # \x90 is expected as one byte character in MacKorean. 
+ ("abc\x90\x90\xc1\xc4", "strict", None), + ("abc\xc8", "strict", None), + ("abc\x90\x90\xc1\xc4", "replace", u"abc\ufffd\ufffd\uc894"), + ("abc\x90\x90\xc1\xc4\xc8", "replace", u"abc\ufffd\ufffd\uc894\ufffd"), + ("abc\x90\x90\xc1\xc4", "ignore", u"abc\uc894"), + ("\xc1\x64", "strict", None), # cp949 code H-AE-H + ) + + def test_mapping(self): + import os, sys + + if not os.access('KOREAN.TXT', os.R_OK): + sys.stdout.write('skipped -- KOREAN.TXT not found, download from' + ' http://www.unicode.org/Public/MAPPINGS' + '/VENDORS/APPLE/KOREAN.TXT ') + sys.stdout.flush() + return + + for line in open('KOREAN.TXT'): + if not line: + break + data = line.split('#')[0].strip().split() + if len(data) != 2: + continue + + macval = eval(data[0]) + + if macval <= 0x7F: + macch = chr(macval & 0xff) + elif macval >= 0x100: + macch = chr(macval >> 8) + chr(macval & 0xff) + else: + continue + unich = unichrs(data[1]) - self.assertEqual(unich.encode(self.encoding), macch) - self.assertEqual(unicode(macch, self.encoding), unich) + self.assertEqual(unich.encode(self.encoding), macch) + self.assertEqual(unicode(macch, self.encoding), unich) -#class TestMacKorean_CExtension(Shield.TestMacKorean_Base): -# encoding = 'korean.c.mackorean' - -class TestMacKorean_PurePython(Shield.TestMacKorean_Base): - encoding = 'korean.python.mackorean' if __name__ == '__main__': CodecTestBase.main() 1.5 +2 -2 KoreanCodecs/test/test_qwerty2bul.py Index: test_qwerty2bul.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/test/test_qwerty2bul.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- test_qwerty2bul.py 9 Jan 2003 21:31:44 -0000 1.4 +++ test_qwerty2bul.py 12 Jan 2003 22:54:13 -0000 1.5 @@ -16,13 +16,13 @@ # along with KoreanCodecs; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -# $Id: test_qwerty2bul.py,v 1.4 2003/01/09 21:31:44 perky Exp $ +# $Id: 
test_qwerty2bul.py,v 1.5 2003/01/12 22:54:13 perky Exp $ # import CodecTestBase class TestQWERTY2BUL(CodecTestBase.CodecTestBase): - encoding = 'qwerty2bul' + encoding = 'korean.qwerty2bul' errortests = ( # invalid bytes ("123\x80\x80whkf", "strict", None), 1.3 +2 -2 KoreanCodecs/test/test_unijohab.py Index: test_unijohab.py =================================================================== RCS file: /cvsroot/koco/KoreanCodecs/test/test_unijohab.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- test_unijohab.py 9 Jan 2003 21:31:44 -0000 1.2 +++ test_unijohab.py 12 Jan 2003 22:54:13 -0000 1.3 @@ -16,13 +16,13 @@ # along with KoreanCodecs; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -# $Id: test_unijohab.py,v 1.2 2003/01/09 21:31:44 perky Exp $ +# $Id: test_unijohab.py,v 1.3 2003/01/12 22:54:13 perky Exp $ # import CodecTestBase class TestUNIJOHAB(CodecTestBase.CodecTestBase): - encoding = 'unijohab' + encoding = 'korean.unijohab' errortests = () # error handling is relying UTF-8 codec. if __name__ == '__main__': |