Thread: [KoCo-CVS] [Commit] cjkcodecs/tests test_multibytecodec_support.py
Brought to you by:
perky
From: Hye-Shik C. <pe...@us...> - 2003-05-26 15:08:50
|
perky 03/05/26 08:08:49 Modified: tests test_multibytecodec_support.py Log: Merge changes from iconv_codec. Revision Changes Path 1.3 +66 -42 cjkcodecs/tests/test_multibytecodec_support.py Index: test_multibytecodec_support.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tests/test_multibytecodec_support.py,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- test_multibytecodec_support.py 26 May 2003 11:40:41 -0000 1.2 +++ test_multibytecodec_support.py 26 May 2003 15:08:49 -0000 1.3 @@ -27,7 +27,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: test_multibytecodec_support.py,v 1.2 2003/05/26 11:40:41 perky Exp $ +# $Id: test_multibytecodec_support.py,v 1.3 2003/05/26 15:08:49 perky Exp $ # import sys, codecs @@ -68,47 +68,48 @@ else: self.assertRaises(UnicodeError, func, source, scheme) - def test_xmlcharrefreplace(self): - if self.has_iso10646: - return - - s = u"\u0b13\u0b23\u0b60 nd eggs" - self.assertEqual( - self.encode(s, "xmlcharrefreplace")[0], - "&#2835;&#2851;&#2912; nd eggs" - ) - - def test_customreplace(self): - if self.has_iso10646: - return - - import htmlentitydefs - - names = {} - for (key, value) in htmlentitydefs.entitydefs.items(): - if len(value)==1: - names[value.decode('latin-1')] = self.decode(key)[0] - else: - names[unichr(int(value[2:-1]))] = self.decode(key)[0] - - def xmlcharnamereplace(exc): - if not isinstance(exc, UnicodeEncodeError): - raise TypeError("don't know how to handle %r" % exc) - l = [] - for c in exc.object[exc.start:exc.end]: - try: - l.append(u"&%s;" % names[c]) - except KeyError: - l.append(u"&#%d;" % ord(c)) - return (u"".join(l), exc.end) - - codecs.register_error( - "test.xmlcharnamereplace", xmlcharnamereplace) - - sin = u"\xab\u211c\xbb = \u2329\u1234\u232a" - sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;" - self.assertEqual(self.encode(sin, - "test.xmlcharnamereplace")[0], sout) + if sys.hexversion >= 0x02030000: + 
def test_xmlcharrefreplace(self): + if self.has_iso10646: + return + + s = u"\u0b13\u0b23\u0b60 nd eggs" + self.assertEqual( + self.encode(s, "xmlcharrefreplace")[0], + "&#2835;&#2851;&#2912; nd eggs" + ) + + def test_customreplace(self): + if self.has_iso10646: + return + + import htmlentitydefs + + names = {} + for (key, value) in htmlentitydefs.entitydefs.items(): + if len(value)==1: + names[value.decode('latin-1')] = self.decode(key)[0] + else: + names[unichr(int(value[2:-1]))] = self.decode(key)[0] + + def xmlcharnamereplace(exc): + if not isinstance(exc, UnicodeEncodeError): + raise TypeError("don't know how to handle %r" % exc) + l = [] + for c in exc.object[exc.start:exc.end]: + try: + l.append(u"&%s;" % names[c]) + except KeyError: + l.append(u"&#%d;" % ord(c)) + return (u"".join(l), exc.end) + + codecs.register_error( + "test.xmlcharnamereplace", xmlcharnamereplace) + + sin = u"\xab\u211c\xbb = \u2329\u1234\u232a" + sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;" + self.assertEqual(self.encode(sin, + "test.xmlcharnamereplace")[0], sout) def test_streamreader(self): UTF8Writer = codecs.lookup('utf-8')[3] @@ -128,6 +129,29 @@ ostream.write(data) self.assertEqual(ostream.getvalue(), self.tstring[1]) + + def test_streamwriter(self): + # We can't test with the real utf-8 StreamReader here. + # The standard SR.readline{,s} are mostly broken for multibyte seqs. + #UTF8Reader = codecs.lookup('utf-8')[2] + return + UTF8Reader = iconv_codec.lookup('utf-8')[2] + for name in ["read", "readline", "readlines"]: + for sizehint in [None, -1] + range(1, 33) + \ + [64, 128, 256, 512, 1024]: + istream = UTF8Reader(StringIO(self.tstring[1])) + ostream = self.writer(StringIO()) + func = getattr(istream, name) + while 1: + data = func(sizehint) + if not data: + break + if name == "readlines": + ostream.writelines(data) + else: + ostream.write(data) + + self.assertEqual(ostream.getvalue(), self.tstring[0]) class TestBase_Mapping(unittest.TestCase): pass_enctest = [] |
From: Hye-Shik C. <pe...@us...> - 2003-05-29 09:17:39
|
perky 03/05/29 02:17:38 Modified: tests test_multibytecodec_support.py Log: We have a working 'utf-8' StreamReader now. Revision Changes Path 1.5 +2 -3 cjkcodecs/tests/test_multibytecodec_support.py Index: test_multibytecodec_support.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tests/test_multibytecodec_support.py,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- test_multibytecodec_support.py 27 May 2003 05:17:48 -0000 1.4 +++ test_multibytecodec_support.py 29 May 2003 09:17:38 -0000 1.5 @@ -27,7 +27,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: test_multibytecodec_support.py,v 1.4 2003/05/27 05:17:48 perky Exp $ +# $Id: test_multibytecodec_support.py,v 1.5 2003/05/29 09:17:38 perky Exp $ # import sys, codecs, os.path @@ -134,8 +134,7 @@ # We can't test with the real utf-8 StreamReader here. # The standard SR.readline{,s} are mostly broken for multibyte seqs. #UTF8Reader = codecs.lookup('utf-8')[2] - return - UTF8Reader = iconv_codec.lookup('utf-8')[2] + UTF8Reader = codecs.lookup('cjkcodecs.utf-8')[2] for name in ["read", "readline", "readlines"]: for sizehint in [None, -1] + range(1, 33) + \ [64, 128, 256, 512, 1024]: |
From: Hye-Shik C. <pe...@us...> - 2003-07-19 10:46:16
|
perky 03/07/19 03:46:09 Modified: tests test_multibytecodec_support.py Log: Remove UTF-16 codec and explain why we still keep utf-7 and utf-8 codecs. Revision Changes Path 1.9 +12 -8 cjkcodecs/tests/test_multibytecodec_support.py Index: test_multibytecodec_support.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tests/test_multibytecodec_support.py,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- test_multibytecodec_support.py 12 Jul 2003 15:06:09 -0000 1.8 +++ test_multibytecodec_support.py 19 Jul 2003 10:46:09 -0000 1.9 @@ -27,7 +27,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: test_multibytecodec_support.py,v 1.8 2003/07/12 15:06:09 perky Exp $ +# $Id: test_multibytecodec_support.py,v 1.9 2003/07/19 10:46:09 perky Exp $ # import sys, codecs, os.path @@ -116,7 +116,7 @@ "test.xmlcharnamereplace")[0], sout) def test_streamreader(self): - UTF8Writer = codecs.lookup('utf-8')[3] + UTF8Writer = codecs.getwriter('utf-8') for name in ["read", "readline", "readlines"]: for sizehint in [None, -1] + range(1, 33) + \ [64, 128, 256, 512, 1024]: @@ -135,18 +135,22 @@ self.assertEqual(ostream.getvalue(), self.tstring[1]) def test_streamwriter(self): - # We can't test with the real utf-8 StreamReader here. - # The standard SR.readline{,s} are mostly broken for multibyte seqs. - #UTF8Reader = codecs.lookup('utf-8')[2] - UTF8Reader = codecs.lookup('cjkcodecs.utf-8')[2] + # don't use standard utf-8 streamreader here. 
+ # it's broken for sr.readline(smallnumber) + UTF8Reader = codecs.getreader('cjkcodecs.utf-8') for name in ["read", "readline", "readlines"]: - for sizehint in [None, -1] + range(1, 33) + \ + for sizehint in [None] + range(1, 33) + \ [64, 128, 256, 512, 1024]: istream = UTF8Reader(StringIO(self.tstring[1])) ostream = self.writer(StringIO()) func = getattr(istream, name) while 1: - data = func(sizehint) + print name, sizehint + if sizehint is not None: + data = func(sizehint) + else: + data = func() + if not data: break if name == "readlines": |
From: Hye-Shik C. <pe...@us...> - 2003-07-19 10:49:03
|
perky 03/07/19 03:49:00 Modified: tests test_multibytecodec_support.py Log: UTF-8 codec isn't removed. Revision Changes Path 1.10 +1 -2 cjkcodecs/tests/test_multibytecodec_support.py Index: test_multibytecodec_support.py =================================================================== RCS file: /cvsroot/koco/cjkcodecs/tests/test_multibytecodec_support.py,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- test_multibytecodec_support.py 19 Jul 2003 10:46:09 -0000 1.9 +++ test_multibytecodec_support.py 19 Jul 2003 10:49:00 -0000 1.10 @@ -27,7 +27,7 @@ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # -# $Id: test_multibytecodec_support.py,v 1.9 2003/07/19 10:46:09 perky Exp $ +# $Id: test_multibytecodec_support.py,v 1.10 2003/07/19 10:49:00 perky Exp $ # import sys, codecs, os.path @@ -145,7 +145,6 @@ ostream = self.writer(StringIO()) func = getattr(istream, name) while 1: - print name, sizehint if sizehint is not None: data = func(sizehint) else: |