From: <mi...@us...> - 2012-06-13 14:14:23
|
Revision: 7440 http://docutils.svn.sourceforge.net/docutils/?rev=7440&view=rev Author: milde Date: 2012-06-13 14:14:12 +0000 (Wed, 13 Jun 2012) Log Message: ----------- Fixup: more save implementation of binary data output under Python 3. Prevent test error under Python 3. Add tests for FileOutput. Document. Modified Paths: -------------- trunk/docutils/HISTORY.txt trunk/docutils/docutils/io.py trunk/docutils/test/DocutilsTestSupport.py trunk/docutils/test/test_error_reporting.py trunk/docutils/test/test_io.py Modified: trunk/docutils/HISTORY.txt =================================================================== --- trunk/docutils/HISTORY.txt 2012-06-11 21:02:39 UTC (rev 7439) +++ trunk/docutils/HISTORY.txt 2012-06-13 14:14:12 UTC (rev 7440) @@ -28,6 +28,11 @@ - Use converted sources from the ``build/`` directory for tests under Python 3. +* docutils/io.py + + - Fix writing binary data to sys.stdout under Python 3 (allows + ``rst2odt.py`` to be used with output redirection). + * docutils/parsers/rst/directives/misc.py - Fix [ 3525847 ]. Catch and report UnicodeEncodeError with Modified: trunk/docutils/docutils/io.py =================================================================== --- trunk/docutils/docutils/io.py 2012-06-11 21:02:39 UTC (rev 7439) +++ trunk/docutils/docutils/io.py 2012-06-13 14:14:12 UTC (rev 7440) @@ -21,7 +21,22 @@ class InputError(IOError): pass class OutputError(IOError): pass +def check_encoding(stream, encoding): + """Test, whether the encoding of `stream` matches `encoding`. + Returns + + :None: if `encoding` or `stream.encoding` are not a valid encoding + argument (e.g. ``None``) or `stream.encoding is missing. + :True: if the encoding argument resolves to the same value as `encoding`, + :False: if the encodings differ. 
+ """ + try: + return codecs.lookup(stream.encoding) == codecs.lookup(encoding) + except (LookupError, AttributeError, TypeError): + return None + + class Input(TransformSpec): """ @@ -231,10 +246,7 @@ else: self.source = sys.stdin elif (sys.version_info >= (3,0) and - self.encoding and hasattr(self.source, 'encoding') and - self.encoding != self.source.encoding and - codecs.lookup(self.encoding) != - codecs.lookup(self.source.encoding)): + check_encoding(self.source, self.encoding) is False): # TODO: re-open, warn or raise error? raise UnicodeError('Encoding clash: encoding given is "%s" ' 'but source is opened with encoding "%s".' % @@ -327,10 +339,7 @@ if destination_path: self.opened = False else: - if sys.version_info >= (3,0) and 'b' in self.mode: - self.destination = sys.stdout.buffer - else: - self.destination = sys.stdout + self.destination = sys.stdout elif (# destination is file-type object -> check mode: mode and hasattr(self.destination, 'mode') and mode != self.destination.mode): @@ -342,17 +351,22 @@ self.destination_path = self.destination.name except AttributeError: pass - if (encoding and hasattr(self.destination, 'encoding') - and codecs.lookup(self.encoding) != - codecs.lookup(self.destination.encoding)): - if self.destination is sys.stdout and sys.version_info >= (3,0): - self.destination = sys.stdout.buffer - else: - raise UnicodeError('Encoding of %s (%s) ' - 'differs from specified encoding (%s)' % - (self.destination_path or 'destination', - self.destination.encoding, encoding)) + # Special cases under Python 3: different encoding or binary output + if sys.version_info >= (3,0): + if ('b' in self.mode + and self.destination in (sys.stdout, sys.stderr) + ): + self.destination = self.destination.buffer + if check_encoding(self.destination, self.encoding) is False: + if self.destination in (sys.stdout, sys.stderr): + self.destination = self.destination.buffer + else: # TODO: try the `write to .buffer` scheme instead? 
+ raise ValueError('Encoding of %s (%s) differs \n' + ' from specified encoding (%s)' % + (self.destination_path or 'destination', + destination.encoding, encoding)) + def open(self): # Specify encoding in Python 3. if sys.version_info >= (3,0): @@ -375,25 +389,23 @@ def write(self, data): """Encode `data`, write it to a single file, and return it. - With Python 3 or binary output mode, `data` is returned unchanged. + With Python 3 or binary output mode, `data` is returned unchanged, + except when specified encoding and output encoding differ. """ - if sys.version_info < (3,0) and 'b' not in self.mode: - data = self.encode(data) if not self.opened: self.open() try: # In Python < 2.5, try...except has to be nested in try...finally. try: - if (sys.version_info >= (3,0) - and self.destination is sys.stdout.buffer - and 'b' not in self.mode): - # encode now, as sys.stdout.encoding != self.encoding - bdata = self.encode(data) - if os.linesep != '\n': - bdata = bdata.replace('\n', os.linesep) - self.destination.buffer.write(bdata) - else: - self.destination.write(data) - except (UnicodeError, LookupError), err: # can only happen in py3k + if 'b' not in self.mode and (sys.version_info < (3,0) or + check_encoding(self.destination, self.encoding) is False): + data = self.encode(data) + if sys.version_info >= (3,0) and os.linesep != '\n': + # writing as binary data -> fix endings + data = data.replace('\n', os.linesep) + + self.destination.write(data) + + except (UnicodeError, LookupError), err: raise UnicodeError( 'Unable to encode output data. 
output-encoding is: ' '%s.\n(%s)' % (self.encoding, ErrorString(err))) Modified: trunk/docutils/test/DocutilsTestSupport.py =================================================================== --- trunk/docutils/test/DocutilsTestSupport.py 2012-06-11 21:02:39 UTC (rev 7439) +++ trunk/docutils/test/DocutilsTestSupport.py 2012-06-13 14:14:12 UTC (rev 7440) @@ -881,7 +881,7 @@ return_tuple = [] for i in args: r = repr(i) - if ( (isinstance(i, str) or isinstance(i, unicode)) + if ( (isinstance(i, bytes) or isinstance(i, unicode)) and '\n' in i): stripped = '' if isinstance(i, unicode) and r.startswith('u'): Modified: trunk/docutils/test/test_error_reporting.py =================================================================== --- trunk/docutils/test/test_error_reporting.py 2012-06-11 21:02:39 UTC (rev 7439) +++ trunk/docutils/test/test_error_reporting.py 2012-06-13 14:14:12 UTC (rev 7440) @@ -158,14 +158,14 @@ # ----------------- # Stub: Buffer with 'strict' auto-conversion of input to byte string: -class BBuf(BytesIO, object): +class BBuf(BytesIO, object): # super class object required by Python <= 2.5 def write(self, data): if isinstance(data, unicode): data.encode('ascii', 'strict') super(BBuf, self).write(data) # Stub: Buffer expecting unicode string: -class UBuf(StringIO, object): +class UBuf(StringIO, object): # super class object required by Python <= 2.5 def write(self, data): # emulate Python 3 handling of stdout, stderr if isinstance(data, bytes): Modified: trunk/docutils/test/test_io.py =================================================================== --- trunk/docutils/test/test_io.py 2012-06-11 21:02:39 UTC (rev 7439) +++ trunk/docutils/test/test_io.py 2012-06-13 14:14:12 UTC (rev 7440) @@ -13,13 +13,46 @@ from docutils import io from docutils._compat import b, bytes from docutils.error_reporting import locale_encoding +from test_error_reporting import BBuf, UBuf +# python 2.3 +if not hasattr(unittest.TestCase, "assertTrue"): + assertTrue = 
unittest.TestCase.failUnless + +class mock_stdout(UBuf): + encoding = 'utf8' + + def __init__(self): + self.buffer = BBuf() + UBuf.__init__(self) + +class HelperTests(unittest.TestCase): + + def test_check_encoding_true(self): + """Return `True` if lookup returns the same codec""" + self.assertEqual(io.check_encoding(mock_stdout, 'utf8'), True) + self.assertEqual(io.check_encoding(mock_stdout, 'utf-8'), True) + self.assertEqual(io.check_encoding(mock_stdout, 'UTF-8'), True) + + def test_check_encoding_false(self): + """Return `False` if lookup returns different codecs""" + self.assertEqual(io.check_encoding(mock_stdout, 'ascii'), False) + self.assertEqual(io.check_encoding(mock_stdout, 'latin-1'), False) + + def test_check_encoding_none(self): + """Cases where the comparison fails.""" + # stream.encoding is None: + self.assertEqual(io.check_encoding(io.FileInput(), 'ascii'), None) + # stream.encoding does not exist: + self.assertEqual(io.check_encoding(BBuf, 'ascii'), None) + # encoding is None: + self.assertEqual(io.check_encoding(mock_stdout, None), None) + # encoding is invalid + self.assertEqual(io.check_encoding(mock_stdout, 'UTF-9'), None) + + class InputTests(unittest.TestCase): - # python 2.3 - if not hasattr(unittest.TestCase, "assertTrue"): - assertTrue = unittest.TestCase.failUnless - def test_bom(self): input = io.StringInput(source=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'), encoding='utf8') @@ -75,13 +108,13 @@ # if no encoding is given, try decoding with utf8: input = io.FileInput(source_path='functional/input/cyrillic.txt') data = input.read() - if sys.version_info < (3,0): + if sys.version_info < (3,0): # in Py3k, the locale encoding is used without --input-encoding # skipping the heuristic self.assertEqual(input.successful_encoding, 'utf-8') def test_heuristics_no_utf8(self): - # if no encoding is given and decoding with utf8 fails, + # if no encoding is given and decoding with utf8 fails, # use either the locale encoding (if specified) or latin1: 
input = io.FileInput(source_path='data/latin1.txt') data = input.read() @@ -91,5 +124,66 @@ self.assertEqual(data, u'Gr\xfc\xdfe\n') +class OutputTests(unittest.TestCase): + + bdata = b('\xfc') + udata = u'\xfc' + + def setUp(self): + self.bdrain = BBuf() + """Buffer accepting binary strings (bytes)""" + self.udrain = UBuf() + """Buffer accepting unicode strings""" + self.mock_stdout = mock_stdout() + """Stub of sys.stdout under Python 3""" + + def test_write_unicode(self): + fo = io.FileOutput(destination=self.udrain, encoding='unicode', + autoclose=False) + fo.write(self.udata) + self.assertEqual(self.udrain.getvalue(), self.udata) + + def test_write_utf8(self): + if sys.version_info >= (3,0): + fo = io.FileOutput(destination=self.udrain, encoding='utf8', + autoclose=False) + fo.write(self.udata) + self.assertEqual(self.udrain.getvalue(), self.udata) + else: + fo = io.FileOutput(destination=self.bdrain, encoding='utf8', + autoclose=False) + fo.write(self.udata) + self.assertEqual(self.bdrain.getvalue(), self.udata.encode('utf8')) + + # With destination in binary mode, data must be binary string + # and is written as-is: + def test_write_bytes(self): + fo = io.FileOutput(destination=self.bdrain, encoding='utf8', + mode='wb', autoclose=False) + fo.write(self.bdata) + self.assertEqual(self.bdrain.getvalue(), self.bdata) + + # Test for Python 3 features: + if sys.version_info >= (3,0): + def test_write_bytes_to_stdout(self): + # binary data is written to destination.buffer, if the + # destination is sys.stdout or sys.stdin + backup = sys.stdout + sys.stdout = self.mock_stdout + fo = io.FileOutput(destination=sys.stdout, mode='wb', + autoclose=False) + fo.write(self.bdata) + self.assertEqual(self.mock_stdout.buffer.getvalue(), + self.bdata) + sys.stdout = backup + + def test_encoding_clash(self): + # Raise error, if given and destination encodings differ + # TODO: try the `write to .buffer` scheme instead? 
+ self.assertRaises(ValueError, + io.FileOutput, destination=self.mock_stdout, + encoding='latin1') + + if __name__ == '__main__': unittest.main() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.