|
From: <aa-...@us...> - 2022-11-02 23:28:52
|
Revision: 9202
http://sourceforge.net/p/docutils/code/9202
Author: aa-turner
Date: 2022-11-02 23:28:49 +0000 (Wed, 02 Nov 2022)
Log Message:
-----------
Partially revert r9167
``docutils.core.publish_string`` uses the Python 2 notion of a
bytestring, such that in the general case it returns Python 3's
``bytes`` type. Revision 9167 attempted to address this distinction
by introducing ``publish_bytes`` and changing ``publish_string`` to
always return unicode text data as Python's ``str`` type. This is a
backwards compatibility break, so in this commit we restore the
previous behaviour, whilst simultaneously deprecating support for
returning binary data from the ``docutils.core.publish_string``
function for at least two releases of Docutils.
As part of this, we also deprecate returning binary data from the
``docutils.io.StringOutput.encode`` method; ``docutils.io.BytesOutput``
should be used in its stead.
Finally, we update tests for the reversion to the previous behaviour.
Revision Links:
--------------
http://sourceforge.net/p/docutils/code/9167
Modified Paths:
--------------
trunk/docutils/docutils/core.py
trunk/docutils/docutils/io.py
trunk/docutils/docutils/writers/_html_base.py
trunk/docutils/docutils/writers/docutils_xml.py
trunk/docutils/docutils/writers/html5_polyglot/__init__.py
trunk/docutils/docutils/writers/latex2e/__init__.py
trunk/docutils/docutils/writers/s5_html/__init__.py
trunk/docutils/test/DocutilsTestSupport.py
trunk/docutils/test/alltests.py
trunk/docutils/test/test_parsers/test_recommonmark/test_misc.py
trunk/docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py
trunk/docutils/test/test_publisher.py
trunk/docutils/test/test_writers/test_docutils_xml.py
trunk/docutils/test/test_writers/test_html4css1_misc.py
trunk/docutils/test/test_writers/test_html5_polyglot_misc.py
trunk/docutils/test/test_writers/test_latex2e_misc.py
trunk/docutils/test/test_writers/test_null.py
Modified: trunk/docutils/docutils/core.py
===================================================================
--- trunk/docutils/docutils/core.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/docutils/core.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -429,10 +429,22 @@
enable_exit_status=False):
"""
Set up & run a `Publisher` for programmatic use with string I/O. Return
- Unicode string output.
+ the encoded string or Unicode string output.
+ For encoded string output, be sure to set the 'output_encoding' setting to
+ the desired encoding. Set it to 'unicode' for unencoded Unicode string
+ output. Here's one way::
+
+ publish_string(..., settings_overrides={'output_encoding': 'unicode'})
+
+ Similarly for Unicode string input (`source`)::
+
+ publish_string(..., settings_overrides={'input_encoding': 'unicode'})
+
Parameters: see `publish_programmatically`.
"""
+ warnings.warn('The return type of publish_string will change to '
+ '"str" from Docutils 0.21.', FutureWarning, stacklevel=2)
output, pub = publish_programmatically(
source_class=io.StringInput, source=source, source_path=source_path,
destination_class=io.StringOutput,
Modified: trunk/docutils/docutils/io.py
===================================================================
--- trunk/docutils/docutils/io.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/docutils/io.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -558,7 +558,8 @@
class BytesOutput(Output):
"""
- Direct string output.
+ Direct binary output.
+ Provisional.
"""
default_destination_path = '<bytes>'
@@ -601,9 +602,12 @@
return self.destination
def encode(self, data):
- if isinstance(data, bytes):
- return data.decode(self.encoding, self.error_handler)
- return str(data)
+ data = super().encode(data)
+ if not isinstance(data, str):
+ warnings.warn("StringOutput.encode()'s return type will change to "
+ f'``str`` from Docutils 0.21, got type {type(data)}',
+ FutureWarning, stacklevel=2)
+ return data
class NullInput(Input):
Modified: trunk/docutils/docutils/writers/_html_base.py
===================================================================
--- trunk/docutils/docutils/writers/_html_base.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/docutils/writers/_html_base.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -286,7 +286,7 @@
self.html_prolog = []
if settings.xml_declaration:
self.head_prefix.append(self.xml_declaration
- % settings.output_encoding)
+ % _output_encoding(settings))
# self.content_type = ""
# encoding not interpolated:
self.html_prolog.append(self.xml_declaration)
@@ -812,8 +812,8 @@
self.head_prefix_template %
{'lang': self.settings.language_code}])
self.html_prolog.append(self.doctype)
- self.meta.insert(0, self.content_type % self.settings.output_encoding)
- self.head.insert(0, self.content_type % self.settings.output_encoding)
+ self.meta.insert(0, self.content_type % _output_encoding(self.settings))
+ self.head.insert(0, self.content_type % _output_encoding(self.settings))
if 'name="dcterms.' in ''.join(self.meta):
self.head.append('<link rel="schema.dcterms"'
'href="http://purl.org/dc/terms/"/>')
@@ -1779,3 +1779,10 @@
visit_substitution_definition = ignore_node
visit_target = ignore_node
visit_pending = ignore_node
+
+
+def _output_encoding(settings):
+ """TEMPORARY, remove in Docutils 0.21"""
+ if settings.output_encoding == 'unicode':
+ return 'utf-8'
+ return settings.output_encoding
Modified: trunk/docutils/docutils/writers/docutils_xml.py
===================================================================
--- trunk/docutils/docutils/writers/docutils_xml.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/docutils/writers/docutils_xml.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -101,7 +101,7 @@
self.output = []
if settings.xml_declaration:
self.output.append(
- self.xml_declaration % settings.output_encoding)
+ self.xml_declaration % _output_encoding(settings))
if settings.doctype_declaration:
self.output.append(self.doctype)
self.output.append(self.generator % docutils.__version__)
@@ -186,3 +186,10 @@
def setDocumentLocator(self, locator):
self.locator = locator
+
+
+def _output_encoding(settings):
+ """TEMPORARY, remove in Docutils 0.21"""
+ if settings.output_encoding == 'unicode':
+ return 'utf-8'
+ return settings.output_encoding
Modified: trunk/docutils/docutils/writers/html5_polyglot/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/html5_polyglot/__init__.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/docutils/writers/html5_polyglot/__init__.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -205,8 +205,8 @@
self.html_prolog.append(self.doctype)
self.meta.insert(0, self.viewport)
self.head.insert(0, self.viewport)
- self.meta.insert(0, self.content_type % self.settings.output_encoding)
- self.head.insert(0, self.content_type % self.settings.output_encoding)
+ self.meta.insert(0, self.content_type % _output_encoding(self))
+ self.head.insert(0, self.content_type % _output_encoding(self))
if 'name="dcterms.' in ''.join(self.meta):
self.head.append('<link rel="schema.dcterms"'
' href="http://purl.org/dc/terms/"/>')
@@ -452,3 +452,10 @@
f' href="#{ids[0]}"></a>')
close_tag = close_tag.replace('</h', self_link + '</h')
return start_tag, close_tag
+
+
+def _output_encoding(self):
+ """TEMPORARY, remove in Docutils 0.21"""
+ if self.settings.output_encoding == 'unicode':
+ return 'utf-8'
+ return self.settings.output_encoding
Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -1460,6 +1460,7 @@
# 'iso-8859-7': '' # greek
# 'iso-8859-8': '' # hebrew
# 'iso-8859-10': '' # latin6, more complete iso-8859-4
+ 'unicode': 'utf8', # TEMPORARY, remove in Docutils 0.21
}
encoding = docutils_encoding.lower()
if encoding in tr:
Modified: trunk/docutils/docutils/writers/s5_html/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/s5_html/__init__.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/docutils/writers/s5_html/__init__.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -282,8 +282,8 @@
self.head_prefix_template %
{'lang': self.settings.language_code}])
self.html_prolog.append(self.doctype)
- self.meta.insert(0, self.content_type % self.settings.output_encoding)
- self.head.insert(0, self.content_type % self.settings.output_encoding)
+ self.meta.insert(0, self.content_type % _output_encoding(self))
+ self.head.insert(0, self.content_type % _output_encoding(self))
if self.math_header:
if self.math_output == 'mathjax':
self.head.extend(self.math_header)
@@ -350,3 +350,10 @@
def visit_title(self, node):
html4css1.HTMLTranslator.visit_title(self, node)
+
+
+def _output_encoding(self):
+ """TEMPORARY, remove in Docutils 0.21"""
+ if self.settings.output_encoding == 'unicode':
+ return 'utf-8'
+ return self.settings.output_encoding
Modified: trunk/docutils/test/DocutilsTestSupport.py
===================================================================
--- trunk/docutils/test/DocutilsTestSupport.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/test/DocutilsTestSupport.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -492,7 +492,8 @@
"""
settings_default_overrides = {'_disable_config': True,
- 'strict_visitor': True}
+ 'strict_visitor': True,
+ 'output_encoding': 'unicode'}
writer_name = '' # set in subclasses or constructor
def __init__(self, *args, writer_name='', **kwargs):
@@ -508,7 +509,7 @@
writer_name=self.writer_name,
settings_spec=self,
settings_overrides=self.suite_settings)
- self.assertEqual(output, str(self.expected))
+ self.assertEqual(str(output), str(self.expected))
class PublishTestSuite(CustomTestSuite):
Modified: trunk/docutils/test/alltests.py
===================================================================
--- trunk/docutils/test/alltests.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/test/alltests.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -22,11 +22,19 @@
import glob # noqa: E402
import os # noqa: E402
import platform # noqa: E402
+import warnings # noqa: E402
from importlib import import_module # noqa: E402
import DocutilsTestSupport # noqa: E402 must be imported before docutils
import docutils # noqa: E402
+warnings.filterwarnings('ignore',
+ message='.*return type of publish_string.*',
+ category=FutureWarning)
+warnings.filterwarnings('ignore',
+ message=r".*StringOutput.encode\(\)'s return type.*",
+ category=FutureWarning)
+
class Tee:
"""Write to a file and a stream (default: stdout) simultaneously."""
Modified: trunk/docutils/test/test_parsers/test_recommonmark/test_misc.py
===================================================================
--- trunk/docutils/test/test_parsers/test_recommonmark/test_misc.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/test/test_parsers/test_recommonmark/test_misc.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -51,8 +51,11 @@
def test_raw_disabled(self):
output = publish_string(sample_with_html, parser=parser,
- settings_overrides={'warning_stream': '',
- 'raw_enabled': False})
+ settings_overrides={
+ 'warning_stream': '',
+ 'raw_enabled': False,
+ 'output_encoding': 'unicode',
+ })
self.assertNotIn('<raw>', output)
self.assertIn('<system_message', output)
self.assertIn('Raw content disabled.', output)
@@ -59,9 +62,11 @@
def test_raw_disabled_inline(self):
output = publish_string('foo <a href="uri">', parser=parser,
- settings_overrides={'warning_stream': '',
- 'raw_enabled': False,
- })
+ settings_overrides={
+ 'warning_stream': '',
+ 'raw_enabled': False,
+ 'output_encoding': 'unicode',
+ })
self.assertNotIn('<raw>', output)
self.assertIn('<system_message', output)
self.assertIn('Raw content disabled.', output)
@@ -73,7 +78,8 @@
def test_missing_parser_message(self):
with self.assertRaisesRegex(ImportError,
'requires the package .*recommonmark'):
- publish_string(sample_with_html, parser_name='recommonmark')
+ publish_string(sample_with_html, parser_name='recommonmark',
+ settings_overrides={'output_encoding': 'unicode'})
if __name__ == '__main__':
Modified: trunk/docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py
===================================================================
--- trunk/docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/test/test_parsers/test_rst/test_directives/test_code_parsing.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -38,7 +38,7 @@
autoload("abc_mode", "abc");
"""
-settings = {'warning_stream': ''}
+settings = {'warning_stream': '', 'output_encoding': 'unicode'}
@unittest.skipUnless(with_pygments, 'optional module "pygments" not found')
Modified: trunk/docutils/test/test_publisher.py
===================================================================
--- trunk/docutils/test/test_publisher.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/test/test_publisher.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -106,7 +106,8 @@
settings_spec=self,
settings_overrides={'expose_internals':
['refnames', 'do_not_expose'],
- 'report_level': 1})
+ 'report_level': 1,
+ 'output_encoding': 'unicode'})
self.assertEqual(output, exposed_pseudoxml_output)
# Test publishing parts using document as the source.
@@ -150,7 +151,7 @@
# Write out the document:
output = core.publish_from_doctree(
doctree_zombie, writer_name='pseudoxml',
- settings_spec=self)
+ settings_spec=self).decode('utf-8')
self.assertEqual(output, pseudoxml_output)
Modified: trunk/docutils/test/test_writers/test_docutils_xml.py
===================================================================
--- trunk/docutils/test/test_writers/test_docutils_xml.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/test/test_writers/test_docutils_xml.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -121,11 +121,10 @@
def publish_xml(settings, source):
- return docutils.core.publish_string(source=source.encode('utf-8'),
- reader_name='standalone',
- writer_name='docutils_xml',
- settings_overrides=settings
- ).encode('latin1', 'xmlcharrefreplace')
+ return docutils.core.publish_bytes(source=source.encode('utf-8'),
+ reader_name='standalone',
+ writer_name='docutils_xml',
+ settings_overrides=settings)
# XML Test Case
Modified: trunk/docutils/test/test_writers/test_html4css1_misc.py
===================================================================
--- trunk/docutils/test/test_writers/test_html4css1_misc.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/test/test_writers/test_html4css1_misc.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -37,6 +37,7 @@
mys = {'stylesheet_path': '',
# 'embed_stylesheet': False,
'_disable_config': True,
+ 'output_encoding': 'unicode',
}
def test_definition_list_item_classes(self):
Modified: trunk/docutils/test/test_writers/test_html5_polyglot_misc.py
===================================================================
--- trunk/docutils/test/test_writers/test_html5_polyglot_misc.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/test/test_writers/test_html5_polyglot_misc.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -36,6 +36,7 @@
mys = {'stylesheet_path': '',
# 'embed_stylesheet': False,
'_disable_config': True,
+ 'output_encoding': 'unicode',
}
def test_definition_list_item_classes(self):
@@ -139,6 +140,7 @@
"""Warn about deprecated setting name."""
my_settings = {'_disable_config': True,
'embed_images': False,
+ 'output_encoding': 'unicode',
}
with self.assertWarnsRegex(FutureWarning,
'"embed_images" will be removed'):
Modified: trunk/docutils/test/test_writers/test_latex2e_misc.py
===================================================================
--- trunk/docutils/test/test_writers/test_latex2e_misc.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/test/test_writers/test_latex2e_misc.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -61,6 +61,7 @@
mysettings = {'_disable_config': True,
# 'use_latex_citations': False,
# 'legacy_column_widths': True,
+ 'output_encoding': 'unicode',
}
with self.assertWarnsRegex(FutureWarning,
'"legacy_column_widths" will change'):
Modified: trunk/docutils/test/test_writers/test_null.py
===================================================================
--- trunk/docutils/test/test_writers/test_null.py 2022-11-02 22:39:16 UTC (rev 9201)
+++ trunk/docutils/test/test_writers/test_null.py 2022-11-02 23:28:49 UTC (rev 9202)
@@ -12,7 +12,8 @@
def suite():
- s = DocutilsTestSupport.PublishTestSuite('null')
+ s = DocutilsTestSupport.PublishTestSuite(
+ 'null', suite_settings={'output_encoding': 'utf-8'})
s.generateTests(totest)
return s
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|