From: Guenter M. <g....@qu...> - 2023-04-12 17:29:26
|
Dear Adam, dear Docutils developers, Am 6.04.23 schrieb Adam Turner: ... > > I left the decision about the end state of this transition open... > A decision to make later, and one that doesn't block the 0.20 release! Yes and no: if we want to give users advice on a stable recipe to avoid being hit by the default-change, we would need agreement on what will (most likely) be kept stable. The API documentation "publisher.txt" now has the example output = bytes(publish_string(…)) (which depends on `OutputString` features). ... > > (How about starting with annotating "core.py". [...] > I have a patch for adding type hints to Docutils, but wanted to wait > until releasing Docutils 0.20 so as not to add major new changes to > the repository. Agreed. > >> As far as I can tell, the only unresolved point at the moment ahead > >> of releasing Docutils 0.20 is the future of ``publish_bytes``. In the course of the latest changes and documentation updates, I found two issues: * The parts returned by publish_parts() have had (for ages) an item "encoding" (holding the `output_encoding` setting). This can (and IMO should) be easily complemented with "errors" (holding the `output_encoding_error_handler` setting). * The "null" writer currently produces the output ``None``, which is neither `str` nor `bytes`. Currently, ``publish_string(writer_name="null", auto_encode=True)`` returns ``OutputString("None", encoding='utf-8', errors='strict')``. This should be either ``None`` (which requires special casing for None) or the empty string (which would make the output value type conforming to the documentation). I have prepared two small patches that could go into either 0.20 or 0.21 (see below). What do you think? Günter --- >From 4f1b402a033cfc51c215469bfe89e0fbfabbc8af Mon Sep 17 00:00:00 2001 From: milde <mi...@us...> Date: Wed, 12 Apr 2023 17:21:17 +0200 Subject: [PATCH] Add "errors" to the parts provided by all writers. 
The new generic part "errors" contains the `output_encoding_error_handler` setting, which may make-or-break encoding ``parts['whole']``. --- docutils/docs/api/publisher.txt | 5 ++++- docutils/docutils/core.py | 2 +- docutils/docutils/writers/__init__.py | 2 ++ docutils/test/test_writers/test_html4css1_parts.py | 1 + docutils/test/test_writers/test_html5_polyglot_parts.py | 1 + docutils/test/test_writers/test_latex2e_misc.py | 1 + 6 files changed, 10 insertions(+), 2 deletions(-) diff --git a/docutils/docs/api/publisher.txt b/docutils/docs/api/publisher.txt index 89810ff..a18e67d 100644 --- a/docutils/docs/api/publisher.txt +++ b/docutils/docs/api/publisher.txt @@ -136,5 +136,8 @@ Parts Provided By All Writers _`encoding` - The output encoding setting. + The `output_encoding`_ setting. + +_`errors` + The `output_encoding_error_handler`_ setting. _`version` diff --git a/docutils/docutils/core.py b/docutils/docutils/core.py index e19d6ce..085e5cd 100644 --- a/docutils/docutils/core.py +++ b/docutils/docutils/core.py @@ -491,5 +491,5 @@ def publish_parts(source, source_path=None, source_class=io.StringInput, parts = publish_parts(...) - body = parts['body'].encode(parts['encoding']) + body = parts['body'].encode(parts['encoding'], parts['errors']) See the `API documentation`__ for details on the provided parts. 
diff --git a/docutils/docutils/writers/__init__.py b/docutils/docutils/writers/__init__.py index 84be8f6..cba09f4 100644 --- a/docutils/docutils/writers/__init__.py +++ b/docutils/docutils/writers/__init__.py @@ -95,4 +95,6 @@ def assemble_parts(self): self.parts['whole'] = self.output self.parts['encoding'] = self.document.settings.output_encoding + self.parts['errors'] = ( + self.document.settings.output_encoding_error_handler) self.parts['version'] = docutils.__version__ diff --git a/docutils/test/test_writers/test_html4css1_parts.py b/docutils/test/test_writers/test_html4css1_parts.py index 8bd0dee..6dcfbd7 100755 --- a/docutils/test/test_writers/test_html4css1_parts.py +++ b/docutils/test/test_writers/test_html4css1_parts.py @@ -76,4 +76,5 @@ def format_output(self, parts): del parts['head_prefix'] del parts['encoding'] + del parts['errors'] del parts['version'] # remove standard portions: diff --git a/docutils/test/test_writers/test_html5_polyglot_parts.py b/docutils/test/test_writers/test_html5_polyglot_parts.py index acd959d..799ee98 100644 --- a/docutils/test/test_writers/test_html5_polyglot_parts.py +++ b/docutils/test/test_writers/test_html5_polyglot_parts.py @@ -74,4 +74,5 @@ def format_output(self, parts): del parts['head_prefix'] del parts['encoding'] + del parts['errors'] del parts['version'] # remove standard portions: diff --git a/docutils/test/test_writers/test_latex2e_misc.py b/docutils/test/test_writers/test_latex2e_misc.py index 32b1edc..74b2813 100644 --- a/docutils/test/test_writers/test_latex2e_misc.py +++ b/docutils/test/test_writers/test_latex2e_misc.py @@ -69,4 +69,5 @@ def test_publish_parts(self): 'docinfo', 'encoding', + 'errors', 'fallbacks', 'head_prefix', -- libgit2 1.1.0 >From 30d5554ee8d1015d1c3ff7c0d1862bd003fcb5b7 Mon Sep 17 00:00:00 2001 From: milde <mi...@us...> Date: Wed, 12 Apr 2023 17:30:10 +0200 Subject: [PATCH] The "null" writer now sets the output to the empty string ''. 
Bring the "null" writer behaviour in line with other writers to facilitate keeping a consistent "core" API. Makes `io.StringOutput` and `core.publish_string()` conform to the documented API without requiring a special case for ``output == None``. --- docutils/docutils/writers/null.py | 2 +- docutils/test/test_writers/test_null.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docutils/docutils/writers/null.py b/docutils/docutils/writers/null.py index 6c30627..0d4a919 100644 --- a/docutils/docutils/writers/null.py +++ b/docutils/docutils/writers/null.py @@ -19,3 +19,3 @@ class Writer(writers.UnfilteredWriter): def translate(self): - pass + self.output = '' diff --git a/docutils/test/test_writers/test_null.py b/docutils/test/test_writers/test_null.py index 47890fd..5880ac3 100755 --- a/docutils/test/test_writers/test_null.py +++ b/docutils/test/test_writers/test_null.py @@ -34,7 +34,6 @@ def test_publish(self): 'strict_visitor': True, }, + auto_encode=False, ) - if isinstance(output, bytes): - output = output.decode('utf-8') self.assertEqual(output, case_expected) @@ -46,5 +45,5 @@ def test_publish(self): This is a paragraph. """, -None] +''] ] -- libgit2 1.1.0 |