[Docutils-checkins] SF.net SVN: docutils:[9068] trunk/docutils

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 9068
          http://sourceforge.net/p/docutils/code/9068
Author:   milde
Date:     2022-06-13 12:05:08 +0000 (Mon, 13 Jun 2022)
Log Message:
-----------
Unify naming of the "utf-8" codec.

Modified Paths:
--------------
    trunk/docutils/HISTORY.txt
    trunk/docutils/docutils/utils/__init__.py
    trunk/docutils/docutils/utils/math/tex2mathml_extern.py
    trunk/docutils/docutils/utils/smartquotes.py
    trunk/docutils/docutils/writers/latex2e/__init__.py
    trunk/docutils/test/test_dependencies.py
    trunk/docutils/test/test_error_reporting.py
    trunk/docutils/test/test_io.py
    trunk/docutils/test/test_parsers/test_rst/test_directives/test_date.py
    trunk/docutils/test/test_writers/test_docutils_xml.py
    trunk/docutils/tools/dev/generate_punctuation_chars.py

Modified: trunk/docutils/HISTORY.txt
===================================================================

--- trunk/docutils/HISTORY.txt	2022-06-10 11:08:46 UTC (rev 9067)
+++ trunk/docutils/HISTORY.txt	2022-06-13 12:05:08 UTC (rev 9068)
@@ -25,7 +25,7 @@
 
   - New module. Support for ``python -m docutils``.
     Also used for the ``docutils`` console script `entry point`.
-    
+
 * docutils/core.py:
 
   Let `Publisher.publish()` print info and prompt when waiting for input
@@ -98,11 +98,11 @@
 
   - Use "https:" scheme in "python_home" URL default.
   - Fix links in template.txt.
-  
+
 * setup.py:
 
   - New "docutils" console script `entry point`__. Fixes bug #447.
-    
+
     __ https://packaging.python.org/en/latest/specifications/entry-points/
 
 * test/DocutilsTestSupport.py
@@ -381,7 +381,7 @@
 
 * docutils/writers/latex2e/__init__.py:
 
-  - Open "docutils.sty" with encoding set to "utf8".
+  - Open "docutils.sty" with encoding set to "utf-8".
     Fixes bug #414: error with Py3k when locale encoding is "ascii".
 
 * docutils/parsers/*.py, docutils/transforms/*.py
@@ -1273,7 +1273,7 @@
 
      __ RELEASE-NOTES.html
 
-  - DependencyList uses io.FileOutput and 'utf8' encoding to prevent
+  - DependencyList uses io.FileOutput and 'utf-8' encoding to prevent
     errors recording non-ASCII filenames (fixes [ 3434355 ]).
 
   - Fix `relative_path()` with source=None and `unicode` target.

Modified: trunk/docutils/docutils/utils/__init__.py
===================================================================
--- trunk/docutils/docutils/utils/__init__.py	2022-06-10 11:08:46 UTC (rev 9067)
+++ trunk/docutils/docutils/utils/__init__.py	2022-06-13 12:05:08 UTC (rev 9068)
@@ -748,7 +748,7 @@
             else:
                 of = output_file
             self.file = io.FileOutput(destination_path=of,
-                                      encoding='utf8', autoclose=False)
+                                      encoding='utf-8', autoclose=False)
         else:
             self.file = None
 

Modified: trunk/docutils/docutils/utils/math/tex2mathml_extern.py
===================================================================
--- trunk/docutils/docutils/utils/math/tex2mathml_extern.py	2022-06-10 11:08:46 UTC (rev 9067)
+++ trunk/docutils/docutils/utils/math/tex2mathml_extern.py	2022-06-13 12:05:08 UTC (rev 9068)
@@ -40,10 +40,10 @@
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE,
                          close_fds=True)
-    p.stdin.write((document_template % math_code).encode('utf8'))
+    p.stdin.write((document_template % math_code).encode('utf-8'))
     p.stdin.close()
     latexml_code = p.stdout.read()
-    latexml_err = p.stderr.read().decode('utf8')
+    latexml_err = p.stderr.read().decode('utf-8')
     if reporter and (latexml_err.find('Error') >= 0 or not latexml_code):
         reporter.error(latexml_err)
 
@@ -60,8 +60,8 @@
                               close_fds=True)
     post_p.stdin.write(latexml_code)
     post_p.stdin.close()
-    result = post_p.stdout.read().decode('utf8')
-    post_p_err = post_p.stderr.read().decode('utf8')
+    result = post_p.stdout.read().decode('utf-8')
+    post_p_err = post_p.stderr.read().decode('utf-8')
     if reporter and (post_p_err.find('Error') >= 0 or not result):
         reporter.error(post_p_err)
 
@@ -87,10 +87,10 @@
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE,
                          close_fds=True)
-    p.stdin.write((document_template % math_code).encode('utf8'))
+    p.stdin.write((document_template % math_code).encode('utf-8'))
     p.stdin.close()
     result = p.stdout.read()
-    err = p.stderr.read().decode('utf8')
+    err = p.stderr.read().decode('utf-8')
     if err.find('**** Unknown') >= 0:
         msg = '\n'.join(line for line in err.splitlines()
                         if line.startswith('****'))
@@ -125,10 +125,10 @@
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE,
                          close_fds=True)
-    p.stdin.write(math_code.encode('utf8'))
+    p.stdin.write(math_code.encode('utf-8'))
     p.stdin.close()
-    result = p.stdout.read().decode('utf8')
-    err = p.stderr.read().decode('utf8')
+    result = p.stdout.read().decode('utf-8')
+    err = p.stderr.read().decode('utf-8')
 
     if result.find('<error>') >= 0:
         msg = result[result.find('<message>')+9:result.find('</message>')]
@@ -147,6 +147,6 @@
 if __name__ == "__main__":
     example = ('\\frac{\\partial \\sin^2(\\alpha)}{\\partial \\vec r}'
                '\\varpi \\, \\text{Grüße}')
-    # print(latexml(example).encode('utf8'))
+    # print(latexml(example).encode('utf-8'))
     # print(ttm(example))
-    print(blahtexml(example).encode('utf8'))
+    print(blahtexml(example).encode('utf-8'))

Modified: trunk/docutils/docutils/utils/smartquotes.py
===================================================================
--- trunk/docutils/docutils/utils/smartquotes.py	2022-06-10 11:08:46 UTC (rev 9067)
+++ trunk/docutils/docutils/utils/smartquotes.py	2022-06-13 12:05:08 UTC (rev 9068)
@@ -927,7 +927,7 @@
     # parser.add_argument("input", help="Input stream, use '-' for stdin.")
     parser.add_argument("-a", "--action", default="1",
                         help="what to do with the input (see --actionhelp)")
-    parser.add_argument("-e", "--encoding", default="utf8",
+    parser.add_argument("-e", "--encoding", default="utf-8",
                         help="text encoding")
     parser.add_argument("-l", "--language", default=defaultlanguage,
                         help="text language (BCP47 tag), "

Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py	2022-06-10 11:08:46 UTC (rev 9067)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py	2022-06-13 12:05:08 UTC (rev 9068)
@@ -271,12 +271,12 @@
         # get template string from file
         templatepath = self.document.settings.template
         try:
-            with open(templatepath, encoding='utf8') as fp:
+            with open(templatepath, encoding='utf-8') as fp:
                 template = fp.read()
         except IOError:
             templatepath = os.path.join(self.default_template_path,
                                         templatepath)
-            with open(templatepath, encoding='utf8') as fp:
+            with open(templatepath, encoding='utf-8') as fp:
                 template = fp.read()
         # fill template
         self.assemble_parts()  # create dictionary of parts
@@ -597,7 +597,7 @@
 
 _docutils_sty = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              'docutils.sty')
-with open(_docutils_sty, encoding='utf8') as fp:
+with open(_docutils_sty, encoding='utf-8') as fp:
     for line in fp:
         line = line.strip('% \n')
         if not line.endswith('::'):
@@ -1459,7 +1459,7 @@
               # 'iso-8859-6': ''   # arabic
               # 'iso-8859-7': ''   # greek
               # 'iso-8859-8': ''   # hebrew
-              # 'iso-8859-10': ''   # latin6, more complete iso-8859-4
+              # 'iso-8859-10': ''  # latin6, more complete iso-8859-4
               }
         encoding = docutils_encoding.lower()
         if encoding in tr:

Modified: trunk/docutils/test/test_dependencies.py
===================================================================
--- trunk/docutils/test/test_dependencies.py	2022-06-10 11:08:46 UTC (rev 9067)
+++ trunk/docutils/test/test_dependencies.py	2022-06-13 12:05:08 UTC (rev 9068)
@@ -46,7 +46,7 @@
         recorder.close()
         # Read the record file:
         record = docutils.io.FileInput(source_path=recordfile,
-                                       encoding='utf8')
+                                       encoding='utf-8')
         return record.read().splitlines()
 
     def test_dependencies_xml(self):

Modified: trunk/docutils/test/test_error_reporting.py
===================================================================
--- trunk/docutils/test/test_error_reporting.py	2022-06-10 11:08:46 UTC (rev 9067)
+++ trunk/docutils/test/test_error_reporting.py	2022-06-13 12:05:08 UTC (rev 9068)
@@ -144,7 +144,7 @@
         e.write(AttributeError(u' e\xfc'))
         self.assertEqual(buf.getvalue(), b'b\xfc u\\xfc e\\xfc')
         # encode with `encoding` attribute
-        e.encoding = 'utf8'
+        e.encoding = 'utf-8'
         e.write(u' u\xfc')
         self.assertEqual(buf.getvalue(), b'b\xfc u\\xfc e\\xfc u\xc3\xbc')
 

Modified: trunk/docutils/test/test_io.py
===================================================================
--- trunk/docutils/test/test_io.py	2022-06-10 11:08:46 UTC (rev 9067)
+++ trunk/docutils/test/test_io.py	2022-06-13 12:05:08 UTC (rev 9068)
@@ -33,7 +33,7 @@
 
 
 class mock_stdout(UBuf):
-    encoding = 'utf8'
+    encoding = 'utf-8'
 
     def __init__(self):
         self.buffer = BBuf()
@@ -44,8 +44,9 @@
 
     def test_check_encoding_true(self):
         """Return `True` if lookup returns the same codec"""
+        self.assertEqual(io.check_encoding(mock_stdout, 'utf-8'), True)
+        self.assertEqual(io.check_encoding(mock_stdout, 'utf_8'), True)
         self.assertEqual(io.check_encoding(mock_stdout, 'utf8'), True)
-        self.assertEqual(io.check_encoding(mock_stdout, 'utf-8'), True)
         self.assertEqual(io.check_encoding(mock_stdout, 'UTF-8'), True)
 
     def test_check_encoding_false(self):
@@ -80,7 +81,7 @@
 
     def test_bom(self):
         input = io.StringInput(source=b'\xef\xbb\xbf foo \xef\xbb\xbf bar',
-                               encoding='utf8')
+                               encoding='utf-8')
         # Assert BOMs are gone.
         self.assertEqual(input.read(), ' foo  bar')
         # With unicode input:
@@ -129,9 +130,9 @@
         self.assertEqual(data, ['Some include text.\n'])
 
     def test_heuristics_no_utf8(self):
-        # if no encoding is given and decoding with utf8 fails,
+        # if no encoding is given and decoding with utf-8 fails,
         # use either the locale encoding (if specified) or latin-1:
-        if io.locale_encoding != "utf8":
+        if io.locale_encoding.lower() not in ('utf-8', 'utf8'):
             # in Py3k, the locale encoding is used without --input-encoding
             # skipping the heuristic unless decoding fails.
             return
@@ -174,7 +175,7 @@
         self.assertEqual(self.udrain.getvalue(), self.udata)
 
     def test_write_utf8(self):
-        fo = io.FileOutput(destination=self.udrain, encoding='utf8',
+        fo = io.FileOutput(destination=self.udrain, encoding='utf-8',
                            autoclose=False)
         fo.write(self.udata)
         self.assertEqual(self.udrain.getvalue(), self.udata)
@@ -187,7 +188,7 @@
     # With destination in binary mode, data must be binary string
     # and is written as-is:
     def test_write_bytes(self):
-        fo = io.FileOutput(destination=self.bdrain, encoding='utf8',
+        fo = io.FileOutput(destination=self.bdrain, encoding='utf-8',
                            mode='wb', autoclose=False)
         fo.write(self.bdata)
         self.assertEqual(self.bdrain.getvalue(), self.bdata)
@@ -233,7 +234,7 @@
         e.write(AttributeError(' e\xfc'))
         self.assertEqual(buf.getvalue(), b'b\xfc u\\xfc e\\xfc')
         # encode with `encoding` attribute
-        e.encoding = 'utf8'
+        e.encoding = 'utf-8'
         e.write(' u\xfc')
         self.assertEqual(buf.getvalue(), b'b\xfc u\\xfc e\\xfc u\xc3\xbc')
 

Modified: trunk/docutils/test/test_parsers/test_rst/test_directives/test_date.py
===================================================================
--- trunk/docutils/test/test_parsers/test_rst/test_directives/test_date.py	2022-06-10 11:08:46 UTC (rev 9067)
+++ trunk/docutils/test/test_parsers/test_rst/test_directives/test_date.py	2022-06-13 12:05:08 UTC (rev 9068)
@@ -62,15 +62,16 @@
 ]
 
 # some locales return non-ASCII characters for names of days or months
-if locale_encoding in ['utf8', 'utf-8', 'latin-1']:
+# ensure the directive handles them correctly
+if locale_encoding.lower() in ('utf8', 'utf-8', 'latin-1', 'iso8859-1'):
     totest['decode date'] = [
     ["""\
-.. |date| date:: t\xc3glich
+.. |date| date:: täglich
 """,
     """\
 <document source="test data">
     <substitution_definition names="date">
-        t\xc3glich
+        täglich
 """],
     ]
 

Modified: trunk/docutils/test/test_writers/test_docutils_xml.py
===================================================================
--- trunk/docutils/test/test_writers/test_docutils_xml.py	2022-06-10 11:08:46 UTC (rev 9067)
+++ trunk/docutils/test/test_writers/test_docutils_xml.py	2022-06-13 12:05:08 UTC (rev 9068)
@@ -122,7 +122,7 @@
 
 
 def publish_xml(settings, source):
-    return docutils.core.publish_string(source=source.encode('utf8'),
+    return docutils.core.publish_string(source=source.encode('utf-8'),
                                         reader_name='standalone',
                                         writer_name='docutils_xml',
                                         settings_overrides=settings)
@@ -134,7 +134,7 @@
 
 class DocutilsXMLTestCase(DocutilsTestSupport.StandardTestCase):
 
-    settings = {'input_encoding': 'utf8',
+    settings = {'input_encoding': 'utf-8',
                 'output_encoding': 'iso-8859-1',
                 '_disable_config': True,
                 'indents': False,

Modified: trunk/docutils/tools/dev/generate_punctuation_chars.py
===================================================================
--- trunk/docutils/tools/dev/generate_punctuation_chars.py	2022-06-10 11:08:46 UTC (rev 9067)
+++ trunk/docutils/tools/dev/generate_punctuation_chars.py	2022-06-13 12:05:08 UTC (rev 9068)
@@ -213,10 +213,10 @@
     # 301F  LOW DOUBLE PRIME QUOTATION MARK misses the opening pendant:
     ucharlists['Ps'].insert(ucharlists['Pe'].index('\u301f'), '\u301d')
 
-    # print(''.join(ucharlists['Ps']).encode('utf8')
-    # print(''.join(ucharlists['Pe']).encode('utf8')
-    # print(''.join(ucharlists['Pi']).encode('utf8')
-    # print(''.join(ucharlists['Pf']).encode('utf8')
+    # print(''.join(ucharlists['Ps']).encode('utf-8')
+    # print(''.join(ucharlists['Pe']).encode('utf-8')
+    # print(''.join(ucharlists['Pi']).encode('utf-8')
+    # print(''.join(ucharlists['Pf']).encode('utf-8')
 
     # The Docutils character categories
     # ---------------------------------
@@ -364,10 +364,10 @@
 
         print_differences(openers, o, 'openers')
         if o_wide:
-            print('+ openers-wide = r"""%s"""' % o_wide.encode('utf8'))
+            print('+ openers-wide = r"""%s"""' % o_wide.encode('utf-8'))
         print_differences(closers, c, 'closers')
         if c_wide:
-            print('+ closers-wide = r"""%s"""' % c_wide.encode('utf8'))
+            print('+ closers-wide = r"""%s"""' % c_wide.encode('utf-8'))
 
         print_differences(delimiters, d + d_wide, 'delimiters')
         print_differences(closing_delimiters, cd, 'closing_delimiters')

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.