[Docutils-checkins] SF.net SVN: docutils:[9967] trunk/docutils

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 9967
          http://sourceforge.net/p/docutils/code/9967
Author:   milde
Date:     2024-10-24 21:05:01 +0000 (Thu, 24 Oct 2024)
Log Message:
-----------
Start reconciling "colwidth" attribute definition and use.

First, backwards compatible changes get the handling of the
`<colspec>` element's "colwidth" attribute in line with the
definition in the Exchange Table Model.

The "colwidth" attribute is defined in the "Exchange Table Model"
as either a *proportional measure* or a *fixed measure*.
Currently, Docutils supports only unitless values and interprets them
as proportional measures while the Exchange Table Model interprets
unitless values as fixed measures with default unit “pt”!

The new optional argument "unit_pattern" for `nodes.parse_measure()`
allows configuring what is recognized as a unit.

The new validation function `nodes.validate_colwidth()`
for the "colwidth" attribute also accepts conformant values like
"5.2*" (for 5.2 times the "unit proportion").
For backwards compatibility reasons, the attribute is still stored
as numerical value in Python doctree element instances.

New auxiliary method `nodes.colspec.propwidth()`:
returns the "colwidth" attribute as number if it is a
proportional measure.

Announce future changes in the RELEASE-NOTES.

Add/modify test cases.

Modified Paths:
--------------
    trunk/docutils/RELEASE-NOTES.rst
    trunk/docutils/docutils/nodes.py
    trunk/docutils/docutils/writers/_html_base.py
    trunk/docutils/docutils/writers/html4css1/__init__.py
    trunk/docutils/docutils/writers/latex2e/__init__.py
    trunk/docutils/docutils/writers/odf_odt/__init__.py
    trunk/docutils/test/test_nodes.py
    trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py

Modified: trunk/docutils/RELEASE-NOTES.rst
===================================================================

--- trunk/docutils/RELEASE-NOTES.rst	2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/RELEASE-NOTES.rst	2024-10-24 21:05:01 UTC (rev 9967)
@@ -46,7 +46,7 @@
 Document Tree / Docutils DTD
 ----------------------------
 
-* Do not lowercase reference names in the `refname attribute`_
+* Do not lowercase reference names in the `"refname" attribute`_
   (matching hyperlinks, footnotes, and citations remains case insensitive),
   and drop the ``name`` attribute from <reference> nodes
   in Docutils 1.0.
@@ -57,8 +57,16 @@
 * The <footnote> element's first child (<label>) will become mandatory
   in Docutils 1.0.
 
-.. _refname attribute: docs/ref/doctree.html#refname
+* Values of the `"colwidth" attribute`_ will be stored in Python
+  element instances as `str` (with unit "*" for proportional values)
+  in Docutils 1.0.  (The method `nodes.colspec.propwidth()` provides
+  a stable means to extract a proportional value as number.)
 
+  The default unit will change to "pt" in Docutils 2.0.
+
+.. _"refname" attribute: docs/ref/doctree.html#refname
+.. _"colwidth" attribute: docs/ref/doctree.html#colwidth
+
 Writers
 -------
 

Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py	2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/docutils/nodes.py	2024-10-24 21:05:01 UTC (rev 9967)
@@ -37,7 +37,6 @@
 # import docutils.transforms # -> delayed import in document.__init__()
 
 if TYPE_CHECKING:
-    import numbers
     from collections.abc import (Callable, Iterable, Iterator,
                                  Mapping, Sequence)
     from types import ModuleType
@@ -2414,7 +2413,20 @@
         'align', 'char', 'charoff', 'colname', 'colnum',
         'colsep', 'colwidth', 'rowsep', 'stub')
 
+    def propwidth(self) -> int|float:
+        """Return numerical value of "colwidth__" attribute. Default 1.
 
+        Raise ValueError if "colwidth" is zero, negative, or a *fixed value*.
+
+        Provisional.
+
+        __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
+        """
+        # Move current implementation of validate_colwidth() here
+        # in Docutils 1.0
+        return validate_colwidth(self.get('colwidth', ''))
+
+
 class thead(Part, Element):
     """Row(s) that form the head of a `tgroup`."""
     valid_attributes: Final = Element.valid_attributes + ('valign',)
@@ -3059,14 +3071,20 @@
     return '"%s"' % value
 
 
-def parse_measure(measure: str) -> tuple[numbers.Rational, str]:
-    """Parse a measure__, return value + optional unit.
+def parse_measure(measure: str, unit_pattern: str = '[a-zA-Zµ]*|%?'
+                  ) -> tuple[int|float, str]:
+    """Parse a measure__, return value + unit.
 
+    `unit_pattern` is a regular expression describing recognized units.
+    The default is suited for (but not limited to) CSS3 units and SI units.
+    It matches runs of ASCII letters or Greek mu, a single percent sign,
+    or no unit.
+
     __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
 
     Provisional.
     """
-    match = re.fullmatch('(-?[0-9.]+) *([a-zA-Zµ]*|%?)', measure)
+    match = re.fullmatch(f'(-?[0-9.]+) *({unit_pattern})', measure)
     try:
         try:
             value = int(match.group(1))
@@ -3149,7 +3167,8 @@
 
     See `parse_measure()` for a function returning a "number + unit" tuple.
 
-    The unit may be any run of letters or a percent sign.
+    The unit may be a run of ASCII letters or Greek mu, a single percent sign,
+    or the empty string. Case is preserved.
 
     Provisional.
 
@@ -3159,6 +3178,31 @@
     return f'{value}{unit}'
 
 
+def validate_colwidth(measure: str|int|float) -> int|float:
+    """Validate the "colwidth__" attribute.
+
+    Provisional:
+        `measure` must be a `str` and will be returned as normalized `str`
+        (with unit "*" for proportional values) in Docutils 1.0.
+
+        The default unit will change to "pt" in Docutils 2.0.
+
+    __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
+    """
+    if isinstance(measure, (int, float)):
+        value = measure
+    elif measure in ('*', ''):  # short for '1*'
+        value = 1
+    else:
+        try:
+            value, unit = parse_measure(measure, unit_pattern='[*]?')
+        except ValueError:
+            value = -1
+    if value <= 0:
+        raise ValueError(f'"{measure}" is no proportional measure.')
+    return value
+
+
 def validate_NMTOKEN(value: str) -> str:
     """
     Validate a "name token": a `str` of ASCII letters, digits, and [-._].
@@ -3232,7 +3276,7 @@
     'colnum': int,  # from CALS, currently ignored
     'cols': int,  # from CALS: "NMTOKEN, […] must be an integer > 0".
     'colsep': validate_yesorno,
-    'colwidth': int,  # sic! CALS: CDATA (measure or number+'*')
+    'colwidth': validate_colwidth,  # see docstring for pending changes
     'content': str,  # <meta>
     'delimiter': str,
     'dir': create_keyword_validator('ltr', 'rtl', 'auto'),  # <meta>

Modified: trunk/docutils/docutils/writers/_html_base.py
===================================================================
--- trunk/docutils/docutils/writers/_html_base.py	2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/docutils/writers/_html_base.py	2024-10-24 21:05:01 UTC (rev 9967)
@@ -846,9 +846,9 @@
             and 'colwidths-given' not in node.parent.parent['classes']):
             return
         self.body.append(self.starttag(node, 'colgroup'))
-        total_width = sum(node['colwidth'] for node in self.colspecs)
+        total_width = sum(node.propwidth() for node in self.colspecs)
         for node in self.colspecs:
-            colwidth = node['colwidth'] / total_width
+            colwidth = node.propwidth() / total_width
             self.body.append(self.emptytag(node, 'col',
                                            style=f'width: {colwidth:.1%}'))
         self.body.append('</colgroup>\n')

Modified: trunk/docutils/docutils/writers/html4css1/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/html4css1/__init__.py	2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/docutils/writers/html4css1/__init__.py	2024-10-24 21:05:01 UTC (rev 9967)
@@ -231,10 +231,10 @@
             or ('colwidths-auto' in self.settings.table_style
                 and 'colwidths-given' not in node.parent.parent['classes'])):
             return
-        total_width = sum(node['colwidth'] for node in self.colspecs)
+        total_width = sum(node.propwidth() for node in self.colspecs)
         self.body.append(self.starttag(node, 'colgroup'))
         for node in self.colspecs:
-            colwidth = int(node['colwidth'] * 100.0 / total_width + 0.5)
+            colwidth = node.propwidth() * 100.0 / total_width + 0.5
             self.body.append(self.emptytag(node, 'col',
                                            width='%i%%' % colwidth))
         self.body.append('</colgroup>\n')

Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py	2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py	2024-10-24 21:05:01 UTC (rev 9967)
@@ -989,11 +989,11 @@
             width = 80  # assumed standard line length
             factor = 0.93  # do not make it full linewidth
             # first see if we get too wide.
-            total_width = sum(node['colwidth']+1 for node in self._col_specs)
+            total_width = sum(node.propwidth()+1 for node in self._col_specs)
             if total_width > width:
                 factor *= width / total_width
-            self._colwidths = [(factor * (node['colwidth']+1)/width)
-                               + 0.005 for node in self._col_specs]
+            self._colwidths = [(factor * (node.propwidth()+1)/width) + 0.005
+                               for node in self._col_specs]
             latex_colspecs = ['p{%.3f\\DUtablewidth}' % colwidth
                               for colwidth in self._colwidths]
         else:
@@ -1010,7 +1010,7 @@
                 allowance = 1
             else:
                 allowance = 0  # "widths" option specified, use exact ratio
-            self._colwidths = [(node['colwidth']+allowance)/norm_length
+            self._colwidths = [(node.propwidth()+allowance)/norm_length
                                for node in self._col_specs]
             total_width = sum(self._colwidths)
             # Limit to 100%, force 100% if table width is specified:
@@ -1067,7 +1067,7 @@
                 n_c = len(self._col_specs)
                 a.append('\\endhead\n')
                 # footer on all but last page (if it fits):
-                t_width = sum(node['colwidth']+2 for node in self._col_specs)
+                t_width = sum(node.propwidth()+2 for node in self._col_specs)
                 if t_width > 30 or (t_width > 12 and not self.colwidths_auto):
                     a.append(r'\multicolumn{%d}{%s}'
                              % (n_c, self.get_multicolumn_width(0, n_c))

Modified: trunk/docutils/docutils/writers/odf_odt/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/odf_odt/__init__.py	2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/docutils/writers/odf_odt/__init__.py	2024-10-24 21:05:01 UTC (rev 9967)
@@ -3085,7 +3085,7 @@
             '%s%%d.%%s' % TABLESTYLEPREFIX,
             (self.table_count, chr(self.column_count), )
         )
-        colwidth = node['colwidth'] / 12.0
+        colwidth = node.propwidth() / 12.0
         el1 = SubElement(self.automatic_styles, 'style:style', attrib={
             'style:name': colspec_name,
             'style:family': 'table-column',

Modified: trunk/docutils/test/test_nodes.py
===================================================================
--- trunk/docutils/test/test_nodes.py	2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/test/test_nodes.py	2024-10-24 21:05:01 UTC (rev 9967)
@@ -465,6 +465,31 @@
         self.assertEqual(len(parent), 5)
 
 
+class ColspecTests(unittest.TestCase):
+
+    def test_propwidth(self):
+        # Return colwidth attribute value if it is a proportional measure.
+        colspec = nodes.colspec()
+        colspec['colwidth'] = '8.2*'  # value + '*'
+        self.assertEqual(colspec.propwidth(), 8.2)
+        colspec['colwidth'] = '2'  # in Docutils < 2.0, default unit is '*'
+        self.assertEqual(colspec.propwidth(), 2)
+        colspec['colwidth'] = '20%'  # percentual values not supported
+        with self.assertRaisesRegex(ValueError, '"20%" is no proportional me'):
+            colspec.propwidth()
+        colspec['colwidth'] = '2em'  # fixed values not supported
+        with self.assertRaisesRegex(ValueError, '"2em" is no proportional me'):
+            colspec.propwidth()
+        colspec['colwidth'] = '0*'  # value must be positive
+        with self.assertRaisesRegex(ValueError, r'"0\*" is no proportional '):
+            colspec.propwidth()
+        # for backwards compatibility, numerical values are accepted
+        colspec['colwidth'] = 8.2
+        self.assertEqual(colspec.propwidth(), 8.2)
+        colspec['colwidth'] = 2
+        self.assertEqual(colspec.propwidth(), 2)
+
+
 class ElementValidationTests(unittest.TestCase):
 
     def test_validate(self):
@@ -1114,22 +1139,33 @@
                          ['a name', 'two\\', r'n\ames'])
 
     def test_parse_measure(self):
-        # measure is number + optional unit (letter(s) or percentage)
+        # measure is number + unit
+        # By default, a run of ASCII letters or µ, a single percent sign,
+        # or the empty string are recognized as unit.
         self.assertEqual(nodes.parse_measure('8ex'), (8, 'ex'))
         self.assertEqual(nodes.parse_measure('2.5'), (2.5, ''))
         self.assertEqual(nodes.parse_measure('-2s'), (-2, 's'))
+        # Spaces between number and unit are tolerated, case is preserved:
         self.assertEqual(nodes.parse_measure('2 µF'), (2, 'µF'))
         self.assertEqual(nodes.parse_measure('10 EUR'), (10, 'EUR'))
         self.assertEqual(nodes.parse_measure('.5 %'), (.5, '%'))
-        # scientific notation not supported
+        # Only a single percent sign is allowed:
+        with self.assertRaisesRegex(ValueError, '"2%%" is no valid measure'):
+            nodes.parse_measure('2%%')
+        # Scientific notation is not supported:
         with self.assertRaisesRegex(ValueError, '"3e-4 mm" is no valid '):
             nodes.parse_measure('3e-4 mm')
-        # unit must follow the number
+        # Units must follow the number:
         with self.assertRaisesRegex(ValueError, '"EUR 23" is no valid '):
             nodes.parse_measure('EUR 23')
-        # only single percent sign allowed
-        with self.assertRaisesRegex(ValueError, '"2%%" is no valid measure'):
-            nodes.parse_measure('2%%')
+        # Supported units can be configured with a "unit regexp pattern":
+        self.assertEqual(nodes.parse_measure('10 EUR', 'EUR|€'), (10, 'EUR'))
+        self.assertEqual(nodes.parse_measure('10 €', 'EUR|€'), (10, '€'))
+        with self.assertRaisesRegex(ValueError, '"20 DM" is no valid measure'):
+            nodes.parse_measure('20 DM', 'EUR|€')
+        # Measures without unit are only supported, if the pattern says so:
+        with self.assertRaisesRegex(ValueError, '"20" is no valid measure'):
+            nodes.parse_measure('20', 'EUR|€')
 
 
 class AttributeTypeTests(unittest.TestCase):
@@ -1171,7 +1207,7 @@
             nodes.validate_identifier_list(s2)
 
     def test_validate_measure(self):
-        # number (may be decimal fraction) + optional unit
+        # measure == number (int or decimal fraction) + optional unit
         # internal whitespace is removed
         self.assertEqual(nodes.validate_measure('8 ex'), '8ex')
         self.assertEqual(nodes.validate_measure('2'), '2')
@@ -1199,6 +1235,7 @@
     def test_validate_NMTOKEN(self):
         # str with ASCII-letters, digits, hyphen, underscore, and full-stop.
         self.assertEqual(nodes.validate_NMTOKEN('-8x_.'), '-8x_.')
+        # internal space is not allowed
         with self.assertRaises(ValueError):
             nodes.validate_NMTOKEN('why me')
 

Modified: trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py
===================================================================
--- trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py	2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py	2024-10-24 21:05:01 UTC (rev 9967)
@@ -141,7 +141,8 @@
         node = docutils_xml.parse_element(xml)
         self.assertEqual(node.attributes, self.common_attributes | expected)
 
-    def test_auto(self):  # CDATA (str) number sequence: '1' or '*'
+    def test_auto(self):  # CDATA (str)
+        # also encodes footnote label type: '1': numbered, '*': symbols
         xml = '<footnote auto="*" backrefs="footnote-reference-2" />'
         expected = {'auto': '*',
                     'backrefs': ['footnote-reference-2']}
@@ -159,7 +160,15 @@
 
     # 'classes':  classnames.type (list[str])  → test_bullet
 
-    def test_colwidth(self):  # CDATA (int) sic!
+    def test_colwidth(self):  # CDATA (int)
+        # Provisional. Currently, Docutils handles "colwidth" differently
+        # from the Exchange Table Model. This will eventually change
+        # (see https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth).
+        xml = '<colspec colwidth="33*" stub="1" />'
+        expected = {'colwidth': 33, 'stub': 1}
+        node = docutils_xml.parse_element(xml)
+        self.assertEqual(node.attributes, self.common_attributes | expected)
+        # Note: the upstream default unit is "pt", not "*".
         xml = '<colspec colwidth="33" stub="1" />'
         expected = {'colwidth': 33, 'stub': 1}
         node = docutils_xml.parse_element(xml)

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.