|
From: <mi...@us...> - 2024-10-24 21:05:03
|
Revision: 9967
http://sourceforge.net/p/docutils/code/9967
Author: milde
Date: 2024-10-24 21:05:01 +0000 (Thu, 24 Oct 2024)
Log Message:
-----------
Start reconciling "colwidth" attribute definition and use.
First, backwards compatible changes get the handling of the
`<colspec>` element's "colwidth" attribute in line with the
definition in the Exchange Table Model.
The "colwidth" attribute is defined in the "Exchange Table Model"
as either a *proportional measure* or a *fixed measure*.
Currently, Docutils supports only unitless values and interprets them
as proportional measures while the Exchange Table Model interprets
unitless values as fixed measures with default unit “pt”!
The new optional argument "unit_pattern" for `nodes.parse_measure()`
allows configuring what is recognized as a unit.
The new validation function `nodes.validate_colwidth()`
for the "colwidth" attribute also accepts conformant values like
"5.2*" (for 5.2 times the "unit proportion").
For backwards compatibility reasons, the attribute is still stored
as numerical value in Python doctree element instances.
New auxiliary method `nodes.colspec.propwidth()`:
returns the "colwidth" attribute as number if it is a
proportional measure.
Announce future changes in the RELEASE-NOTES.
Add/modify test cases.
Modified Paths:
--------------
trunk/docutils/RELEASE-NOTES.rst
trunk/docutils/docutils/nodes.py
trunk/docutils/docutils/writers/_html_base.py
trunk/docutils/docutils/writers/html4css1/__init__.py
trunk/docutils/docutils/writers/latex2e/__init__.py
trunk/docutils/docutils/writers/odf_odt/__init__.py
trunk/docutils/test/test_nodes.py
trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py
Modified: trunk/docutils/RELEASE-NOTES.rst
===================================================================
--- trunk/docutils/RELEASE-NOTES.rst 2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/RELEASE-NOTES.rst 2024-10-24 21:05:01 UTC (rev 9967)
@@ -46,7 +46,7 @@
Document Tree / Docutils DTD
----------------------------
-* Do not lowercase reference names in the `refname attribute`_
+* Do not lowercase reference names in the `"refname" attribute`_
(matching hyperlinks, footnotes, and citations remains case insensitive),
and drop the ``name`` attribute from <reference> nodes
in Docutils 1.0.
@@ -57,8 +57,16 @@
* The <footnote> element's first child (<label>) will become mandatory
in Docutils 1.0.
-.. _refname attribute: docs/ref/doctree.html#refname
+* Values of the `"colwidth" attribute`_ will be stored in Python
+ element instances as `str` (with unit "*" for proportional values)
+ in Docutils 1.0. (The method `nodes.colspec.propwidth()` provides
+ a stable means to extract a proportional value as number.)
+ The default unit will change to "pt" in Docutils 2.0.
+
+.. _"refname" attribute: docs/ref/doctree.html#refname
+.. _"colwidth" attribute: docs/ref/doctree.html#colwidth
+
Writers
-------
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/docutils/nodes.py 2024-10-24 21:05:01 UTC (rev 9967)
@@ -37,7 +37,6 @@
# import docutils.transforms # -> delayed import in document.__init__()
if TYPE_CHECKING:
- import numbers
from collections.abc import (Callable, Iterable, Iterator,
Mapping, Sequence)
from types import ModuleType
@@ -2414,7 +2413,20 @@
'align', 'char', 'charoff', 'colname', 'colnum',
'colsep', 'colwidth', 'rowsep', 'stub')
+ def propwidth(self) -> int|float:
+ """Return numerical value of "colwidth__" attribute. Default 1.
+ Raise ValueError if "colwidth" is zero, negative, or a *fixed value*.
+
+ Provisional.
+
+ __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
+ """
+ # Move current implementation of validate_colwidth() here
+ # in Docutils 1.0
+ return validate_colwidth(self.get('colwidth', ''))
+
+
class thead(Part, Element):
"""Row(s) that form the head of a `tgroup`."""
valid_attributes: Final = Element.valid_attributes + ('valign',)
@@ -3059,14 +3071,20 @@
return '"%s"' % value
-def parse_measure(measure: str) -> tuple[numbers.Rational, str]:
- """Parse a measure__, return value + optional unit.
+def parse_measure(measure: str, unit_pattern: str = '[a-zA-Zµ]*|%?'
+ ) -> tuple[int|float, str]:
+ """Parse a measure__, return value + unit.
+ `unit_pattern` is a regular expression describing recognized units.
+ The default is suited for (but not limited to) CSS3 units and SI units.
+ It matches runs of ASCII letters or Greek mu, a single percent sign,
+ or no unit.
+
__ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
Provisional.
"""
- match = re.fullmatch('(-?[0-9.]+) *([a-zA-Zµ]*|%?)', measure)
+ match = re.fullmatch(f'(-?[0-9.]+) *({unit_pattern})', measure)
try:
try:
value = int(match.group(1))
@@ -3149,7 +3167,8 @@
See `parse_measure()` for a function returning a "number + unit" tuple.
- The unit may be any run of letters or a percent sign.
+ The unit may be a run of ASCII letters or Greek mu, a single percent sign,
+ or the empty string. Case is preserved.
Provisional.
@@ -3159,6 +3178,31 @@
return f'{value}{unit}'
+def validate_colwidth(measure: str|int|float) -> int|float:
+ """Validate the "colwidth__" attribute.
+
+ Provisional:
+ `measure` must be a `str` and will be returned as normalized `str`
+ (with unit "*" for proportional values) in Docutils 1.0.
+
+ The default unit will change to "pt" in Docutils 2.0.
+
+ __ https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth
+ """
+ if isinstance(measure, (int, float)):
+ value = measure
+ elif measure in ('*', ''): # short for '1*'
+ value = 1
+ else:
+ try:
+ value, unit = parse_measure(measure, unit_pattern='[*]?')
+ except ValueError:
+ value = -1
+ if value <= 0:
+ raise ValueError(f'"{measure}" is no proportional measure.')
+ return value
+
+
def validate_NMTOKEN(value: str) -> str:
"""
Validate a "name token": a `str` of ASCII letters, digits, and [-._].
@@ -3232,7 +3276,7 @@
'colnum': int, # from CALS, currently ignored
'cols': int, # from CALS: "NMTOKEN, […] must be an integer > 0".
'colsep': validate_yesorno,
- 'colwidth': int, # sic! CALS: CDATA (measure or number+'*')
+ 'colwidth': validate_colwidth, # see docstring for pending changes
'content': str, # <meta>
'delimiter': str,
'dir': create_keyword_validator('ltr', 'rtl', 'auto'), # <meta>
Modified: trunk/docutils/docutils/writers/_html_base.py
===================================================================
--- trunk/docutils/docutils/writers/_html_base.py 2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/docutils/writers/_html_base.py 2024-10-24 21:05:01 UTC (rev 9967)
@@ -846,9 +846,9 @@
and 'colwidths-given' not in node.parent.parent['classes']):
return
self.body.append(self.starttag(node, 'colgroup'))
- total_width = sum(node['colwidth'] for node in self.colspecs)
+ total_width = sum(node.propwidth() for node in self.colspecs)
for node in self.colspecs:
- colwidth = node['colwidth'] / total_width
+ colwidth = node.propwidth() / total_width
self.body.append(self.emptytag(node, 'col',
style=f'width: {colwidth:.1%}'))
self.body.append('</colgroup>\n')
Modified: trunk/docutils/docutils/writers/html4css1/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/html4css1/__init__.py 2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/docutils/writers/html4css1/__init__.py 2024-10-24 21:05:01 UTC (rev 9967)
@@ -231,10 +231,10 @@
or ('colwidths-auto' in self.settings.table_style
and 'colwidths-given' not in node.parent.parent['classes'])):
return
- total_width = sum(node['colwidth'] for node in self.colspecs)
+ total_width = sum(node.propwidth() for node in self.colspecs)
self.body.append(self.starttag(node, 'colgroup'))
for node in self.colspecs:
- colwidth = int(node['colwidth'] * 100.0 / total_width + 0.5)
+ colwidth = node.propwidth() * 100.0 / total_width + 0.5
self.body.append(self.emptytag(node, 'col',
width='%i%%' % colwidth))
self.body.append('</colgroup>\n')
Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py 2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py 2024-10-24 21:05:01 UTC (rev 9967)
@@ -989,11 +989,11 @@
width = 80 # assumed standard line length
factor = 0.93 # do not make it full linewidth
# first see if we get too wide.
- total_width = sum(node['colwidth']+1 for node in self._col_specs)
+ total_width = sum(node.propwidth()+1 for node in self._col_specs)
if total_width > width:
factor *= width / total_width
- self._colwidths = [(factor * (node['colwidth']+1)/width)
- + 0.005 for node in self._col_specs]
+ self._colwidths = [(factor * (node.propwidth()+1)/width) + 0.005
+ for node in self._col_specs]
latex_colspecs = ['p{%.3f\\DUtablewidth}' % colwidth
for colwidth in self._colwidths]
else:
@@ -1010,7 +1010,7 @@
allowance = 1
else:
allowance = 0 # "widths" option specified, use exact ratio
- self._colwidths = [(node['colwidth']+allowance)/norm_length
+ self._colwidths = [(node.propwidth()+allowance)/norm_length
for node in self._col_specs]
total_width = sum(self._colwidths)
# Limit to 100%, force 100% if table width is specified:
@@ -1067,7 +1067,7 @@
n_c = len(self._col_specs)
a.append('\\endhead\n')
# footer on all but last page (if it fits):
- t_width = sum(node['colwidth']+2 for node in self._col_specs)
+ t_width = sum(node.propwidth()+2 for node in self._col_specs)
if t_width > 30 or (t_width > 12 and not self.colwidths_auto):
a.append(r'\multicolumn{%d}{%s}'
% (n_c, self.get_multicolumn_width(0, n_c))
Modified: trunk/docutils/docutils/writers/odf_odt/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/odf_odt/__init__.py 2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/docutils/writers/odf_odt/__init__.py 2024-10-24 21:05:01 UTC (rev 9967)
@@ -3085,7 +3085,7 @@
'%s%%d.%%s' % TABLESTYLEPREFIX,
(self.table_count, chr(self.column_count), )
)
- colwidth = node['colwidth'] / 12.0
+ colwidth = node.propwidth() / 12.0
el1 = SubElement(self.automatic_styles, 'style:style', attrib={
'style:name': colspec_name,
'style:family': 'table-column',
Modified: trunk/docutils/test/test_nodes.py
===================================================================
--- trunk/docutils/test/test_nodes.py 2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/test/test_nodes.py 2024-10-24 21:05:01 UTC (rev 9967)
@@ -465,6 +465,31 @@
self.assertEqual(len(parent), 5)
+class ColspecTests(unittest.TestCase):
+
+ def test_propwidth(self):
+ # Return colwidth attribute value if it is a proportional measure.
+ colspec = nodes.colspec()
+ colspec['colwidth'] = '8.2*' # value + '*'
+ self.assertEqual(colspec.propwidth(), 8.2)
+ colspec['colwidth'] = '2' # in Docutils < 2.0, default unit is '*'
+ self.assertEqual(colspec.propwidth(), 2)
+ colspec['colwidth'] = '20%' # percentual values not supported
+ with self.assertRaisesRegex(ValueError, '"20%" is no proportional me'):
+ colspec.propwidth()
+ colspec['colwidth'] = '2em' # fixed values not supported
+ with self.assertRaisesRegex(ValueError, '"2em" is no proportional me'):
+ colspec.propwidth()
+ colspec['colwidth'] = '0*' # value must be positive
+ with self.assertRaisesRegex(ValueError, r'"0\*" is no proportional '):
+ colspec.propwidth()
+ # for backwards compatibility, numerical values are accepted
+ colspec['colwidth'] = 8.2
+ self.assertEqual(colspec.propwidth(), 8.2)
+ colspec['colwidth'] = 2
+ self.assertEqual(colspec.propwidth(), 2)
+
+
class ElementValidationTests(unittest.TestCase):
def test_validate(self):
@@ -1114,22 +1139,33 @@
['a name', 'two\\', r'n\ames'])
def test_parse_measure(self):
- # measure is number + optional unit (letter(s) or percentage)
+ # measure is number + unit
+ # By default, a run of ASCII letters or µ, a single percent sign,
+ # or the empty string are recognized as unit.
self.assertEqual(nodes.parse_measure('8ex'), (8, 'ex'))
self.assertEqual(nodes.parse_measure('2.5'), (2.5, ''))
self.assertEqual(nodes.parse_measure('-2s'), (-2, 's'))
+ # Spaces between number and unit are tolerated, case is preserved:
self.assertEqual(nodes.parse_measure('2 µF'), (2, 'µF'))
self.assertEqual(nodes.parse_measure('10 EUR'), (10, 'EUR'))
self.assertEqual(nodes.parse_measure('.5 %'), (.5, '%'))
- # scientific notation not supported
+ # Only a single percent sign is allowed:
+ with self.assertRaisesRegex(ValueError, '"2%%" is no valid measure'):
+ nodes.parse_measure('2%%')
+ # Scientific notation is not supported:
with self.assertRaisesRegex(ValueError, '"3e-4 mm" is no valid '):
nodes.parse_measure('3e-4 mm')
- # unit must follow the number
+ # Units must follow the number:
with self.assertRaisesRegex(ValueError, '"EUR 23" is no valid '):
nodes.parse_measure('EUR 23')
- # only single percent sign allowed
- with self.assertRaisesRegex(ValueError, '"2%%" is no valid measure'):
- nodes.parse_measure('2%%')
+ # Supported units can be configured with a "unit regexp pattern":
+ self.assertEqual(nodes.parse_measure('10 EUR', 'EUR|€'), (10, 'EUR'))
+ self.assertEqual(nodes.parse_measure('10 €', 'EUR|€'), (10, '€'))
+ with self.assertRaisesRegex(ValueError, '"20 DM" is no valid measure'):
+ nodes.parse_measure('20 DM', 'EUR|€')
+ # Measures without unit are only supported, if the pattern says so:
+ with self.assertRaisesRegex(ValueError, '"20" is no valid measure'):
+ nodes.parse_measure('20', 'EUR|€')
class AttributeTypeTests(unittest.TestCase):
@@ -1171,7 +1207,7 @@
nodes.validate_identifier_list(s2)
def test_validate_measure(self):
- # number (may be decimal fraction) + optional unit
+ # measure == number (int or decimal fraction) + optional unit
# internal whitespace is removed
self.assertEqual(nodes.validate_measure('8 ex'), '8ex')
self.assertEqual(nodes.validate_measure('2'), '2')
@@ -1199,6 +1235,7 @@
def test_validate_NMTOKEN(self):
# str with ASCII-letters, digits, hyphen, underscore, and full-stop.
self.assertEqual(nodes.validate_NMTOKEN('-8x_.'), '-8x_.')
+ # internal space is not allowed
with self.assertRaises(ValueError):
nodes.validate_NMTOKEN('why me')
Modified: trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py
===================================================================
--- trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py 2024-10-22 21:41:26 UTC (rev 9966)
+++ trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py 2024-10-24 21:05:01 UTC (rev 9967)
@@ -141,7 +141,8 @@
node = docutils_xml.parse_element(xml)
self.assertEqual(node.attributes, self.common_attributes | expected)
- def test_auto(self): # CDATA (str) number sequence: '1' or '*'
+ def test_auto(self): # CDATA (str)
+ # also encodes footnote label type: '1': numbered, '*': symbols
xml = '<footnote auto="*" backrefs="footnote-reference-2" />'
expected = {'auto': '*',
'backrefs': ['footnote-reference-2']}
@@ -159,7 +160,15 @@
# 'classes': classnames.type (list[str]) → test_bullet
- def test_colwidth(self): # CDATA (int) sic!
+ def test_colwidth(self): # CDATA (int)
+ # Provisional. Currently, Docutils handles "colwidth" differently
+ # from the Exchange Table Model. This will eventually change
+ # (see https://docutils.sourceforge.io/docs/ref/doctree.html#colwidth).
+ xml = '<colspec colwidth="33*" stub="1" />'
+ expected = {'colwidth': 33, 'stub': 1}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+ # Note: the upstream default unit is "pt", not "*".
xml = '<colspec colwidth="33" stub="1" />'
expected = {'colwidth': 33, 'stub': 1}
node = docutils_xml.parse_element(xml)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|