|
From: <mi...@us...> - 2024-05-06 12:41:09
|
Revision: 9690
http://sourceforge.net/p/docutils/code/9690
Author: milde
Date: 2024-05-06 12:41:07 +0000 (Mon, 06 May 2024)
Log Message:
-----------
Doctree validation: New configuration setting "validate".
New parser configuration setting "validate".
If True, the parser calls `document.validate()` in `finish_parse()`.
Useful for testing/debugging purposes and with the upcoming
"Docutils XML" parser.
Modified Paths:
--------------
trunk/docutils/docs/user/config.txt
trunk/docutils/docutils/parsers/__init__.py
trunk/docutils/test/data/help/docutils.txt
trunk/docutils/test/data/help/rst2html.txt
trunk/docutils/test/data/help/rst2latex.txt
Modified: trunk/docutils/docs/user/config.txt
===================================================================
--- trunk/docutils/docs/user/config.txt 2024-05-06 08:32:22 UTC (rev 9689)
+++ trunk/docutils/docs/user/config.txt 2024-05-06 12:41:07 UTC (rev 9690)
@@ -635,7 +635,7 @@
strict_visitor
--------------
-When processing a document tree with the Visitor pattern, raise an
+When processing a `document tree`_ with the Visitor pattern, raise an
error if a writer does not support a node type listed as optional.
For transitional development use.
@@ -647,7 +647,7 @@
-------------
List of "classes" attribute values (comma-separated_) that will be
-removed from all elements in the document tree.
+removed from all elements in the `document tree`_.
Values are appended. [#append-values]_
Allows eliding class values that interfere with, e.g, CSS rules from 3rd
@@ -662,7 +662,7 @@
strip_comments
--------------
-Enable or disable the removal of comment elements from the document tree.
+Enable or disable the removal of comment elements from the `document tree`_.
:Default: None (disabled).
:Options: ``--strip-comments``, ``--leave-comments``.
@@ -673,7 +673,7 @@
List of "classes" attribute values (comma-separated_).
Values are appended. [#append-values]_
-Matching elements are removed from the document tree.
+Matching elements are removed from the `document tree`_.
.. WARNING:: Potentially dangerous: may lead to an invalid document tree
and subsequent writer errors. Use with caution.
@@ -723,9 +723,7 @@
.. [#] unless Docutils is run programmatically
using the `Publisher Interface`_
-.. _Publisher Interface: ../api/publisher.html
-
warning_stream
--------------
@@ -776,6 +774,14 @@
*Default*: True. *Options*: ``--raw-enabled``, ``--no-raw``.
+validate
+--------
+
+Validate the parsing result.
+
+*Default*: False. *Options*: ``--validate``, ``--no-validation``.
+
+
[restructuredtext parser]
-------------------------
@@ -2410,9 +2416,13 @@
.. References
+.. _Document Tree: ../ref/doctree.html
+
.. _Docutils Runtime Settings:
.. _runtime settings: ../api/runtime-settings.html
+.. _Publisher Interface: ../api/publisher.html
+
.. RestructuredText Directives
.. _"class" directive: ../ref/rst/directives.html#class
.. _"code": ../ref/rst/directives.html#code
Modified: trunk/docutils/docutils/parsers/__init__.py
===================================================================
--- trunk/docutils/docutils/parsers/__init__.py 2024-05-06 08:32:22 UTC (rev 9689)
+++ trunk/docutils/docutils/parsers/__init__.py 2024-05-06 12:41:07 UTC (rev 9690)
@@ -39,6 +39,13 @@
['--line-length-limit'],
{'metavar': '<length>', 'type': 'int', 'default': 10000,
'validator': frontend.validate_nonnegative_int}),
+ ('Validate the document tree after parsing.',
+ ['--validate'],
+ {'action': 'store_true',
+ 'validator': frontend.validate_boolean}),
+ ('Do not validate the document tree. (default)',
+ ['--no-validation'],
+ {'action': 'store_false', 'dest': 'validate'}),
)
)
component_type = 'parser'
@@ -62,6 +69,8 @@
"""Finalize parse details. Call at end of `self.parse()`."""
self.document.reporter.detach_observer(
self.document.note_parse_message)
+ if self.document.settings.validate:
+ self.document.validate()
_parser_aliases = { # short names for known parsers
Modified: trunk/docutils/test/data/help/docutils.txt
===================================================================
--- trunk/docutils/test/data/help/docutils.txt 2024-05-06 08:32:22 UTC (rev 9689)
+++ trunk/docutils/test/data/help/docutils.txt 2024-05-06 12:41:07 UTC (rev 9690)
@@ -99,6 +99,8 @@
--line-length-limit=<length>
Maximal number of characters in an input line. Default
10 000.
+--validate Validate the document tree after parsing.
+--no-validation Do not validate the document tree. (default)
reStructuredText Parser Options
-------------------------------
Modified: trunk/docutils/test/data/help/rst2html.txt
===================================================================
--- trunk/docutils/test/data/help/rst2html.txt 2024-05-06 08:32:22 UTC (rev 9689)
+++ trunk/docutils/test/data/help/rst2html.txt 2024-05-06 12:41:07 UTC (rev 9690)
@@ -100,6 +100,8 @@
--line-length-limit=<length>
Maximal number of characters in an input line. Default
10 000.
+--validate Validate the document tree after parsing.
+--no-validation Do not validate the document tree. (default)
reStructuredText Parser Options
-------------------------------
Modified: trunk/docutils/test/data/help/rst2latex.txt
===================================================================
--- trunk/docutils/test/data/help/rst2latex.txt 2024-05-06 08:32:22 UTC (rev 9689)
+++ trunk/docutils/test/data/help/rst2latex.txt 2024-05-06 12:41:07 UTC (rev 9690)
@@ -100,6 +100,8 @@
--line-length-limit=<length>
Maximal number of characters in an input line. Default
10 000.
+--validate Validate the document tree after parsing.
+--no-validation Do not validate the document tree. (default)
reStructuredText Parser Options
-------------------------------
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-05-07 11:24:26
|
Revision: 9691
http://sourceforge.net/p/docutils/code/9691
Author: milde
Date: 2024-05-07 11:24:22 +0000 (Tue, 07 May 2024)
Log Message:
-----------
Doctree validation: new functions to validate Element attribute values.
Attribute validate functions:
* convert string representations to correct data type,
* normalize values,
* raise ValueError for invalid attribute names or values.
The `nodes.Element.validate()` function reports a warning
for validity problems if `self.document.reporter` is available
and raises a ValueError if not.
Testing revealed problems with the "recommonmark_wrapper" parser:
* Validating should be done *after* the "clean up" operations.
* One test case uses an invalid class argument (underscore not allowed
by Docutils). As this sample tests an "only Sphinx" feature,
we just drop it from the Docutils test suite.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docs/ref/doctree.txt
trunk/docutils/docutils/nodes.py
trunk/docutils/docutils/parsers/recommonmark_wrapper.py
trunk/docutils/test/test_nodes.py
trunk/docutils/test/test_parsers/test_recommonmark/test_literal_blocks.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-05-06 12:41:07 UTC (rev 9690)
+++ trunk/docutils/HISTORY.txt 2024-05-07 11:24:22 UTC (rev 9691)
@@ -27,15 +27,24 @@
- New `SubStructural` element category class.
- Fix element categories.
- New method `Element.validate()` (work in progress).
+ - New "attribute validating functions"
+ convert string representations to correct data type,
+ normalize values,
+ raise ValueError for invalid attribute names or values.
+* docutils/parsers/recommonmark_wrapper.py
+
+ - New method `Parser.finish_parse()` to clean up (before validating).
+
* docutils/transforms/frontmatter.py
- - Adapt `DocInfo` to fixed element categories.
+ - Update `DocInfo` to work with corrected element categories.
* docutils/writers/manpage.py
- Remove code for unused emdash bullets.
+
Release 0.21.2 (2024-04-23)
===========================
Modified: trunk/docutils/docs/ref/doctree.txt
===================================================================
--- trunk/docutils/docs/ref/doctree.txt 2024-05-06 12:41:07 UTC (rev 9690)
+++ trunk/docutils/docs/ref/doctree.txt 2024-05-07 11:24:22 UTC (rev 9691)
@@ -2587,10 +2587,10 @@
:Parents: Only the `\<document>`_ element contains <meta>.
:Children: The <meta> element has no content.
-:Attributes: The <meta> element contains the attributes *name*,
- *content*, *http-equiv*, *lang*, *dir*, *media*, and
- *scheme* that correspond to the respective attributes
- of the `HTML <meta> element`_.
+:Attributes: The <meta> element contains the attributes
+ *content*, *dir*, *http-equiv*, *lang*, *media*, *name*, and
+ *scheme* that correspond to the respective attributes of the
+ `HTML <meta> element`_.
See also the `\<docinfo>`_ element for displayed meta-data.
The document's `title attribute`_ stores the metadata document title.
@@ -4630,7 +4630,7 @@
elements but typically only used on the `root element`_.
.. note:: All ``docutils.nodes.Node`` instances also support an
- **internal** ``source`` attribute that is used when reporting
+ *internal* ``source`` attribute that is used when reporting
processing problems.
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-05-06 12:41:07 UTC (rev 9690)
+++ trunk/docutils/docutils/nodes.py 2024-05-07 11:24:22 UTC (rev 9691)
@@ -567,6 +567,8 @@
if value is None: # boolean attribute
parts.append('%s="True"' % name)
continue
+ if isinstance(value, bool):
+ value = str(int(value))
if isinstance(value, list):
values = [serial_escape('%s' % (v,)) for v in value]
value = ' '.join(values)
@@ -1093,22 +1095,48 @@
return attr not in cls.common_attributes
def validate_attributes(self):
- # check for undeclared attributes
- # TODO: check attribute values
+ """Normalize and validate element attributes.
+
+ Convert string values to expected datatype.
+ Normalize values.
+
+ Raise `ValueError` for invalid attributes or attribute values.
+
+ Provisional.
+ """
+ messages = []
for key, value in self.attributes.items():
if key.startswith('internal:'):
continue # see docs/user/config.html#expose-internals
if key not in self.valid_attributes:
- raise ValueError(
- f'Element <{self.tagname}> has invalid attribute "{key}".')
+ va = ' '.join(self.valid_attributes)
+ messages.append(f'Attribute "{key}" not one of "{va}".')
+ continue
+ try:
+ self.attributes[key] = ATTRIBUTE_VALIDATORS[key](value)
+ except (ValueError, TypeError, KeyError) as e:
+ messages.append(
+ f'Attribute "{key}" has invalid value "{value}".\n'
+ + e.args[0]) # message argument
+ if messages:
+ raise ValueError('\n'.join(messages))
def validate(self):
- # print(f'validating', self.tagname)
- self.validate_attributes()
+ messages = []
+ try:
+ self.validate_attributes()
+ except ValueError as e:
+ messages.append(e.args[0]) # the message argument
# TODO: check number of children
for child in self.children:
# TODO: check whether child has allowed type
child.validate()
+ if messages:
+ msg = f'Element <{self.tagname}> invalid:\n' + '\n'.join(messages)
+ try:
+ self.document.reporter.warning(msg)
+ except AttributeError:
+ raise ValueError(msg)
# ========
@@ -2443,6 +2471,229 @@
return value.replace('\\', r'\\').replace(' ', r'\ ')
+def split_name_list(s):
+ r"""Split a string at non-escaped whitespace.
+
+ Backslashes escape internal whitespace (cf. `serial_escape()`).
+ Return list of "names" (after removing escaping backslashes).
+
+ >>> split_name_list(r'a\ n\ame two\\ n\\ames')
+ ['a name', 'two\\', r'n\ames']
+
+ Provisional.
+ """
+ s = s.replace('\\', '\x00') # escape with NULL char
+ s = s.replace('\x00\x00', '\\') # unescape backslashes
+ s = s.replace('\x00 ', '\x00\x00') # escaped spaces -> NULL NULL
+ names = s.split(' ')
+ # restore internal spaces, drop other escaping characters
+ return [name.replace('\x00\x00', ' ').replace('\x00', '')
+ for name in names]
+
+
def pseudo_quoteattr(value):
"""Quote attributes for pseudo-xml"""
return '"%s"' % value
+
+
+# Methods to validate `Element attribute`__ values.
+
+# Ensure the expected Python `data type`__, normalize, and check for
+# restrictions.
+#
+# The methods can be used to convert `str` values (eg. from an XML
+# representation) or to validate an existing document tree or node.
+#
+# Cf. `Element.validate_attributes()`, `docutils.parsers.docutils_xml`,
+# and the `ATTRIBUTE_VALIDATORS` mapping below.
+#
+# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
+# __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types
+
+def validate_enumerated_type(*keywords):
+ """
+ Return a function that validates a `str` against given `keywords`.
+
+ Provisional.
+ """
+ def validate_keywords(value):
+ if value not in keywords:
+ allowed = '", \"'.join(keywords)
+ raise ValueError(f'"{value}" is not one of "{allowed}".')
+ return value
+ return validate_keywords
+
+
+def validate_identifier(value):
+ """
+ Validate identifier key or class name.
+
+ Used in `idref.type`__ and for the tokens in `validate_identifier_list()`.
+
+ __ https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type
+
+ Provisional.
+ """
+ if value != make_id(value):
+ raise ValueError(f'"{value}" is no valid id or class name.')
+ return value
+
+
+def validate_identifier_list(value):
+ """
+ A (space-separated) list of ids or class names.
+
+ `value` may be a `list` or a `str` with space separated
+ ids or class names (cf. `validate_identifier()`).
+
+ Used in `classnames.type`__, `ids.type`__, and `idrefs.type`__.
+
+ __ https://docutils.sourceforge.io/docs/ref/doctree.html#classnames-type
+ __ https://docutils.sourceforge.io/docs/ref/doctree.html#ids-type
+ __ https://docutils.sourceforge.io/docs/ref/doctree.html#idrefs-type
+
+ Provisional.
+ """
+ if isinstance(value, str):
+ value = value.split()
+ for token in value:
+ validate_identifier(token)
+ return value
+
+
+def validate_measure(value):
+ """
+ Validate a length measure__ (number + recognized unit).
+
+ __ https://docutils.sourceforge.io/docs/ref/doctree.html#measure
+
+ Provisional.
+ """
+ units = 'em|ex|px|in|cm|mm|pt|pc|%'
+ if not re.fullmatch(f'[-0-9.]+ *({units}?)', value):
+ raise ValueError(f'"{value}" is no valid measure. '
+ f'Valid units: {units.replace("|", " ")}.')
+ return value.replace(' ', '').strip()
+
+
+def validate_NMTOKEN(value):
+ """
+ Validate a "name token": a `str` of letters, digits, and [-._].
+
+ Provisional.
+ """
+ if not re.fullmatch('[-._A-Za-z0-9]+', value):
+ raise ValueError(f'"{value}" is no NMTOKEN.')
+ return value
+
+
+def validate_NMTOKENS(value):
+ """
+ Validate a list of "name tokens".
+
+ Provisional.
+ """
+ if isinstance(value, str):
+ value = value.split()
+ for token in value:
+ validate_NMTOKEN(token)
+ return value
+
+
+def validate_refname_list(value):
+ """
+ Validate a list of `reference names`__.
+
+ Reference names may contain all characters;
+ whitespace is normalized (cf. `whitespace_normalize_name()`).
+
+ `value` may be either a `list` of names or a `str` with
+ space separated names (with internal spaces backslash escaped
+ and literal backslashes doubled cf. `serial_escape()`).
+
+ Return a list of whitespace-normalized, unescaped reference names.
+
+ Provisional.
+
+ __ https://docutils.sourceforge.io/docs/ref/doctree.html#reference-name
+ """
+ if isinstance(value, str):
+ value = split_name_list(value)
+ return [whitespace_normalize_name(name) for name in value]
+
+
+def validate_yesorno(value):
+ if value == "0":
+ return False
+ return bool(value)
+
+
+ATTRIBUTE_VALIDATORS = {
+ 'alt': str, # CDATA
+ 'align': str,
+ 'anonymous': validate_yesorno,
+ 'auto': str, # CDATA (only '1' or '*' are used in rST)
+ 'backrefs': validate_identifier_list,
+ 'bullet': str, # CDATA (only '-', '+', or '*' are used in rST)
+ 'classes': validate_identifier_list,
+ 'char': str, # from Exchange Table Model (CALS), currently ignored
+ 'charoff': validate_NMTOKEN, # from CALS, currently ignored
+ 'colname': validate_NMTOKEN, # from CALS, currently ignored
+ 'colnum': int, # from CALS, currently ignored
+ 'cols': int, # from CALS: "NMTOKEN, […] must be an integer > 0".
+ 'colsep': validate_yesorno,
+ 'colwidth': int, # sic! CALS: CDATA (measure or number+'*')
+ 'content': str, # <meta>
+ 'delimiter': str,
+ 'depth': int,
+ 'dir': validate_enumerated_type('ltr', 'rtl', 'auto'), # <meta>
+ 'dupnames': validate_refname_list,
+ 'enumtype': validate_enumerated_type('arabic', 'loweralpha', 'lowerroman',
+ 'upperalpha', 'upperroman'),
+ 'format': str, # CDATA (space separated format names)
+ 'frame': validate_enumerated_type('top', 'bottom', 'topbot', 'all',
+ 'sides', 'none'), # from CALS, ignored
+ 'height': validate_measure,
+ 'http-equiv': str, # <meta>
+ 'ids': validate_identifier_list,
+ 'lang': str, # <meta>
+ 'level': int,
+ 'line': int,
+ 'local': validate_yesorno,
+ 'ltrim': validate_yesorno,
+ 'loading': validate_enumerated_type('embed', 'link', 'lazy'),
+ 'media': str, # <meta>
+ 'morecols': int,
+ 'morerows': int,
+ 'name': whitespace_normalize_name, # in <reference> (deprecated)
+ # 'name': node_attributes.validate_NMTOKEN, # in <meta>
+ 'names': validate_refname_list,
+ 'namest': validate_NMTOKEN, # start of span, from CALS, currently ignored
+ 'nameend': validate_NMTOKEN, # end of span, from CALS, currently ignored
+ 'pgwide': validate_yesorno, # from CALS, currently ignored
+ 'prefix': str,
+ 'refid': validate_identifier,
+ 'refname': whitespace_normalize_name,
+ 'refuri': str,
+ 'rowsep': validate_yesorno,
+ 'rtrim': validate_yesorno,
+ 'scale': int,
+ 'scheme': str,
+ 'source': str,
+ 'start': int,
+ 'stub': validate_yesorno,
+ 'suffix': str,
+ 'title': str,
+ 'type': validate_NMTOKEN,
+ 'uri': str,
+ 'valign': validate_enumerated_type('top', 'middle', 'bottom'), # from CALS
+ 'width': validate_measure,
+ 'xml:space': validate_enumerated_type('default', 'preserve'),
+ }
+"""
+Mapping of `attribute names`__ to validating functions.
+
+Provisional.
+
+__ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-reference
+"""
Modified: trunk/docutils/docutils/parsers/recommonmark_wrapper.py
===================================================================
--- trunk/docutils/docutils/parsers/recommonmark_wrapper.py 2024-05-06 12:41:07 UTC (rev 9690)
+++ trunk/docutils/docutils/parsers/recommonmark_wrapper.py 2024-05-07 11:24:22 UTC (rev 9691)
@@ -75,7 +75,9 @@
return Component.get_transforms(self) # + [AutoStructify]
def parse(self, inputstring, document):
- """Use the upstream parser and clean up afterwards.
+ """Wrapper of upstream method.
+
+ Ensure "line-length-limit". Report errors with `document.reporter`.
"""
# check for exorbitantly long lines
for i, line in enumerate(inputstring.split('\n')):
@@ -95,9 +97,14 @@
'returned the error:\n%s'%err)
document.append(error)
- # Post-Processing
- # ---------------
+ # Post-Processing
+ # ---------------
+ def finish_parse(self):
+ """Finalize parse details. Call at end of `self.parse()`."""
+
+ document = self.document
+
# merge adjoining Text nodes:
for node in document.findall(nodes.TextElement):
children = node.children
@@ -142,6 +149,8 @@
reference['name'] = nodes.fully_normalize_name(
reference.astext())
node.parent.replace(node, reference)
+ # now we are ready to call the upstream function:
+ super().finish_parse()
def visit_document(self, node):
"""Dummy function to prevent spurious warnings.
Modified: trunk/docutils/test/test_nodes.py
===================================================================
--- trunk/docutils/test/test_nodes.py 2024-05-06 12:41:07 UTC (rev 9690)
+++ trunk/docutils/test/test_nodes.py 2024-05-07 11:24:22 UTC (rev 9691)
@@ -474,14 +474,33 @@
node.append(nodes.emphasis('', 'emphasised text', ids='emphtext'))
node.validate()
+ def test_validate_attributes(self):
+ # Convert to expected data-type, normalize values,
+ # cf. AttributeTypeTests below for attribute validating function tests.
+ node = nodes.image(classes='my test-classes',
+ names='My teST\n\\ \xA0classes',
+ width='30 mm')
+ node.validate_attributes()
+ self.assertEqual(node['classes'], ['my', 'test-classes'])
+ self.assertEqual(node['names'], ['My', 'teST classes'])
+ self.assertEqual(node['width'], '30mm')
+
def test_validate_wrong_attribute(self):
node = nodes.paragraph('', 'text', id='test-paragraph')
with self.assertRaisesRegex(ValueError,
- 'Element <paragraph> '
- 'has invalid attribute "id".'):
+ 'Element <paragraph> invalid:\n'
+ 'Attribute "id" not one of "ids '):
node.validate()
+ def test_validate_wrong_attribute_value(self):
+ node = nodes.image(uri='test.png', width='20 inch') # invalid unit
+ with self.assertRaisesRegex(ValueError,
+ 'Element <image> invalid:\n'
+ '.*"width" has invalid value "20 inch".\n'
+ '.*Valid units: em ex '):
+ node.validate()
+
class MiscTests(unittest.TestCase):
def test_node_class_names(self):
@@ -807,6 +826,102 @@
result = nodes.fully_normalize_name(sample)
self.assertEqual(result, fully)
+ def test_split_name_list(self):
+ self.assertEqual(nodes.split_name_list(r'a\ n\ame two\\ n\\ames'),
+ ['a name', 'two\\', r'n\ames'])
+
+class AttributeTypeTests(unittest.TestCase):
+
+ def test_validate_enumerated_type(self):
+ # function factory for "choice validators"
+ food = nodes.validate_enumerated_type('ham', 'spam')
+ self.assertEqual(food('ham'), 'ham')
+ with self.assertRaisesRegex(ValueError,
+ '"bacon" is not one of "ham", "spam".'):
+ food('bacon')
+
+ def test_validate_identifier(self):
+ # Identifiers must start with an ASCII letter and may contain
+ # letters, digits and the hyphen
+ # https://docutils.sourceforge.io/docs/ref/doctree.html#idref-type
+ self.assertEqual(nodes.validate_identifier('mo-8b'), 'mo-8b')
+ with self.assertRaisesRegex(ValueError, '"8b-mo" is no valid id'):
+ nodes.validate_identifier('8b-mo')
+
+ def test_validate_identifier_list(self):
+ # list of identifiers (cf. above)
+ # or a `str` of space-separated identifiers.
+ l1 = ['m8-b', 'm8-c']
+ s1 = 'm8-b m8-c'
+ self.assertEqual(nodes.validate_identifier_list(l1), l1)
+ self.assertEqual(nodes.validate_identifier_list(s1), l1)
+ l2 = ['m8-b', 'm8_c']
+ s2 = 'm8-b #8c'
+ with self.assertRaises(ValueError):
+ nodes.validate_identifier_list(l2)
+ with self.assertRaises(ValueError):
+ nodes.validate_identifier_list(s2)
+
+ def test_validate_measure(self):
+ # number (may be decimal fraction) + optional CSS2 length unit
+ self.assertEqual(nodes.validate_measure('8ex'), '8ex')
+ self.assertEqual(nodes.validate_measure('3.5 %'), '3.5%')
+ self.assertEqual(nodes.validate_measure('2'), '2')
+ with self.assertRaisesRegex(ValueError, '"2km" is no valid measure. '
+ 'Valid units: em ex '):
+ nodes.validate_measure('2km')
+ # negative numbers are currently not supported
+ # TODO: allow? the spec does not mention negative numbers.
+ # but a negative width or height of an image is odd.
+ # nodes.validate_measure('-2')
+
+ def test_validate_NMTOKEN(self):
+ # str with ASCII-letters, digits, hyphen, underscore, and full-stop.
+ self.assertEqual(nodes.validate_NMTOKEN('-8x_.'), '-8x_.')
+ with self.assertRaises(ValueError):
+ nodes.validate_NMTOKEN('why me')
+
+ def test_validate_NMTOKENS(self):
+ # list of NMTOKENS or string with space-separated NMTOKENS
+ l1 = ['8_b', '8.c']
+ s1 = '8_b 8.c'
+ l2 = ['8_b', '8/c']
+ s2 = '8_b #8'
+ self.assertEqual(nodes.validate_NMTOKENS(l1), l1)
+ self.assertEqual(nodes.validate_NMTOKENS(s1), l1)
+ with self.assertRaises(ValueError):
+ nodes.validate_NMTOKENS(l2)
+ with self.assertRaises(ValueError):
+ nodes.validate_NMTOKENS(s2)
+
+ def test_validate_refname_list(self):
+ # list or string of "reference names".
+ l1 = ['*:@', r'"more"\ & \x!']
+ s1 = r'*:@ \"more"\\\ &\ \\x!' # unescaped backslash is ignored
+ self.assertEqual(nodes.validate_refname_list(l1), l1)
+ self.assertEqual(nodes.validate_refname_list(s1), l1)
+ # whitespace is normalized, case is not normalized
+ l2 = ['LARGE', 'a\t \tc']
+ s2 = r'LARGE a\ \ \c'
+ normalized = ['LARGE', 'a c']
+
+ self.assertEqual(nodes.validate_refname_list(l2), normalized)
+ self.assertEqual(nodes.validate_refname_list(s2), normalized)
+
+ def test_validate_yesorno(self):
+ # False if '0', else bool
+ # TODO: The docs say '0' is false:
+ # * Also return `True` for values that evaluate to `False`?
+ # Even for `False` and `None`?
+ # * Also return `False` for 'false', 'off', 'no'
+ # like boolean config settings?
+ self.assertFalse(nodes.validate_yesorno('0'))
+ self.assertFalse(nodes.validate_yesorno(0))
+ self.assertTrue(nodes.validate_yesorno('*'))
+ self.assertTrue(nodes.validate_yesorno(1))
+ # self.assertFalse(nodes.validate_yesorno('no'))
+
+
if __name__ == '__main__':
unittest.main()
Modified: trunk/docutils/test/test_parsers/test_recommonmark/test_literal_blocks.py
===================================================================
--- trunk/docutils/test/test_parsers/test_recommonmark/test_literal_blocks.py 2024-05-06 12:41:07 UTC (rev 9690)
+++ trunk/docutils/test/test_parsers/test_recommonmark/test_literal_blocks.py 2024-05-07 11:24:22 UTC (rev 9691)
@@ -204,20 +204,6 @@
A literal block (fenced code block)
with *info string*.
"""],
-["""\
-~~~eval_rst
-Evaluating embedded rST blocks requires the AutoStructify component
-in recommonmark. Otherwise this is just a code block
-with class ``eval_rst``.
-~~~
-""",
-"""\
-<document source="test data">
- <literal_block classes="code eval_rst" xml:space="preserve">
- Evaluating embedded rST blocks requires the AutoStructify component
- in recommonmark. Otherwise this is just a code block
- with class ``eval_rst``.
-"""],
]
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-05-08 07:10:59
|
Revision: 9694
http://sourceforge.net/p/docutils/code/9694
Author: milde
Date: 2024-05-08 07:10:56 +0000 (Wed, 08 May 2024)
Log Message:
-----------
Doctree validation: declare valid element children (part 2).
Declare valid children and valid number of children for all
document tree elements.
Requires some re-ordering (define sub-elements first so that
the class can be used in the content declaration of the parent).
New element category class `nodes.PureTextElement` for nodes that do
not accept Inline elements, only text (was a TODO comment).
Add declarations for valid attributes of table elements defined
in the Exchange Table Model but not used/supported by Docutils.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/nodes.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-05-08 07:10:49 UTC (rev 9693)
+++ trunk/docutils/HISTORY.txt 2024-05-08 07:10:56 UTC (rev 9694)
@@ -24,7 +24,7 @@
* docutils/nodes.py
- - New `SubStructural` element category class.
+ - New element category classes `SubStructural` and `PureTextElement`.
- Fix element categories.
- New method `Element.validate()` (work in progress).
- New "attribute validating functions"
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-05-08 07:10:49 UTC (rev 9693)
+++ trunk/docutils/docutils/nodes.py 2024-05-08 07:10:56 UTC (rev 9694)
@@ -1222,6 +1222,7 @@
class Admonition(Body):
"""Admonitions (distinctive and self-contained notices)."""
+ valid_children = Body # (%body.elements;)
class Sequential(Body):
@@ -1248,6 +1249,7 @@
Children of `decoration`.
"""
+ valid_children = Body # (%body.elements;)
class Inline:
@@ -1326,11 +1328,9 @@
self.attributes['xml:space'] = 'preserve'
-# TODO: PureTextElement(TextElement):
-# """An element which only contains text, no children."""
-# For elements in the DTD that directly employ #PCDATA in their definition:
-# citation_reference, comment, footnote_reference, label, math, math_block,
-# option_argument, option_string, raw,
+class PureTextElement(TextElement):
+ """An element which only contains text, no children."""
+ valid_children = Text # (#PCDATA)
# ==============
@@ -1345,6 +1345,13 @@
`docutils.utils.new_document()` instead.
"""
valid_attributes = Element.valid_attributes + ('title',)
+ # content model: ( (title, subtitle?)?,
+ # meta*,
+ # decoration?,
+ # (docinfo, transition?)?,
+ # %structure.model; )
+ valid_children = (Structural, SubRoot, Body)
+ valid_len = (0, None) # may be empty
def __init__(self, settings, reporter, *args, **kwargs):
Element.__init__(self, *args, **kwargs)
@@ -1748,8 +1755,11 @@
# Decorative Elements
# =====================
+
class decoration(PreBibliographic, SubRoot, Element):
- """Container for header and footer."""
+ """Container for `header` and `footer`."""
+ valid_children = Decorative # (header?, footer?)
+ valid_len = (0, 2) # TODO: empty element does not make sense.
def get_header(self):
if not len(self.children) or not isinstance(self.children[0], header):
@@ -1772,6 +1782,8 @@
class section(Structural, Element):
"""Document section. The main unit of hierarchy."""
+ # content model: (title, subtitle?, %structure.model;)
+ valid_children = (Structural, SubStructural, Body)
class topic(Structural, Element):
@@ -1782,9 +1794,9 @@
and it doesn't have to conform to section placement rules.
Topics are allowed wherever body elements (list, table, etc.) are allowed,
- but only at the top level of a section or document. Topics cannot nest
- inside topics, sidebars, or body elements; you can't have a topic inside a
- table, list, block quote, etc.
+ but only at the top level of a sidebar, section or document.
+ Topics cannot nest inside topics, or body elements; you can't have
+ a topic inside a table, list, block quote, etc.
"""
# "depth" and "local" attributes may be added by the "Contents" transform:
valid_attributes = Element.valid_attributes + ('depth', 'local')
@@ -1823,40 +1835,96 @@
# ===============
class paragraph(General, TextElement): pass
-class compound(General, Element): pass
-class container(General, Element): pass
+class compound(General, Element):
+ valid_children = Body # (%body.elements;)+
+
+
+class container(General, Element):
+ valid_children = Body # (%body.elements;)+
+
+
+class attribution(Part, TextElement):
+ """Visible reference to the source of a `block_quote`."""
+
+
+class block_quote(General, Element):
+ """An extended quotation, set off from the main text."""
+ valid_children = (Body, attribution) # ((%body.elements;)+, attribution?)
+
+
+# Lists
+# =====
+#
+# Lists (Sequential) and related Body Subelements (Part)
+
+class list_item(Part, Element):
+ valid_children = Body # (%body.elements;)*
+ valid_len = (0, None)
+
+
class bullet_list(Sequential, Element):
valid_attributes = Element.valid_attributes + ('bullet',)
+ valid_children = list_item # (list_item+)
class enumerated_list(Sequential, Element):
valid_attributes = Element.valid_attributes + (
'enumtype', 'prefix', 'suffix', 'start')
+ valid_children = list_item # (list_item+)
-class list_item(Part, Element): pass
-class definition_list(Sequential, Element): pass
-class definition_list_item(Part, Element): pass
class term(Part, TextElement): pass
class classifier(Part, TextElement): pass
-class definition(Part, Element): pass
-class field_list(Sequential, Element): pass
-class field(Part, Bibliographic, Element): pass
+
+
+class definition(Part, Element):
+ """Definition of a `term` in a `definition_list`."""
+ valid_children = Body # (%body.elements;)+
+
+
+class definition_list_item(Part, Element):
+ valid_children = (term, classifier, definition)
+ valid_len = (2, None) # (term, classifier*, definition)
+
+
+class definition_list(Sequential, Element):
+ """List of terms and their definitions.
+
+ Can be used for glossaries or dictionaries, to describe or
+ classify things, for dialogues, or to itemize subtopics.
+ """
+ valid_children = definition_list_item # (definition_list_item+)
+
+
class field_name(Part, TextElement): pass
-class field_body(Part, Element): pass
-class option(Part, Element):
- """Option element in an `option_list_item`.
+class field_body(Part, Element):
+ valid_children = Body # (%body.elements;)*
+ valid_len = (0, None)
- Groups an option string with zero or more option argument placeholders.
+
+class field(Part, Bibliographic, Element):
+ valid_children = (field_name, field_body) # (field_name, field_body)
+ valid_len = (2, 2)
+
+
+class field_list(Sequential, Element):
+ """List of label & data pairs.
+
+ Typically rendered as a two-column list.
+ Also used for extension syntax or special processing.
"""
- child_text_separator = ''
+ valid_children = field # (field+)
-class option_argument(Part, TextElement):
+class option_string(Part, PureTextElement):
+ """A literal command-line option. Typically monospaced."""
+
+
+class option_argument(Part, PureTextElement):
"""Placeholder text for option arguments."""
valid_attributes = Element.valid_attributes + ('delimiter',)
@@ -1864,13 +1932,25 @@
return self.get('delimiter', ' ') + TextElement.astext(self)
+class option(Part, Element):
+ """Option element in an `option_list_item`.
+
+ Groups an option string with zero or more option argument placeholders.
+ """
+ child_text_separator = ''
+ # content model: (option_string, option_argument*)
+ valid_children = (option_string, option_argument)
+
+
class option_group(Part, Element):
"""Groups together one or more `option` elements, all synonyms."""
child_text_separator = ', '
+ valid_children = option # (option+)
-class option_list(Sequential, Element):
- """Two-column list of command-line options and descriptions."""
+class description(Part, Element):
+ """Description of a command-line option."""
+ valid_children = Body # (%body.elements;)+
class option_list_item(Part, Element):
@@ -1877,23 +1957,44 @@
"""Container for a pair of `option_group` and `description` elements.
"""
child_text_separator = ' '
+ valid_children = (option_group, description) # (option_group, description)
+ valid_len = (2, 2)
-class option_string(Part, TextElement): pass
-class description(Part, Element): pass
+class option_list(Sequential, Element):
+ """Two-column list of command-line options and descriptions."""
+ valid_children = option_list_item # (option_list_item+)
+
+
+# Pre-formatted text blocks
+# =========================
+
class literal_block(General, FixedTextElement): pass
class doctest_block(General, FixedTextElement): pass
-class math_block(General, FixedTextElement): pass
-class line_block(General, Element): pass
+class math_block(General, FixedTextElement, PureTextElement):
+ """Mathematical notation (display formula)."""
+
+
class line(Part, TextElement):
"""Single line of text in a `line_block`."""
indent = None
-class block_quote(General, Element): pass
-class attribution(Part, TextElement): pass
+class line_block(General, Element):
+ """Sequence of lines and nested line blocks.
+ """
+ # recursive content model: (line | line_block)+
+
+
+line_block.valid_children = (line, line_block)
+
+
+# Admonitions
+# ===========
+# distinctive and self-contained notices
+
class attention(Admonition, Element): pass
class caution(Admonition, Element): pass
class danger(Admonition, Element): pass
@@ -1903,10 +2004,20 @@
class tip(Admonition, Element): pass
class hint(Admonition, Element): pass
class warning(Admonition, Element): pass
-class admonition(Admonition, Element): pass
-class comment(Invisible, FixedTextElement): pass
+class admonition(Admonition, Element):
+ valid_children = (title, Body) # (title, (%body.elements;)+)
+ valid_len = (2, None)
+
+
+# Invisible elements
+# ==================
+
+class comment(Invisible, FixedTextElement, PureTextElement):
+ """Author notes, hidden from the output."""
+
+
class substitution_definition(Invisible, TextElement):
valid_attributes = Element.valid_attributes + ('ltrim', 'rtrim')
@@ -1916,47 +2027,114 @@
'anonymous', 'refid', 'refname', 'refuri')
+# Footnote and citation
+# =====================
+
+class label(Part, PureTextElement):
+ """Visible identifier for footnotes and citations."""
+
+
class footnote(General, BackLinkable, Element, Labeled, Targetable):
+ """Labelled note providing additional context (footnote or endnote)."""
valid_attributes = Element.valid_attributes + ('auto', 'backrefs')
+ valid_children = (label, Body) # (label?, (%body.elements;)+)
-class citation(General, BackLinkable, Element, Labeled, Targetable): pass
-class label(Part, TextElement): pass
+class citation(General, BackLinkable, Element, Labeled, Targetable):
+ valid_children = (label, Body) # (label, (%body.elements;)+)
+ valid_len = (2, None)
-class figure(General, Element):
- valid_attributes = Element.valid_attributes + ('align', 'width')
+# Graphical elements
+# ==================
+class image(General, Inline, Element):
+ """Reference to an image resource.
+ May be body element or inline element.
+ """
+ valid_attributes = Element.valid_attributes + (
+ 'uri', 'alt', 'align', 'height', 'width', 'scale', 'loading')
+ valid_len = (0, 0) # empty element
+
+ def astext(self):
+ return self.get('alt', '')
+
+
class caption(Part, TextElement): pass
-class legend(Part, Element): pass
-class table(General, Element):
- valid_attributes = Element.valid_attributes + (
- 'align', 'colsep', 'frame', 'pgwide', 'rowsep', 'width')
+class legend(Part, Element):
+ """A wrapper for text accompanying a `figure` that is not the caption."""
+ valid_children = Body # (%body.elements;)
-class tgroup(Part, Element):
+class figure(General, Element):
+ """A formal figure, generally an illustration, with a title."""
+ valid_attributes = Element.valid_attributes + ('align', 'width')
+ # content model: (image, ((caption, legend?) | legend))
+ valid_children = (image, caption, legend)
+ valid_len = (1, 3)
+ # TODO: According to the DTD, a caption or legend is required
+ # but rST allows "bare" figures which are formatted differently from
+ # images (floating in LaTeX, nested in a <figure> in HTML).
+
+
+# Tables
+# ======
+
+class entry(Part, Element):
+ """An entry in a `row` (a table cell)."""
valid_attributes = Element.valid_attributes + (
- 'align', 'cols', 'colsep', 'rowsep')
+ 'align', 'char', 'charoff', 'colname', 'colsep', 'morecols',
+ 'morerows', 'namest', 'nameend', 'rowsep', 'valign')
+ valid_children = Body # %tbl.entry.mdl -> (%body.elements;)*
+ valid_len = (0, None) # may be empty
+class row(Part, Element):
+ """Row of table cells."""
+ valid_attributes = Element.valid_attributes + ('rowsep', 'valign')
+ valid_children = entry # (%tbl.row.mdl;) -> entry+
+
+
class colspec(Part, Element):
+ """Specifications for a column in a `tgroup`."""
valid_attributes = Element.valid_attributes + (
'align', 'char', 'charoff', 'colname', 'colnum',
'colsep', 'colwidth', 'rowsep', 'stub')
+ valid_len = (0, 0) # empty element
-class thead(Part, Element): pass
-class tbody(Part, Element): pass
-class row(Part, Element): pass
+class thead(Part, Element):
+ """Row(s) that form the head of a `tgroup`."""
+ valid_attributes = Element.valid_attributes + ('valign',)
+ valid_children = row # (row+)
-class entry(Part, Element):
- valid_attributes = Element.valid_attributes + ('morecols', 'morerows')
+class tbody(Part, Element):
+ """Body of a `tgroup`."""
+ valid_attributes = Element.valid_attributes + ('valign',)
+ valid_children = row # (row+)
+class tgroup(Part, Element):
+ """A portion of a table. Most tables have just one `tgroup`."""
+ valid_attributes = Element.valid_attributes + (
+ 'align', 'cols', 'colsep', 'rowsep')
+ valid_children = (colspec, thead, tbody) # (colspec*, thead?, tbody)
+
+
+class table(General, Element):
+ """A data arrangement with rows and columns."""
+ valid_attributes = Element.valid_attributes + (
+ 'align', 'colsep', 'frame', 'pgwide', 'rowsep', 'width')
+ valid_children = (title, tgroup) # (title?, tgroup+)
+
+
+# Special purpose elements
+# ========================
+
class system_message(Special, BackLinkable, PreBibliographic, Element):
"""
System message element.
@@ -2059,7 +2237,8 @@
return obj
-class raw(Special, Inline, PreBibliographic, FixedTextElement):
+class raw(Special, Inline, PreBibliographic,
+ FixedTextElement, PureTextElement):
"""Raw data that is to be passed untouched to the Writer.
"""
valid_attributes = Element.valid_attributes + ('format', 'xml:space')
@@ -2079,11 +2258,11 @@
'anonymous', 'name', 'refid', 'refname', 'refuri')
-class footnote_reference(Inline, Referential, TextElement):
+class footnote_reference(Inline, Referential, PureTextElement):
valid_attributes = Element.valid_attributes + ('auto', 'refid', 'refname')
-class citation_reference(Inline, Referential, TextElement):
+class citation_reference(Inline, Referential, PureTextElement):
valid_attributes = Element.valid_attributes + ('refid', 'refname')
@@ -2096,19 +2275,12 @@
class acronym(Inline, TextElement): pass
class superscript(Inline, TextElement): pass
class subscript(Inline, TextElement): pass
-class math(Inline, TextElement): pass
-class image(General, Inline, Element):
- """Reference to an image resource."""
+class math(Inline, PureTextElement):
+ """Mathematical notation in running text."""
- valid_attributes = Element.valid_attributes + (
- 'uri', 'alt', 'align', 'height', 'width', 'scale', 'loading')
- def astext(self):
- return self.get('alt', '')
-
-
class inline(Inline, TextElement): pass
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-05-08 07:11:08
|
Revision: 9695
http://sourceforge.net/p/docutils/code/9695
Author: milde
Date: 2024-05-08 07:11:05 +0000 (Wed, 08 May 2024)
Log Message:
-----------
Doctree validation: Validate number and types of element children.
The "manpage" writer tests include a sample with invalid doctree
(`<citation>` without content, generated from an "rST" source without warning)
to ensure this case is handled gracefully -> ensure "validate" setting is off.
Modified Paths:
--------------
trunk/docutils/docutils/nodes.py
trunk/docutils/test/test_writers/test_manpage.py
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-05-08 07:10:56 UTC (rev 9694)
+++ trunk/docutils/docutils/nodes.py 2024-05-08 07:11:05 UTC (rev 9695)
@@ -1146,8 +1146,16 @@
except ValueError as e:
messages.append(e.args[0]) # the message argument
# TODO: check number of children
+ n_min, n_max = self.valid_len
+ if len(self.children) < n_min:
+ messages.append(f'Expects at least {n_min} children, '
+ f'not {len(self.children)}.')
+ if n_max is not None and len(self.children) > n_max:
+ messages.append(f'Expects at most {n_max} children, '
+ f'not {len(self.children)}.')
for child in self.children:
- # TODO: check whether child has allowed type
+ if not isinstance(child, self.valid_children):
+ messages.append(f'May not contain "{child.tagname}" elements.')
child.validate()
if messages:
msg = f'Element <{self.tagname}> invalid:\n' + '\n'.join(messages)
Modified: trunk/docutils/test/test_writers/test_manpage.py
===================================================================
--- trunk/docutils/test/test_writers/test_manpage.py 2024-05-08 07:10:56 UTC (rev 9694)
+++ trunk/docutils/test/test_writers/test_manpage.py 2024-05-08 07:11:05 UTC (rev 9695)
@@ -34,6 +34,7 @@
settings_overrides={
'_disable_config': True,
'strict_visitor': True,
+ 'validate': False, # allow testing invalid doctree
}).decode()
self.assertEqual(case_expected, output)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-05-08 10:32:44
|
Revision: 9696
http://sourceforge.net/p/docutils/code/9696
Author: milde
Date: 2024-05-08 10:32:41 +0000 (Wed, 08 May 2024)
Log Message:
-----------
Fix recommonmark parser support. Announce removal in Docutils 1.0.
Recommonmark has been unmaintained since 2021 and is deprecated in favour
of the `MyST parser`_.
Modified Paths:
--------------
trunk/docutils/RELEASE-NOTES.txt
trunk/docutils/docs/user/config.txt
trunk/docutils/docutils/parsers/recommonmark_wrapper.py
Modified: trunk/docutils/RELEASE-NOTES.txt
===================================================================
--- trunk/docutils/RELEASE-NOTES.txt 2024-05-08 07:11:05 UTC (rev 9695)
+++ trunk/docutils/RELEASE-NOTES.txt 2024-05-08 10:32:41 UTC (rev 9696)
@@ -151,6 +151,13 @@
* Remove `parsers.rst.directives.CSVTable.HeaderDialect`
in Docutils 0.22.
+* Remove support for the `recommonmark parser`_ in Docutils 1.0.
+ Recommonmark is unmaintained since 2021 and deprecated in favour
+ of the `MyST parser`_.
+
+ .. _recommonmark parser: docs/user/config.html#recommonmark-parser
+ .. _MyST parser: docs/user/config.html#myst-parser
+
* Remove the "rawsource" argument from `nodes.Text.__init__()`
in Docutils 2.0.
Modified: trunk/docutils/docs/user/config.txt
===================================================================
--- trunk/docutils/docs/user/config.txt 2024-05-08 07:11:05 UTC (rev 9695)
+++ trunk/docutils/docs/user/config.txt 2024-05-08 10:32:41 UTC (rev 9696)
@@ -967,9 +967,10 @@
[recommonmark parser]
---------------------
-.. admonition:: Provisional
+.. admonition:: Deprecated
Depends on deprecated 3rd-party package recommonmark__.
+ Support will be removed in Docutils 1.0.
Currently no configuration settings.
Modified: trunk/docutils/docutils/parsers/recommonmark_wrapper.py
===================================================================
--- trunk/docutils/docutils/parsers/recommonmark_wrapper.py 2024-05-08 07:11:05 UTC (rev 9695)
+++ trunk/docutils/docutils/parsers/recommonmark_wrapper.py 2024-05-08 10:32:41 UTC (rev 9696)
@@ -16,10 +16,10 @@
__ https://pypi.org/project/recommonmark/
-.. important:: This module is provisional
+.. important:: This module is deprecated.
* The "recommonmark" package is unmaintained and deprecated.
- This wrapper module will be removed in a future Docutils version.
+ This wrapper module will be removed in Docutils 1.0.
* The API is not settled and may change with any minor Docutils version.
"""
@@ -117,6 +117,11 @@
else:
i += 1
+ # remove empty Text nodes:
+ for node in document.findall(nodes.Text):
+ if not len(node):
+ node.parent.remove(node)
+
# add "code" class argument to literal elements (inline and block)
for node in document.findall(is_literal):
if 'code' not in node['classes']:
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-05-14 12:28:21
|
Revision: 9702
http://sourceforge.net/p/docutils/code/9702
Author: milde
Date: 2024-05-14 12:28:18 +0000 (Tue, 14 May 2024)
Log Message:
-----------
Fix for `misc.Transitions`.
Report an error if a <transition> element follows a <meta> or
<decoration> element as this is invalid according to ``docutils.dtd``.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/transforms/misc.py
trunk/docutils/test/test_transforms/test_transitions.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-05-12 15:57:27 UTC (rev 9701)
+++ trunk/docutils/HISTORY.txt 2024-05-14 12:28:18 UTC (rev 9702)
@@ -40,6 +40,12 @@
- Update `DocInfo` to work with corrected element categories.
+* docutils/transforms/misc.py:
+
+ - Fix for `misc.Transitions`: report an error if a <transition> element
+ follows a <meta> or <decoration> element as this is invalid
+ according to ``docutils.dtd``.
+
* docutils/writers/manpage.py
- Remove code for unused emdash bullets.
Modified: trunk/docutils/docutils/transforms/misc.py
===================================================================
--- trunk/docutils/docutils/transforms/misc.py 2024-05-12 15:57:27 UTC (rev 9701)
+++ trunk/docutils/docutils/transforms/misc.py 2024-05-14 12:28:18 UTC (rev 9702)
@@ -71,8 +71,8 @@
"""
Move transitions at the end of sections up the tree. Complain
- on transitions after a title, at the beginning or end of the
- document, and after another transition.
+ on transitions after a title, subtitle, meta, or decoration element,
+ at the beginning or end of the document, and after another transition.
For example, transform this::
@@ -99,24 +99,20 @@
def visit_transition(self, node):
index = node.parent.index(node)
- error = None
- if (index == 0
- or isinstance(node.parent[0], nodes.title)
- and (index == 1
- or isinstance(node.parent[1], nodes.subtitle)
- and index == 2)):
- assert (isinstance(node.parent, nodes.document)
- or isinstance(node.parent, nodes.section))
- error = self.document.reporter.error(
- 'Document or section may not begin with a transition.',
- source=node.source, line=node.line)
- elif isinstance(node.parent[index - 1], nodes.transition):
- error = self.document.reporter.error(
- 'At least one body element must separate transitions; '
- 'adjacent transitions are not allowed.',
- source=node.source, line=node.line)
- if error:
+ previous_sibling = node.previous_sibling()
+ msg = ''
+ assert isinstance(node.parent, (nodes.document, nodes.section))
+ if index == 0 or isinstance(previous_sibling, (nodes.title,
+ nodes.subtitle,
+ nodes.meta,
+ nodes.decoration)):
+ msg = 'Document or section may not begin with a transition.'
+ elif isinstance(previous_sibling, nodes.transition):
+ msg = ('At least one body element must separate transitions; '
+ 'adjacent transitions are not allowed.')
+ if msg:
# Insert before node and update index.
+ error = self.document.reporter.error(msg, base_node=node)
node.parent.insert(index, error)
index += 1
assert index < len(node.parent)
Modified: trunk/docutils/test/test_transforms/test_transitions.py
===================================================================
--- trunk/docutils/test/test_transforms/test_transitions.py 2024-05-12 15:57:27 UTC (rev 9701)
+++ trunk/docutils/test/test_transforms/test_transitions.py 2024-05-14 12:28:18 UTC (rev 9702)
@@ -245,6 +245,43 @@
Document beginning with a transition.
"""],
["""\
+.. meta:: :keywords: transition test
+
+----------
+
+Document beginning with a transition (meta elements don't count).
+""",
+"""\
+<document source="test data">
+ <meta content="transition test" name="keywords">
+ <system_message level="3" line="3" source="test data" type="ERROR">
+ <paragraph>
+ Document or section may not begin with a transition.
+ <transition>
+ <paragraph>
+ Document beginning with a transition (meta elements don't count).
+"""],
+["""\
+.. header:: a header
+
+----------
+
+Document beginning with a transition (decoration elements don't count).
+""",
+"""\
+<document source="test data">
+ <decoration>
+ <header>
+ <paragraph>
+ a header
+ <system_message level="3" line="3" source="test data" type="ERROR">
+ <paragraph>
+ Document or section may not begin with a transition.
+ <transition>
+ <paragraph>
+ Document beginning with a transition (decoration elements don't count).
+"""],
+["""\
Section 1
=========
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <gr...@us...> - 2024-05-16 14:26:07
|
Revision: 9705
http://sourceforge.net/p/docutils/code/9705
Author: grubert
Date: 2024-05-16 14:26:04 +0000 (Thu, 16 May 2024)
Log Message:
-----------
Feature request #105: more informative document comments.
The generated man page's header comment now includes the Docutils version.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/writers/manpage.py
trunk/docutils/test/test_writers/test_manpage.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-05-16 13:49:09 UTC (rev 9704)
+++ trunk/docutils/HISTORY.txt 2024-05-16 14:26:04 UTC (rev 9705)
@@ -49,6 +49,8 @@
* docutils/writers/manpage.py
- Remove code for unused emdash bullets.
+ - Feature-request #105 more informative document comments.
+ docutils version in header
Release 0.21.2 (2024-04-23)
Modified: trunk/docutils/docutils/writers/manpage.py
===================================================================
--- trunk/docutils/docutils/writers/manpage.py 2024-05-16 13:49:09 UTC (rev 9704)
+++ trunk/docutils/docutils/writers/manpage.py 2024-05-16 14:26:04 UTC (rev 9705)
@@ -45,6 +45,7 @@
import re
+import docutils
from docutils import nodes, writers, languages
try:
import roman
@@ -167,8 +168,8 @@
words_and_spaces = re.compile(r'\S+| +|\n')
possibly_a_roff_command = re.compile(r'\.\w')
- document_start = """Man page generated from reStructuredText."""
- # TODO add "from docutils 0.21rc1."
+ document_start = ( 'Man page generated from reStructuredText by manpage writer\n'
+ f'from docutils {docutils.__version__}.' )
def __init__(self, document):
nodes.NodeVisitor.__init__(self, document)
@@ -623,7 +624,7 @@
if self._docinfo['copyright']:
self.body.append('.SH COPYRIGHT\n%s\n'
% self._docinfo['copyright'])
- self.body.append(self.comment('Generated by docutils manpage writer.'))
+ self.body.append(self.comment_begin('End of generated man page.'))
def visit_emphasis(self, node):
self.body.append(self.defs['emphasis'][0])
Modified: trunk/docutils/test/test_writers/test_manpage.py
===================================================================
--- trunk/docutils/test/test_writers/test_manpage.py 2024-05-16 13:49:09 UTC (rev 9704)
+++ trunk/docutils/test/test_writers/test_manpage.py 2024-05-16 14:26:04 UTC (rev 9705)
@@ -38,6 +38,10 @@
}).decode()
self.assertEqual(case_expected, output)
+document_start = r""".\" Man page generated from reStructuredText by manpage writer
+.\" from docutils 0.22b.dev.
+.
+"""
indend_macros = r""".
.nr rst2man-indent-level 0
@@ -72,13 +76,10 @@
totest['blank'] = [
["",
- r""".\" Man page generated from reStructuredText.
-.
-""" + indend_macros + """.TH "" "" "" ""
+ document_start + indend_macros + """.TH "" "" "" ""
.SH NAME
\\- \n\
-.\\" Generated by docutils manpage writer.
-.
+.\\" End of generated man page.
"""],
[r"""Hello, world.
=============
@@ -87,9 +88,8 @@
This broke docutils-sphinx.
""",
- r""".\" Man page generated from reStructuredText.
-.
-""" + indend_macros + """.TH "HELLO, WORLD." "" "" ""
+ document_start + indend_macros +
+""".TH "HELLO, WORLD." "" "" ""
.SH NAME
Hello, world. \\- \n\
.sp
@@ -99,8 +99,7 @@
This broke docutils\\-sphinx.
.UNINDENT
.UNINDENT
-.\\" Generated by docutils manpage writer.
-.
+.\\" End of generated man page.
"""],
]
@@ -164,9 +163,8 @@
and . in a line and at line start
.in a paragraph
""",
- r""".\" Man page generated from reStructuredText.
-.
-""" + indend_macros + """\
+ document_start + indend_macros +
+"""\
.TH "SIMPLE" "1" "2009-08-05" "0.1" "text processing"
.SH NAME
simple \\- The way to go
@@ -230,8 +228,7 @@
Arbitrary field: some text
.SH COPYRIGHT
public domain
-.\\" Generated by docutils manpage writer.
-.
+.\\" End of generated man page.
"""],
]
@@ -246,9 +243,7 @@
""",
'''\
\'\\" t
-.\\" Man page generated from reStructuredText.
-.
-''' + indend_macros + '''.TH "" "" "" ""
+''' + document_start + indend_macros + '''.TH "" "" "" ""
.SH NAME
\\- \n\
.INDENT 0.0
@@ -276,8 +271,7 @@
.TE
.UNINDENT
.UNINDENT
-.\\" Generated by docutils manpage writer.
-.
+.\\" End of generated man page.
''']
]
@@ -300,10 +294,7 @@
]
bla bla bla
""",
- """\
-.\\" Man page generated from reStructuredText.
-.
-""" + indend_macros + """.TH "" "" "" ""
+document_start + indend_macros + """.TH "" "" "" ""
.SH NAME
\\- \n\
optin group with dot as group item
@@ -332,8 +323,7 @@
.B ]
bla bla bla
.UNINDENT
-.\\" Generated by docutils manpage writer.
-.
+.\\" End of generated man page.
"""],
]
@@ -357,10 +347,7 @@
Description of Term 1 Description of Term 1
""",
-'''\
-.\\" Man page generated from reStructuredText.
-.
-''' + indend_macros + '''.TH "DEFINITION LIST TEST" "" "" ""
+document_start + indend_macros + '''.TH "DEFINITION LIST TEST" "" "" ""
.SH NAME
Definition List Test \\- \n\
''' + '''.SS Abstract
@@ -376,8 +363,7 @@
Description of Term 1 Description of Term 1 Description of Term 1
Description of Term 1 Description of Term 1
.UNINDENT
-.\\" Generated by docutils manpage writer.
-.
+.\\" End of generated man page.
'''],
]
@@ -387,9 +373,7 @@
--output FILE, -o FILE output filename
-i DEVICE, --input DEVICE input device
""",
- r""".\" Man page generated from reStructuredText.
-.
-""" + indend_macros + """.TH "" "" "" ""
+document_start + indend_macros + """.TH "" "" "" ""
.SH NAME
\\- \n\
.INDENT 0.0
@@ -407,8 +391,7 @@
input device
.UNINDENT
.UNINDENT
-.\\" Generated by docutils manpage writer.
-.
+.\\" End of generated man page.
"""],
]
@@ -416,16 +399,13 @@
[""".. [docutils] blah blah blah
.. [empty_citation]
""",
- r""".\" Man page generated from reStructuredText.
-.
-""" + indend_macros + """.TH "" "" "" ""
+document_start + indend_macros + """.TH "" "" "" ""
.SH NAME
\\- \n\
.IP [docutils] 5
blah blah blah
.IP [empty_citation] 5
-.\\" Generated by docutils manpage writer.
-.
+.\\" End of generated man page.
"""],
]
@@ -435,9 +415,7 @@
- followed by
- a list
""",
- r""".\" Man page generated from reStructuredText.
-.
-""" + indend_macros + """.TH "" "" "" ""
+document_start + indend_macros + """.TH "" "" "" ""
.SH NAME
\\- \n\
some rubric
@@ -447,8 +425,7 @@
.IP \\(bu 2
a list
.UNINDENT
-.\\" Generated by docutils manpage writer.
-.
+.\\" End of generated man page.
"""],
]
@@ -458,9 +435,7 @@
They are "escaped" anywhere.
""",
- r""".\" Man page generated from reStructuredText.
-.
-""" + indend_macros + """.TH "" "" "" ""
+document_start + indend_macros + """.TH "" "" "" ""
.SH NAME
\\- \n\
.INDENT 0.0
@@ -470,8 +445,7 @@
.UNINDENT
.sp
They are \\(dqescaped\\(dq anywhere.
-.\\" Generated by docutils manpage writer.
-.
+.\\" End of generated man page.
"""],
]
@@ -491,15 +465,12 @@
Test title, docinfo to man page header.
""",
- r""".\" Man page generated from reStructuredText.
-.
-""" + indend_macros + r""".TH "PAGE TITLE" "3" "3/Nov/2022" "0.0" "the books"
+document_start + indend_macros + r""".TH "PAGE TITLE" "3" "3/Nov/2022" "0.0" "the books"
.SH NAME
page title \- in short
.sp
Test title, docinfo to man page header.
-.\" Generated by docutils manpage writer.
-.
+.\" End of generated man page.
"""],
]
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-05-18 08:44:59
|
Revision: 9712
http://sourceforge.net/p/docutils/code/9712
Author: milde
Date: 2024-05-18 08:44:56 +0000 (Sat, 18 May 2024)
Log Message:
-----------
Prevent accidental drop of first child when initializing `nodes.Element`.
Raise ValueError if the "rawsource" argument in `nodes.Element.__init__()`
is an `Element` instance.
Prevents surprises when initializing an element with child element(s)
but forgetting about the mandatory "rawsource" argument.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/nodes.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-05-17 07:15:06 UTC (rev 9711)
+++ trunk/docutils/HISTORY.txt 2024-05-18 08:44:56 UTC (rev 9712)
@@ -24,6 +24,9 @@
* docutils/nodes.py
+ - Raise ValueError if the "rawsource" argument in `Element.__init__()`
+ is an `Element` instance.
+ Catches errors like ``nodes.hint(nodes.paragraph())``.
- New element category classes `SubStructural` and `PureTextElement`.
- Fix element categories.
- New method `Element.validate()` (work in progress).
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-05-17 07:15:06 UTC (rev 9711)
+++ trunk/docutils/docutils/nodes.py 2024-05-18 08:44:56 UTC (rev 9712)
@@ -514,6 +514,8 @@
NOTE: some elements do not set this value (default '').
"""
+ if isinstance(rawsource, Element):
+ raise ValueError('First argument "rawsource" must be a string.')
self.children = []
"""List of child nodes (elements and/or `Text`)."""
@@ -1318,7 +1320,7 @@
"""Separator for child nodes, used by `astext()` method."""
def __init__(self, rawsource='', text='', *children, **attributes):
- if text != '':
+ if text:
textnode = Text(text)
Element.__init__(self, rawsource, textnode, *children,
**attributes)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-05-18 08:45:10
|
Revision: 9713
http://sourceforge.net/p/docutils/code/9713
Author: milde
Date: 2024-05-18 08:45:06 +0000 (Sat, 18 May 2024)
Log Message:
-----------
Doctree validation: small fixes.
Docstring for Element.validate().
Indent secondary message lines with 2 spaces.
Modified Paths:
--------------
trunk/docutils/docutils/nodes.py
trunk/docutils/test/test_nodes.py
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-05-18 08:44:56 UTC (rev 9712)
+++ trunk/docutils/docutils/nodes.py 2024-05-18 08:45:06 UTC (rev 9713)
@@ -1142,12 +1142,20 @@
raise ValueError('\n'.join(messages))
def validate(self):
+ """Validate element and its children.
+
+ Test conformance to the Docutils Document Model ("doctree").
+ Report violations as warnings, or raise ValueError if no reporter
+ is attached to the root node.
+
+ Provisional (work in progress).
+ """
messages = []
try:
self.validate_attributes()
except ValueError as e:
messages.append(e.args[0]) # the message argument
- # TODO: check number of children
+ # test number of children
n_min, n_max = self.valid_len
if len(self.children) < n_min:
messages.append(f'Expects at least {n_min} children, '
@@ -1160,7 +1168,8 @@
messages.append(f'May not contain "{child.tagname}" elements.')
child.validate()
if messages:
- msg = f'Element <{self.tagname}> invalid:\n' + '\n'.join(messages)
+ msg = (f'Element <{self.tagname}> invalid:\n '
+ + '\n '.join(messages))
try:
self.document.reporter.warning(msg)
except AttributeError:
@@ -2053,6 +2062,8 @@
class citation(General, BackLinkable, Element, Labeled, Targetable):
valid_children = (label, Body) # (label, (%body.elements;)+)
valid_len = (2, None)
+ # TODO: DTD requires both label and content but rST allows empty citation
+ # (see test_rst/test_citations.py). Is this sensible?
# Graphical elements
Modified: trunk/docutils/test/test_nodes.py
===================================================================
--- trunk/docutils/test/test_nodes.py 2024-05-18 08:44:56 UTC (rev 9712)
+++ trunk/docutils/test/test_nodes.py 2024-05-18 08:45:06 UTC (rev 9713)
@@ -489,7 +489,7 @@
node = nodes.paragraph('', 'text', id='test-paragraph')
with self.assertRaisesRegex(ValueError,
'Element <paragraph> invalid:\n'
- 'Attribute "id" not one of "ids '):
+ ' Attribute "id" not one of "ids '):
node.validate()
def test_validate_wrong_attribute_value(self):
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <gr...@us...> - 2024-05-19 16:03:43
|
Revision: 9717
http://sourceforge.net/p/docutils/code/9717
Author: grubert
Date: 2024-05-19 16:03:42 +0000 (Sun, 19 May 2024)
Log Message:
-----------
Stop converting text to full capitals (bug #481).
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/writers/manpage.py
trunk/docutils/test/functional/expected/standalone_rst_manpage.man
trunk/docutils/test/test_writers/test_manpage.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-05-19 10:38:12 UTC (rev 9716)
+++ trunk/docutils/HISTORY.txt 2024-05-19 16:03:42 UTC (rev 9717)
@@ -54,8 +54,8 @@
- Remove code for unused emdash bullets.
- Feature-request #105 more informative document comments.
docutils version in header
+ - Stop converting text to full capitals (bug #481).
-
Release 0.21.2 (2024-04-23)
===========================
Modified: trunk/docutils/docutils/writers/manpage.py
===================================================================
--- trunk/docutils/docutils/writers/manpage.py 2024-05-19 10:38:12 UTC (rev 9716)
+++ trunk/docutils/docutils/writers/manpage.py 2024-05-19 16:03:42 UTC (rev 9717)
@@ -24,19 +24,19 @@
Man pages have no subsection only parts.
Standard parts
- NAME ,
- SYNOPSIS ,
- DESCRIPTION ,
- OPTIONS ,
- FILES ,
- SEE ALSO ,
- BUGS ,
+ Name ,
+ Synopsis ,
+ Description ,
+ Options ,
+ Files ,
+ See also ,
+ Bugs ,
and
- AUTHOR .
+ AUthor .
-A unix-like system keeps an index of the DESCRIPTIONs, which is accessible
+A unix-like system keeps an index of the Descriptions, which is accessible
by the command whatis or apropos.
"""
@@ -195,10 +195,10 @@
self.compact_simple = None
# the list style "*" bullet or "#" numbered
self._list_char = []
- # writing the header .TH and .SH NAME is postboned after
+ # writing the header .TH and .SH Name is postboned after
# docinfo.
self._docinfo = {
- "title": "", "title_upper": "",
+ "title": "",
"subtitle": "",
"manual_section": "", "manual_group": "",
"author": [],
@@ -388,19 +388,19 @@
self._list_char.pop()
def header(self):
- th = (".TH \"%(title_upper)s\" \"%(manual_section)s\""
+ th = (".TH \"%(title)s\" \"%(manual_section)s\""
" \"%(date)s\" \"%(version)s\"") % self._docinfo
if self._docinfo["manual_group"]:
th += " \"%(manual_group)s\"" % self._docinfo
th += "\n"
- sh_tmpl = (".SH NAME\n"
+ sh_tmpl = (".SH Name\n"
"%(title)s \\- %(subtitle)s\n")
return th + sh_tmpl % self._docinfo
def append_header(self):
- """append header with .TH and .SH NAME"""
+ """append header with .TH and .SH Name"""
# NOTE before everything
- # .TH title_upper section date source manual
+ # .TH title section date source manual
# BUT macros before .TH for whatis database generators.
if self.header_written:
return
@@ -428,7 +428,7 @@
self.body.append('.sp\n')
name = '%s%s:%s\n' % (
self.defs['strong'][0],
- self.language.labels.get(name, name).upper(),
+ self.language.labels.get(name, name),
self.defs['strong'][1],
)
self.body.append(name)
@@ -607,12 +607,12 @@
def depart_document(self, node):
if self._docinfo['author']:
- self.body.append('.SH AUTHOR\n%s\n'
+ self.body.append('.SH Author\n%s\n'
% ', '.join(self._docinfo['author']))
skip = ('author', 'copyright', 'date',
'manual_group', 'manual_section',
'subtitle',
- 'title', 'title_upper', 'version')
+ 'title', 'version')
for name in self._docinfo_keys:
if name == 'address':
self.body.append("\n%s:\n%s%s.nf\n%s\n.fi\n%s%s" % (
@@ -629,7 +629,7 @@
label = self.language.labels.get(name, name)
self.body.append("\n%s: %s\n" % (label, self._docinfo[name]))
if self._docinfo['copyright']:
- self.body.append('.SH COPYRIGHT\n%s\n'
+ self.body.append('.SH Copyright\n%s\n'
% self._docinfo['copyright'])
self.body.append(self.comment_begin('End of generated man page.'))
@@ -1159,11 +1159,9 @@
self.body.append('.IP "')
elif self.section_level == 0:
self._docinfo['title'] = node.astext()
- # document title for .TH
- self._docinfo['title_upper'] = node.astext().upper()
raise nodes.SkipNode
elif self.section_level == 1:
- self.body.append('.SH %s\n'%self.deunicode(node.astext().upper()))
+ self.body.append('.SH %s\n'%self.deunicode(node.astext()))
raise nodes.SkipNode
else:
self.body.append('.SS ')
Modified: trunk/docutils/test/functional/expected/standalone_rst_manpage.man
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_manpage.man 2024-05-19 10:38:12 UTC (rev 9716)
+++ trunk/docutils/test/functional/expected/standalone_rst_manpage.man 2024-05-19 16:03:42 UTC (rev 9717)
@@ -28,8 +28,8 @@
.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
..
-.TH "RST2MAN" "1" "2006-10-22" "0.1" "text processing"
-.SH NAME
+.TH "rst2man" "1" "2006-10-22" "0.1" "text processing"
+.SH Name
rst2man \- generate unix manpages from reStructured text
.\" TODO: authors and author with name <email>
.
@@ -100,8 +100,8 @@
.UNINDENT
.sp
Discussion is still open.
-.SH AUTHOR
+.SH Author
gr...@us...
-.SH COPYRIGHT
+.SH Copyright
public domain
.\" End of generated man page.
Modified: trunk/docutils/test/test_writers/test_manpage.py
===================================================================
--- trunk/docutils/test/test_writers/test_manpage.py 2024-05-19 10:38:12 UTC (rev 9716)
+++ trunk/docutils/test/test_writers/test_manpage.py 2024-05-19 16:03:42 UTC (rev 9717)
@@ -77,7 +77,7 @@
totest['blank'] = [
["",
document_start + indend_macros + """.TH "" "" "" ""
-.SH NAME
+.SH Name
\\- \n\
.\\" End of generated man page.
"""],
@@ -89,11 +89,11 @@
""",
document_start + indend_macros +
-""".TH "HELLO, WORLD." "" "" ""
-.SH NAME
+""".TH "Hello, world." "" "" ""
+.SH Name
Hello, world. \\- \n\
.sp
-\\fBWARNING:\\fP
+\\fBWarning:\\fP
.INDENT 0.0
.INDENT 3.5
This broke docutils\\-sphinx.
@@ -165,8 +165,8 @@
""",
document_start + indend_macros +
"""\
-.TH "SIMPLE" "1" "2009-08-05" "0.1" "text processing"
-.SH NAME
+.TH "simple" "1" "2009-08-05" "0.1" "text processing"
+.SH Name
simple \\- The way to go
.SH SYNOPSIS
.INDENT 0.0
@@ -192,13 +192,13 @@
.B \\-\\-help\\fP,\\fB \\-h
Show this help message and exit.
.UNINDENT
-.SH OTHER SECTION
+.SH OtHeR SECTION
.sp
link to <http://docutils.sourceforge.io> \n\
.sp
With mixed case.
.sp
-\\fBATTENTION!:\\fP
+\\fBAttention!:\\fP
.INDENT 0.0
.INDENT 3.5
Admonition with title
@@ -222,11 +222,11 @@
.sp
and . in a line and at line start
\\&.in a paragraph
-.SH AUTHOR
+.SH Author
so...@so...
Arbitrary field: some text
-.SH COPYRIGHT
+.SH Copyright
public domain
.\\" End of generated man page.
"""],
@@ -244,7 +244,7 @@
'''\
\'\\" t
''' + document_start + indend_macros + '''.TH "" "" "" ""
-.SH NAME
+.SH Name
\\- \n\
.INDENT 0.0
.INDENT 3.5
@@ -295,7 +295,7 @@
bla bla bla
""",
document_start + indend_macros + """.TH "" "" "" ""
-.SH NAME
+.SH Name
\\- \n\
optin group with dot as group item
.INDENT 0.0
@@ -347,13 +347,13 @@
Description of Term 1 Description of Term 1
""",
-document_start + indend_macros + '''.TH "DEFINITION LIST TEST" "" "" ""
-.SH NAME
+document_start + indend_macros + '''.TH "Definition List Test" "" "" ""
+.SH Name
Definition List Test \\- \n\
''' + '''.SS Abstract
.sp
Docinfo is required.
-.SH SECTION
+.SH Section
.INDENT 0.0
.TP
.B term1
@@ -374,7 +374,7 @@
-i DEVICE, --input DEVICE input device
""",
document_start + indend_macros + """.TH "" "" "" ""
-.SH NAME
+.SH Name
\\- \n\
.INDENT 0.0
.TP
@@ -400,7 +400,7 @@
.. [empty_citation]
""",
document_start + indend_macros + """.TH "" "" "" ""
-.SH NAME
+.SH Name
\\- \n\
.IP [docutils] 5
blah blah blah
@@ -416,7 +416,7 @@
- a list
""",
document_start + indend_macros + """.TH "" "" "" ""
-.SH NAME
+.SH Name
\\- \n\
some rubric
.INDENT 0.0
@@ -436,7 +436,7 @@
They are "escaped" anywhere.
""",
document_start + indend_macros + """.TH "" "" "" ""
-.SH NAME
+.SH Name
\\- \n\
.INDENT 0.0
.TP
@@ -465,8 +465,8 @@
Test title, docinfo to man page header.
""",
-document_start + indend_macros + r""".TH "PAGE TITLE" "3" "3/Nov/2022" "0.0" "the books"
-.SH NAME
+document_start + indend_macros + r""".TH "page title" "3" "3/Nov/2022" "0.0" "the books"
+.SH Name
page title \- in short
.sp
Test title, docinfo to man page header.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-05-21 21:09:50
|
Revision: 9719
http://sourceforge.net/p/docutils/code/9719
Author: milde
Date: 2024-05-21 21:09:47 +0000 (Tue, 21 May 2024)
Log Message:
-----------
Stop generating invalid doctree if "use_latex_toc" setting is True.
The "use_latex_toc" setting tells Docutils to leave the ToC generation
to LaTeX (to get a ToC with page numbers).
The "contents" directive generates a `<topic>` node with a nested
`<pending>` node. By default, the `parts.Contents` transform replaces
the `<pending>` node with a generated ToC in a `<bullet_list>`.
Keep the `<pending>` node if "use_latex_toc" is True to avoid an
empty (and hence invalid) `<topic>`.
Change the LaTeX writer to extract the values of the "contents"
directive's options from the `<pending>` node.
This allows dropping "local" and "depth" from the attribute list of
`<topic>`, from the "attributes reference" section in doctree.txt, and
from the valid attributes in nodes.py.
Modified Paths:
--------------
trunk/docutils/docs/ref/doctree.txt
trunk/docutils/docs/ref/docutils.dtd
trunk/docutils/docutils/nodes.py
trunk/docutils/docutils/transforms/parts.py
trunk/docutils/docutils/writers/latex2e/__init__.py
trunk/docutils/test/test_writers/test_latex2e_misc.py
Modified: trunk/docutils/docs/ref/doctree.txt
===================================================================
--- trunk/docutils/docs/ref/doctree.txt 2024-05-19 16:17:27 UTC (rev 9718)
+++ trunk/docutils/docs/ref/doctree.txt 2024-05-21 21:09:47 UTC (rev 9719)
@@ -3763,8 +3763,7 @@
(title?, (%body.elements;)+)
-:Attributes: The <topic> element accepts the `common attributes`_ plus
- depth_ and local_.
+:Attributes: The <topic> element accepts the `common attributes`_.
:Parameter Entities: The `%structure.model`_ parameter entity
directly includes <topic>.
@@ -4322,15 +4321,7 @@
separating it from the `\<option_string>`_ (typically either "=" or " ")
or the text between option arguments (typically either "," or " ").
-``depth``
-=========
-Attribute type: `%number`_. Default value: none.
-
-The ``depth`` attribute may be used in a `\<topic>`_ element generated by
-the `"contents" directive`_ to hold the value of the "depth" option.
-
-
``dupnames``
============
@@ -4426,15 +4417,6 @@
The ``line`` attribute is used in the `\<system_message>`_ element.
-``local``
-=========
-
-Attribute type: `%yesorno`_. Default value: none.
-
-The ``local`` attribute may be used in a `\<topic>` element generated by
-the `"contents" directive`_ to hold the value of the "local" option.
-
-
``ltrim``
=========
Modified: trunk/docutils/docs/ref/docutils.dtd
===================================================================
--- trunk/docutils/docs/ref/docutils.dtd 2024-05-19 16:17:27 UTC (rev 9718)
+++ trunk/docutils/docs/ref/docutils.dtd 2024-05-21 21:09:47 UTC (rev 9719)
@@ -224,7 +224,7 @@
<!-- These parameter entities customize the table model DTD. -->
<!-- table element TODO: use %tbl.table.att. Keep or drop pgwide? -->
-<!ENTITY % bodyatt
+<!ENTITY % bodyatt
" %basic.atts;
%align-h.att;
width %measure; #IMPLIED ">
@@ -355,10 +355,7 @@
<!ATTLIST section %basic.atts;>
<!ELEMENT topic (title?, (%body.elements;)+)>
-<!ATTLIST topic
- %basic.atts;
- depth %number; #IMPLIED
- local %yesorno; #IMPLIED>
+<!ATTLIST topic %basic.atts;>
<!ELEMENT sidebar ((title, subtitle?)?, (%body.elements; | topic)+)>
<!ATTLIST sidebar %basic.atts;>
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-05-19 16:17:27 UTC (rev 9718)
+++ trunk/docutils/docutils/nodes.py 2024-05-21 21:09:47 UTC (rev 9719)
@@ -1817,8 +1817,6 @@
Topics cannot nest inside topics, or body elements; you can't have
a topic inside a table, list, block quote, etc.
"""
- # "depth" and "local" attributes may be added by the "Contents" transform:
- valid_attributes = Element.valid_attributes + ('depth', 'local')
valid_children = (title, Body) # (title?, (%body.elements;)+)
@@ -2886,7 +2884,6 @@
'colwidth': int, # sic! CALS: CDATA (measure or number+'*')
'content': str, # <meta>
'delimiter': str,
- 'depth': int,
'dir': validate_enumerated_type('ltr', 'rtl', 'auto'), # <meta>
'dupnames': validate_refname_list,
'enumtype': validate_enumerated_type('arabic', 'loweralpha', 'lowerroman',
@@ -2900,7 +2897,6 @@
'lang': str, # <meta>
'level': int,
'line': int,
- 'local': validate_yesorno,
'ltrim': validate_yesorno,
'loading': validate_enumerated_type('embed', 'link', 'lazy'),
'media': str, # <meta>
Modified: trunk/docutils/docutils/transforms/parts.py
===================================================================
--- trunk/docutils/docutils/transforms/parts.py 2024-05-19 16:17:27 UTC (rev 9718)
+++ trunk/docutils/docutils/transforms/parts.py 2024-05-21 21:09:47 UTC (rev 9719)
@@ -87,6 +87,9 @@
def apply(self):
# let the writer (or output software) build the contents list?
toc_by_writer = getattr(self.document.settings, 'use_latex_toc', False)
+ # TODO: handle "generate_oowriter_toc" setting of the "ODT" writer.
+ if toc_by_writer:
+ return
details = self.startnode.details
if 'local' in details:
startnode = self.startnode.parent.parent
@@ -101,16 +104,11 @@
self.backlinks = details['backlinks']
else:
self.backlinks = self.document.settings.toc_backlinks
- if toc_by_writer:
- # move customization settings to the parent node
- self.startnode.parent.attributes.update(details)
- self.startnode.parent.remove(self.startnode)
+ contents = self.build_contents(startnode)
+ if len(contents):
+ self.startnode.replace_self(contents)
else:
- contents = self.build_contents(startnode)
- if len(contents):
- self.startnode.replace_self(contents)
- else:
- self.startnode.parent.parent.remove(self.startnode.parent)
+ self.startnode.parent.parent.remove(self.startnode.parent)
def build_contents(self, node, level=0):
level += 1
Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py 2024-05-19 16:17:27 UTC (rev 9718)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py 2024-05-21 21:09:47 UTC (rev 9719)
@@ -3224,7 +3224,13 @@
return
# ToC by LaTeX
- depth = node.get('depth', 0)
+ try:
+ details = node.next_node(nodes.pending).details
+ except AttributeError:
+ self.warn('Setting "use_latex_toc" is True but "contents" details '
+ 'are missing. Directive option values may be lost.')
+ details = {}
+ depth = details.get('depth', 0)
maxdepth = len(self.d_class.sections)
if isinstance(node.next_node(), nodes.title):
title = self.encode(node[0].astext())
Modified: trunk/docutils/test/test_writers/test_latex2e_misc.py
===================================================================
--- trunk/docutils/test/test_writers/test_latex2e_misc.py 2024-05-19 16:17:27 UTC (rev 9718)
+++ trunk/docutils/test/test_writers/test_latex2e_misc.py 2024-05-21 21:09:47 UTC (rev 9719)
@@ -47,6 +47,7 @@
"""
settings = self.settings.copy()
settings['output_encoding'] = 'unicode'
+ settings['warning_stream'] = '' # don't warn for missing ToC details
doctree = core.publish_doctree(contents_test_input,
settings_overrides=settings)
result = core.publish_from_doctree(doctree,
@@ -53,7 +54,7 @@
writer_name='latex',
settings_overrides=settings)
self.assertNotIn(r'\item \hyperref[foo]{foo}', result)
- # self.assertIn(r'\tableofcontents', result)
+ self.assertIn(r'\tableofcontents', result)
def test_publish_parts(self):
"""Check for the presence of documented parts.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-05-31 14:42:08
|
Revision: 9722
http://sourceforge.net/p/docutils/code/9722
Author: milde
Date: 2024-05-31 14:42:06 +0000 (Fri, 31 May 2024)
Log Message:
-----------
xml-writer: improve formatting with "indents" setting.
Do not increase indentation of follow-up lines inside inline elements
when formatting XML output with "indents".
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/writers/docutils_xml.py
trunk/docutils/test/functional/expected/standalone_rst_docutils_xml.xml
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-05-31 14:41:57 UTC (rev 9721)
+++ trunk/docutils/HISTORY.txt 2024-05-31 14:42:06 UTC (rev 9722)
@@ -56,6 +56,13 @@
docutils version in header
- Stop converting text to full capitals (bug #481).
+* docutils/writers/docutils-xml.py
+
+ - Do not increase indentation of follow-up lines inside inline elements.
+ when formatting with `indents`_.
+
+
+
Release 0.21.2 (2024-04-23)
===========================
Modified: trunk/docutils/docutils/writers/docutils_xml.py
===================================================================
--- trunk/docutils/docutils/writers/docutils_xml.py 2024-05-31 14:41:57 UTC (rev 9721)
+++ trunk/docutils/docutils/writers/docutils_xml.py 2024-05-31 14:42:06 UTC (rev 9722)
@@ -119,7 +119,8 @@
if not self.in_simple:
self.output.append(self.indent*self.level)
self.output.append(node.starttag(xml.sax.saxutils.quoteattr))
- self.level += 1
+ if not isinstance(node, nodes.Inline):
+ self.level += 1
# `nodes.literal` is not an instance of FixedTextElement by design,
# see docs/ref/rst/restructuredtext.html#inline-literals
if isinstance(node, (nodes.FixedTextElement, nodes.literal)):
@@ -131,7 +132,8 @@
def default_departure(self, node):
"""Default node depart method."""
- self.level -= 1
+ if not isinstance(node, nodes.Inline):
+ self.level -= 1
if not self.in_simple:
self.output.append(self.indent*self.level)
self.output.append(node.endtag())
Modified: trunk/docutils/test/functional/expected/standalone_rst_docutils_xml.xml
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_docutils_xml.xml 2024-05-31 14:41:57 UTC (rev 9721)
+++ trunk/docutils/test/functional/expected/standalone_rst_docutils_xml.xml 2024-05-31 14:42:06 UTC (rev 9722)
@@ -241,7 +241,7 @@
(<reference refuri="http://www.python.org">http://www.python.org</reference>), external hyperlinks (<reference name="Python" refuri="http://www.python.org/">Python</reference> <footnote_reference auto="1" ids="footnote-reference-10" refid="footnote-6">5</footnote_reference>), internal
cross-references (<reference name="example" refid="example">example</reference>), external hyperlinks with embedded URIs
(<reference name="Python web site" refuri="http://www.python.org">Python web site</reference>), <reference anonymous="1" name="anonymous hyperlink references" refuri="http://www.python.org/">anonymous hyperlink
- references</reference> <footnote_reference auto="1" ids="footnote-reference-14" refid="footnote-6">5</footnote_reference> (<reference anonymous="1" name="a second reference" refuri="https://docutils.sourceforge.io/">a second reference</reference> <footnote_reference auto="1" ids="footnote-reference-15" refid="footnote-8">7</footnote_reference>), footnote references (manually
+ references</reference> <footnote_reference auto="1" ids="footnote-reference-14" refid="footnote-6">5</footnote_reference> (<reference anonymous="1" name="a second reference" refuri="https://docutils.sourceforge.io/">a second reference</reference> <footnote_reference auto="1" ids="footnote-reference-15" refid="footnote-8">7</footnote_reference>), footnote references (manually
numbered <footnote_reference ids="footnote-reference-1" refid="footnote-1">1</footnote_reference>, anonymous auto-numbered <footnote_reference auto="1" ids="footnote-reference-2" refid="footnote-2">3</footnote_reference>, labeled auto-numbered
<footnote_reference auto="1" ids="footnote-reference-3" refid="label">2</footnote_reference>, or symbolic <footnote_reference auto="*" ids="footnote-reference-4" refid="footnote-3">*</footnote_reference>), citation references (see <citation_reference ids="citation-reference-1" refid="cit2002">CIT2002</citation_reference>),
substitution references (<image alt="EXAMPLE" uri="../../../docs/user/rst/images/biohazard.png"></image> &
@@ -751,7 +751,7 @@
<target refid="example"></target>
<paragraph ids="example" names="example">This paragraph is pointed to by the explicit "example" target. A
reference can be found under <reference name="Inline Markup" refid="inline-markup">Inline Markup</reference>, above. <reference name="Inline hyperlink targets" refid="inline-hyperlink-targets">Inline
- hyperlink targets</reference> are also possible.</paragraph>
+ hyperlink targets</reference> are also possible.</paragraph>
<paragraph>Section headers are implicit targets, referred to by name. See
<reference name="Targets" refid="targets">Targets</reference>, which is a subsection of <reference name="Body Elements" refid="body-elements">Body Elements</reference>.</paragraph>
<paragraph>Explicit external targets are interpolated into references such as
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-05 10:22:30
|
Revision: 9723
http://sourceforge.net/p/docutils/code/9723
Author: milde
Date: 2024-06-05 10:22:23 +0000 (Wed, 05 Jun 2024)
Log Message:
-----------
Doctree validation: custom exception `nodes.ValidationError`.
The custom `ValidationError` has the additional attribute
`problematic_element` that holds the element close to the violation
("self" for invalid XML attributes and spurious text, or the child node
that does not fit in the content model).
Use it for warnings that report the source-code line
in `transforms.universal.Validate`.
Revise validation error messages.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/nodes.py
trunk/docutils/docutils/transforms/universal.py
trunk/docutils/test/test_nodes.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-05-31 14:42:06 UTC (rev 9722)
+++ trunk/docutils/HISTORY.txt 2024-06-05 10:22:23 UTC (rev 9723)
@@ -24,12 +24,14 @@
* docutils/nodes.py
- - Raise ValueError if the "rawsource" argument in `Element.__init__()`
+ - Raise TypeError if the "rawsource" argument in `Element.__init__()`
is an `Element` instance.
Catches errors like ``nodes.hint(nodes.paragraph())``.
- New element category classes `SubStructural` and `PureTextElement`.
- Fix element categories.
- - New method `Element.validate()` (work in progress).
+ - New method `Element.validate()`: raise `nodes.ValidationError` if
+ the element does not comply with the "Docutils Document Model"
+ (work in progress).
- New "attribute validating functions"
convert string representations to correct data type,
normalize values,
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-05-31 14:42:06 UTC (rev 9722)
+++ trunk/docutils/docutils/nodes.py 2024-06-05 10:22:23 UTC (rev 9723)
@@ -515,7 +515,7 @@
NOTE: some elements do not set this value (default '').
"""
if isinstance(rawsource, Element):
- raise ValueError('First argument "rawsource" must be a string.')
+ raise TypeError('First argument "rawsource" must be a string.')
self.children = []
"""List of child nodes (elements and/or `Text`)."""
@@ -1120,7 +1120,7 @@
Convert string values to expected datatype.
Normalize values.
- Raise `ValueError` for invalid attributes or attribute values.
+ Raise `ValidationError` for invalid attributes or attribute values.
Provisional.
"""
@@ -1129,7 +1129,7 @@
if key.startswith('internal:'):
continue # see docs/user/config.html#expose-internals
if key not in self.valid_attributes:
- va = ' '.join(self.valid_attributes)
+ va = '", "'.join(self.valid_attributes)
messages.append(f'Attribute "{key}" not one of "{va}".')
continue
try:
@@ -1136,23 +1136,28 @@
self.attributes[key] = ATTRIBUTE_VALIDATORS[key](value)
except (ValueError, TypeError, KeyError) as e:
messages.append(
- f'Attribute "{key}" has invalid value "{value}".\n{e}')
+ f'Attribute "{key}" has invalid value "{value}".\n {e}')
if messages:
- raise ValueError('\n'.join(messages))
+ raise ValidationError(f'Element {self.starttag()} invalid:\n '
+ + '\n '.join(messages),
+ problematic_element=self)
def validate(self):
- """Validate element against the Docutils Document Model ("doctree").
+ """Validate Docutils Document Tree element ("doctree").
- Raise ValueError if there are violations.
+ Raise ValidationError if there are violations.
+ See `The Docutils Document Tree`__ for details of the
+ Docutils Document Model.
+
+ __ https://docutils.sourceforge.io/docs/ref/doctree.html
+
Provisional (work in progress).
"""
+ self.validate_attributes()
+
+ # test number of children
messages = []
- try:
- self.validate_attributes()
- except ValueError as e:
- messages.append(str(e))
- # test number of children
n_min, n_max = self.valid_len
if len(self.children) < n_min:
messages.append(f'Expects at least {n_min} children, '
@@ -1212,7 +1217,7 @@
class SubStructural(SubRoot):
"""`Structural subelements`__ are children of structural elements.
- Most Structural elements accept only some of the SubStructural elements.
+ Most Structural elements accept only specific `SubStructural` elements.
__ https://docutils.sourceforge.io/docs/ref/doctree.html
#structural-subelements
@@ -2518,6 +2523,16 @@
self.parent = self.parent_stack.pop()
+# Custom Exceptions
+# =================
+
+class ValidationError(ValueError):
+ """Invalid Docutils Document Tree Element."""
+ def __init__(self, msg, problematic_element=None):
+ super().__init__(msg)
+ self.problematic_element = problematic_element
+
+
class TreePruningException(Exception):
"""
Base class for `NodeVisitor`-related tree pruning exceptions.
Modified: trunk/docutils/docutils/transforms/universal.py
===================================================================
--- trunk/docutils/docutils/transforms/universal.py 2024-05-31 14:42:06 UTC (rev 9722)
+++ trunk/docutils/docutils/transforms/universal.py 2024-06-05 10:22:23 UTC (rev 9723)
@@ -350,5 +350,11 @@
for node in self.document.findall():
try:
node.validate()
- except ValueError as e:
- self.document.reporter.warning(e.args[0], base_node=node)
+ except nodes.ValidationError as e:
+ self.document.reporter.warning(
+ str(e), base_node=e.problematic_element or node)
+ # TODO: append a link to the Document Tree documentation?
+ # nodes.paragraph('', 'See ',
+ # nodes.reference('', 'doctree.html#document',
+ # refuri='https://docutils.sourceforge.io/'
+ # 'docs/ref/doctree.html#document'),
Modified: trunk/docutils/test/test_nodes.py
===================================================================
--- trunk/docutils/test/test_nodes.py 2024-05-31 14:42:06 UTC (rev 9722)
+++ trunk/docutils/test/test_nodes.py 2024-06-05 10:22:23 UTC (rev 9723)
@@ -469,7 +469,11 @@
with self.assertWarns(DeprecationWarning):
node.set_class('parrot')
+
+class ElementValidationTests(unittest.TestCase):
+
def test_validate(self):
+ """Valid node: validation should simply pass."""
node = nodes.paragraph('', 'plain text', classes='my test classes')
node.append(nodes.emphasis('', 'emphasised text', ids='emphtext'))
node.validate()
@@ -487,15 +491,15 @@
def test_validate_wrong_attribute(self):
node = nodes.paragraph('', 'text', id='test-paragraph')
- with self.assertRaisesRegex(ValueError,
- 'Element <paragraph> invalid:\n'
- ' Attribute "id" not one of "ids '):
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Element <paragraph id=.*> invalid:\n'
+ ' Attribute "id" not one of "ids", '):
node.validate()
def test_validate_wrong_attribute_value(self):
node = nodes.image(uri='test.png', width='20 inch') # invalid unit
- with self.assertRaisesRegex(ValueError,
- 'Element <image> invalid:\n'
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Element <image.*> invalid:\n'
'.*"width" has invalid value "20 inch".\n'
'.*Valid units: em ex '):
node.validate()
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-05 10:29:16
|
Revision: 9727
http://sourceforge.net/p/docutils/code/9727
Author: milde
Date: 2024-06-05 10:29:13 +0000 (Wed, 05 Jun 2024)
Log Message:
-----------
Doctree validation: Drop category class `nodes.SubRoot`.
It was new in Docutils 0.22b.dev and is no longer required.
Modified Paths:
--------------
trunk/docutils/docs/ref/doctree.txt
trunk/docutils/docutils/nodes.py
Modified: trunk/docutils/docs/ref/doctree.txt
===================================================================
--- trunk/docutils/docs/ref/doctree.txt 2024-06-05 10:29:00 UTC (rev 9726)
+++ trunk/docutils/docs/ref/doctree.txt 2024-06-05 10:29:13 UTC (rev 9727)
@@ -180,7 +180,7 @@
:simple: `\<title>`_, `\<subtitle>`_
:compound: `\<decoration>`_, `\<docinfo>`_
-:Docutils classes: ``nodes.SubStructural``, ``nodes.SubRoot``
+:Docutils class: ``nodes.SubStructural``
Decorative Elements
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-06-05 10:29:00 UTC (rev 9726)
+++ trunk/docutils/docutils/nodes.py 2024-06-05 10:29:13 UTC (rev 9727)
@@ -1200,11 +1200,7 @@
"""
-class SubRoot:
- """Elements that may only be children of the root element."""
-
-
-class SubStructural(SubRoot):
+class SubStructural:
"""`Structural subelements`__ are children of `Structural` elements.
Most Structural elements accept only specific `SubStructural` elements.
@@ -1360,7 +1356,7 @@
# Meta-Data Element
# ==================
-class meta(PreBibliographic, SubRoot, Element):
+class meta(PreBibliographic, SubStructural, Element):
"""Container for "invisible" bibliographic data, or meta-data."""
valid_attributes = Element.valid_attributes + (
'content', 'dir', 'http-equiv', 'lang', 'media', 'name', 'scheme')
@@ -1370,7 +1366,7 @@
# Bibliographic Elements
# ========================
-class docinfo(SubRoot, Element):
+class docinfo(SubStructural, Element):
"""Container for displayed document meta-data."""
content_model = ( # (%bibliographic.elements;)+
(Bibliographic, '+'),)
@@ -1406,7 +1402,7 @@
class footer(Decorative, Element): pass
-class decoration(PreBibliographic, SubRoot, Element):
+class decoration(PreBibliographic, SubStructural, Element):
"""Container for `header` and `footer`."""
content_model = ( # (header?, footer?)
(header, '?'),
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-05 12:05:59
|
Revision: 9729
http://sourceforge.net/p/docutils/code/9729
Author: milde
Date: 2024-06-05 12:05:56 +0000 (Wed, 05 Jun 2024)
Log Message:
-----------
Doctree validation: Test and fix recursive validation.
Fixes [r9724].
Modified Paths:
--------------
trunk/docutils/docutils/nodes.py
trunk/docutils/test/test_nodes.py
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-06-05 10:29:22 UTC (rev 9728)
+++ trunk/docutils/docutils/nodes.py 2024-06-05 12:05:56 UTC (rev 9729)
@@ -398,7 +398,7 @@
def lstrip(self, chars=None):
return self.__class__(str.lstrip(self, chars))
- def validate(self):
+ def validate(self, recursive=True):
pass # Text nodes have no attributes and no children.
@@ -1161,7 +1161,7 @@
if recursive:
for child in self:
- child.validate()
+ child.validate(recursive=recursive)
# ====================
Modified: trunk/docutils/test/test_nodes.py
===================================================================
--- trunk/docutils/test/test_nodes.py 2024-06-05 10:29:22 UTC (rev 9728)
+++ trunk/docutils/test/test_nodes.py 2024-06-05 12:05:56 UTC (rev 9729)
@@ -478,6 +478,16 @@
node.append(nodes.emphasis('', 'emphasised text', ids='emphtext'))
node.validate()
+ def test_validate_invalid_descendent(self):
+ paragraph = nodes.paragraph('', 'plain text')
+ tip = nodes.tip('', paragraph)
+ paragraph.append(nodes.strong('doll', id='missing-es'))
+ tip.validate(recursive=False)
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Element <strong id=.*> invalid:\n'
+ ' Attribute "id" not one of "ids", '):
+ tip.validate()
+
def test_validate_attributes(self):
# Convert to expected data-type, normalize values,
# cf. AttributeTypeTests below for attribute validating function tests.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-05 15:54:11
|
Revision: 9730
http://sourceforge.net/p/docutils/code/9730
Author: milde
Date: 2024-06-05 15:54:07 +0000 (Wed, 05 Jun 2024)
Log Message:
-----------
Doctree validation: validate content (part 1, simple models)
Check simple content models (without nested sequences).
Modified Paths:
--------------
trunk/docutils/docutils/nodes.py
trunk/docutils/test/test_nodes.py
trunk/docutils/test/test_writers/test_latex2e.py
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-06-05 12:05:56 UTC (rev 9729)
+++ trunk/docutils/docutils/nodes.py 2024-06-05 15:54:07 UTC (rev 9730)
@@ -1142,6 +1142,58 @@
+ '\n '.join(messages),
problematic_element=self)
+ def validate_content(self, model=None, elements=None):
+ """Test compliance of `elements` with `model`.
+
+ :model: content model description, default `self.content_model`,
+ :elements: list of doctree elements, default `self.children`.
+
+ Return list of children that do not fit in the model or raise
+ `ValidationError` if the content does not comply with the `model`.
+
+ Provisional.
+ """
+ if model is None:
+ model = self.content_model
+ if elements is None:
+ elements = self.children
+ ichildren = iter(elements)
+ child = next(ichildren, None)
+ for category, quantifier in model:
+ if not isinstance(child, category):
+ if quantifier in ('.', '+'):
+ raise ValidationError(self._report_child(child, category),
+ problematic_element=child)
+ else: # quantifier in ('?', '*') -> optional child
+ continue # try same child with next part of content model
+ # TODO: check additional placement constraints (if applicable)
+ # child.check_position()
+ # advance:
+ if quantifier in ('.', '?'): # go to next element
+ child = next(ichildren, None)
+ else: # if quantifier in ('*', '+'): # pass all matching elements
+ for child in ichildren:
+ if not isinstance(child, category):
+ break
+ else:
+ child = None
+ return [] if child is None else [child, *ichildren]
+
+ def _report_child(self, child, category):
+ # Return a str reporting a missing child or child of wrong category.
+ try:
+ type = category.__name__
+ except AttributeError:
+ type = '> or <'.join(c.__name__ for c in category)
+ msg = f'Element {self.starttag()} invalid:\n'
+ if child is None:
+ return f'{msg} Missing child of type <{type}>.'
+ if isinstance(child, Text):
+ return (f'{msg} Expecting child of type <{type}>, '
+ f'not text data "{child.astext()}".')
+ return (f'{msg} Expecting child of type <{type}>, '
+ f'not {child.starttag()}.')
+
def validate(self, recursive=True):
"""Validate Docutils Document Tree element ("doctree").
@@ -1156,9 +1208,19 @@
Provisional (work in progress).
"""
self.validate_attributes()
-
- # TODO: validate content
+ leftover_childs = self.validate_content()
+ for child in leftover_childs:
+ if isinstance(child, Text):
+ raise ValidationError(f'Element {self.starttag()} invalid:\n'
+ f' Spurious text: "{child.astext()}".',
+ problematic_element=self)
+ else:
+ raise ValidationError(f'Element {self.starttag()} invalid:\n'
+ f' Child element {child.starttag()} '
+ 'not allowed at this position.',
+ problematic_element=child)
+
if recursive:
for child in self:
child.validate(recursive=recursive)
Modified: trunk/docutils/test/test_nodes.py
===================================================================
--- trunk/docutils/test/test_nodes.py 2024-06-05 12:05:56 UTC (rev 9729)
+++ trunk/docutils/test/test_nodes.py 2024-06-05 15:54:07 UTC (rev 9730)
@@ -514,7 +514,262 @@
'.*Valid units: em ex '):
node.validate()
+ def test_validate_spurious_element(self):
+ label = nodes.label('', '*')
+ label.append(nodes.strong())
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Element <label> invalid:\n'
+ ' Child element <strong> not allowed '):
+ label.validate()
+ def test_validate_content(self):
+ """Check whether an element's children fit into its content model.
+
+ Return an empty list for valid elements,
+ a list of the spurious children if children don't match.
+ """
+ # sample elements
+ inline = nodes.inline() # inline element
+ text = nodes.Text('explanation') # <#text>
+ hint = nodes.hint() # body element
+
+ # empty element: (EMPTY)
+ image = nodes.image('')
+ self.assertEqual(image.validate_content(), [])
+ image.append(text)
+ self.assertEqual(image.validate_content(), [text])
+ # validate() reports the leftover text as ValidationError ("Spurious text").
+
+ # TextElement: (#PCDATA | %inline.elements;)*
+ paragraph = nodes.paragraph() # empty element
+ self.assertEqual(paragraph.validate_content(), [])
+ paragraph = nodes.paragraph('', 'text') # just text
+ self.assertEqual(paragraph.validate_content(), [])
+ paragraph.extend([inline, nodes.Text('text 2'), nodes.math()])
+ self.assertEqual(paragraph.validate_content(), [])
+ paragraph.append(hint) # body element (sic!)
+ paragraph.append(text)
+ self.assertEqual(paragraph.validate_content(), [hint, text])
+ # validate() reports "relics" as ValidationError:
+ with self.assertRaisesRegex(nodes.ValidationError,
+ '<paragraph> invalid:\n'
+ ' Child element <hint> not allowed '):
+ paragraph.validate()
+
+ # PureTextElement: (#PCDATA)
+ label = nodes.label() # empty element
+ self.assertEqual(label.validate_content(), [])
+ label = nodes.label('', '†')
+ self.assertEqual(label.validate_content(), [])
+ label.append(inline) # sic!
+ self.assertEqual(label.validate_content(), [inline])
+
+ # docinfo: (%bibliographic.elements;)+
+ docinfo = nodes.docinfo() # empty element (sic!)
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Missing child of type <Bibliographic>.'):
+ docinfo.validate_content()
+ docinfo.append(nodes.paragraph())
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Expecting .* <Bibliographic>, not '):
+ docinfo.validate_content()
+ docinfo = nodes.docinfo('', nodes.authors(), nodes.contact())
+ self.assertEqual(docinfo.validate_content(), [])
+ docinfo.append(hint) # sic!
+ self.assertEqual(docinfo.validate_content(), [hint])
+
+ # decoration: (header?, footer?)
+ decoration = nodes.decoration() # empty element
+ self.assertEqual(decoration.validate_content(), [])
+ decoration = nodes.decoration('', nodes.header(), nodes.footer())
+ self.assertEqual(decoration.validate_content(), [])
+ header = nodes.header()
+ decoration.append(header) # 3rd element (sic!)
+ self.assertEqual(decoration.validate_content(), [header])
+ decoration = nodes.decoration('', nodes.footer())
+ self.assertEqual(decoration.validate_content(), [])
+ decoration.append(header) # wrong order!
+ self.assertEqual(decoration.validate_content(), [header])
+
+ # Body elements have a range of different content models.
+
+ # container: (%body.elements;)+
+ container = nodes.container() # empty (sic!)
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Missing child of type <Body>.'):
+ container.validate_content()
+ container.append(inline) # sic!
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Expecting child of type <Body>, not <in'):
+ container.validate_content()
+ container = nodes.container('', nodes.paragraph()) # one body element
+ self.assertEqual(container.validate_content(), []) # valid
+ container.append(nodes.tip()) # more body elements
+ self.assertEqual(container.validate_content(), []) # valid
+ container.append(inline) # sic!
+ self.assertEqual(container.validate_content(), [inline])
+
+ # block_quote: ((%body.elements;)+, attribution?)
+ block_quote = nodes.block_quote('', hint, nodes.table())
+ self.assertEqual(block_quote.validate_content(), [])
+ block_quote.append(nodes.attribution())
+ self.assertEqual(block_quote.validate_content(), [])
+ block_quote.append(hint) # element after attribution (sic!)
+ self.assertEqual(block_quote.validate_content(), [hint])
+
+ # list item (%body.elements;)*
+ list_item = nodes.list_item() # empty list item is valid
+ self.assertEqual(list_item.validate_content(), [])
+ list_item.append(nodes.bullet_list()) # lists may be nested
+ list_item.append(paragraph)
+ self.assertEqual(list_item.validate_content(), [])
+ list_item.append(inline) # sic!
+ self.assertEqual(list_item.validate_content(), [inline])
+
+ # bullet_list, enumerated_list: (list_item+)
+ bullet_list = nodes.bullet_list() # empty (sic!)
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Missing child of type <list_item>.'):
+ bullet_list.validate_content()
+ bullet_list.extend([list_item, list_item, list_item])
+ self.assertEqual(bullet_list.validate_content(), [])
+ bullet_list.append(hint) # must nest in <list_item>
+ self.assertEqual(bullet_list.validate_content(), [hint])
+
+ # definition_list_item: (term, classifier*, definition)
+ definition_list_item = nodes.definition_list_item()
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Element <definition_list_item> invalid:\n'
+ ' Missing child of type <term>.'):
+ definition_list_item.validate_content(),
+ definition_list_item.append(nodes.term())
+ definition_list_item.append(nodes.definition())
+ self.assertEqual(definition_list_item.validate_content(), [])
+ definition_list_item.children.insert(1, nodes.classifier())
+ definition_list_item.children.insert(1, nodes.classifier())
+ self.assertEqual(definition_list_item.validate_content(), [])
+
+ # field: (field_name, field_body)
+ field = nodes.field()
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Missing child of type <field_name>.'):
+ field.validate_content()
+ field.extend([nodes.field_name(), nodes.field_body()])
+ self.assertEqual(field.validate_content(), [])
+ field = nodes.field('', nodes.field_body(), nodes.field_name())
+ # wrong order!
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Expecting child of type <field_name>,'
+ ' not <field_body>.'):
+ field.validate_content()
+
+ # option: (option_string, option_argument*)
+ option = nodes.option()
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Missing child of type <option_string>.'):
+ option.validate_content()
+ option.append(nodes.paragraph()) # sic!
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Expecting child of type <option_string>,'
+ ' not <paragraph>.'):
+ option.validate_content()
+ option = nodes.option('', nodes.option_string())
+ self.assertEqual(option.validate_content(), [])
+ option.append(nodes.option_argument())
+ self.assertEqual(option.validate_content(), [])
+
+ # line_block: (line | line_block)+
+ line_block = nodes.line_block() # sic!
+ with self.assertRaisesRegex(nodes.ValidationError,
+ ' child of type <line> or <line_block>.'):
+ line_block.validate_content()
+ line_block.append(nodes.line_block())
+ self.assertEqual(line_block.validate_content(), [])
+ line_block = nodes.line_block('', nodes.paragraph(), nodes.line())
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Expecting child of type <line> or '
+ '<line_block>, not <paragraph>.'):
+ line_block.validate_content()
+
+ # admonition: (title, (%body.elements;)+)
+ admonition = nodes.admonition('', nodes.paragraph())
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Expecting child of type <title>,'
+ ' not <paragraph>.'):
+ admonition.validate_content()
+ admonition = nodes.admonition('', nodes.title(), nodes.paragraph())
+ self.assertEqual(admonition.validate_content(), [])
+
+ # specific admonitions: (%body.elements;)+
+ note = nodes.note()
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Missing child of type <Body>.'):
+ note.validate_content()
+ note.append(nodes.enumerated_list())
+ self.assertEqual(note.validate_content(), [])
+
+ # footnote: (label?, (%body.elements;)+)
+ # TODO: use case for footnote without label (make it required?)
+ # rST parser can generate footnotes without body elements!
+ footnote = nodes.footnote('', hint)
+ self.assertEqual(footnote.validate_content(), [])
+
+ # citation: (label, (%body.elements;)+)
+ # TODO: rST parser allows empty citation
+ # (see test_rst/test_citations.py). Is this sensible?
+ citation = nodes.citation('', hint)
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Expecting child of type <label>,'
+ ' not <hint>.'):
+ citation.validate_content()
+
+ # Table group: (colspec*, thead?, tbody)
+ tgroup = nodes.tgroup() # empty (sic!)
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Missing child of type <tbody>.'):
+ tgroup.validate_content()
+ tgroup = nodes.tgroup('', nodes.colspec(), nodes.colspec(),
+ nodes.thead(), nodes.tbody())
+ self.assertEqual(tgroup.validate_content(), [])
+ thead = nodes.thead()
+ tgroup = nodes.tgroup('', nodes.tbody(), thead) # wrong order!
+ self.assertEqual(tgroup.validate_content(), [thead])
+
+ def test_validate_content_authors(self):
+ """Return empty list for valid elements, raise ValidationError else.
+
+ Specific method for `authors` instances: complex content model
+ requires repeated application of `authors.content_model`.
+ """
+ authors = nodes.authors()
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Missing child of type <author>.'):
+ authors.validate_content()
+ authors.extend([nodes.author(), nodes.address(), nodes.contact()])
+ self.assertEqual(authors.validate_content(), [])
+ # TODO: check content model again, with next set of elements
+
+ def test_validate_content_subtitle(self):
+ """<subtitle> must follow a <title>.
+ """
+ subtitle = nodes.subtitle()
+ paragraph = nodes.paragraph()
+ sidebar = nodes.sidebar('', subtitle, paragraph)
+ # TODO additional restriction "only after title"
+ sidebar.validate_content()
+
+ def test_validate_content_transition(self):
+ """Test additional constraints on <transition> placement:
+ not at the beginning or end of a section or document,
+ not after another transition.
+ """
+ transition = nodes.transition()
+ paragraph = nodes.paragraph()
+ section = nodes.section('', nodes.title(), transition, paragraph)
+ # TODO: additional restrictions on transition placement.
+ section.validate_content()
+
+
class MiscTests(unittest.TestCase):
def test_node_class_names(self):
Modified: trunk/docutils/test/test_writers/test_latex2e.py
===================================================================
--- trunk/docutils/test/test_writers/test_latex2e.py 2024-06-05 12:05:56 UTC (rev 9729)
+++ trunk/docutils/test/test_writers/test_latex2e.py 2024-06-05 15:54:07 UTC (rev 9730)
@@ -55,7 +55,9 @@
self.assertEqual(expected, output)
def test_defaults(self):
- self.run_samples(samples_default, self.settings)
+ settings = self.settings.copy()
+ settings['validate'] = False # we test an invalid footnote
+ self.run_samples(samples_default, settings)
def test_docutils_toc(self):
settings = self.settings.copy()
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-05 15:54:19
|
Revision: 9731
http://sourceforge.net/p/docutils/code/9731
Author: milde
Date: 2024-06-05 15:54:16 +0000 (Wed, 05 Jun 2024)
Log Message:
-----------
Doctree validation: fix check of `authors` content.
The `authors` element has a complex content model that cannot be
represented with the data-structure used in `Element.content_model`:
(author, organization?, address?, contact?)+
Use specific method in `nodes.authors` to repeatedly test a
simplified content model description until all children are handled.
Modified Paths:
--------------
trunk/docutils/docutils/nodes.py
trunk/docutils/test/test_nodes.py
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-06-05 15:54:07 UTC (rev 9730)
+++ trunk/docutils/docutils/nodes.py 2024-06-05 15:54:16 UTC (rev 9731)
@@ -1901,7 +1901,17 @@
(address, '?'),
(contact, '?'))
+ def validate_content(self):
+ """Repeatedly test for children matching the content model.
+ Provisional.
+ """
+ relics = super().validate_content()
+ while relics:
+ relics = super().validate_content(elements=relics)
+ return relics
+
+
# Body Elements
# =============
#
Modified: trunk/docutils/test/test_nodes.py
===================================================================
--- trunk/docutils/test/test_nodes.py 2024-06-05 15:54:07 UTC (rev 9730)
+++ trunk/docutils/test/test_nodes.py 2024-06-05 15:54:16 UTC (rev 9731)
@@ -747,7 +747,14 @@
authors.validate_content()
authors.extend([nodes.author(), nodes.address(), nodes.contact()])
self.assertEqual(authors.validate_content(), [])
- # TODO: check content model again, with next set of elements
+ authors.append(nodes.hint())
+ with self.assertRaisesRegex(nodes.ValidationError,
+ ' child of type <author>, not <hint>.'):
+ authors.validate_content()
+ authors.extend([nodes.author(), nodes.tip(), nodes.contact()])
+ with self.assertRaisesRegex(nodes.ValidationError,
+ ' child of type <author>, not <hint>.'):
+ authors.validate_content()
def test_validate_content_subtitle(self):
"""<subtitle> must follow a <title>.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-05 15:54:32
|
Revision: 9732
http://sourceforge.net/p/docutils/code/9732
Author: milde
Date: 2024-06-05 15:54:27 +0000 (Wed, 05 Jun 2024)
Log Message:
-----------
Doctree validation: additional checks for `subtitle` and `transition` position.
A `subtitle` must not be used without preceding `title`.
A `transition` must not be used at the start or end of a section or document,
or after another transition.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/nodes.py
trunk/docutils/test/test_nodes.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-06-05 15:54:16 UTC (rev 9731)
+++ trunk/docutils/HISTORY.txt 2024-06-05 15:54:27 UTC (rev 9732)
@@ -30,8 +30,8 @@
- New element category classes `SubStructural` and `PureTextElement`.
- Fix element categories.
- New method `Element.validate()`: raise `nodes.ValidationError` if
- the element does not comply with the "Docutils Document Model"
- (work in progress).
+ the element does not comply with the "Docutils Document Model".
+ Provisional.
- New "attribute validating functions"
convert string representations to correct data type,
normalize values,
Modified: trunk/docutils/docutils/nodes.py
===================================================================
--- trunk/docutils/docutils/nodes.py 2024-06-05 15:54:16 UTC (rev 9731)
+++ trunk/docutils/docutils/nodes.py 2024-06-05 15:54:27 UTC (rev 9732)
@@ -399,9 +399,14 @@
return self.__class__(str.lstrip(self, chars))
def validate(self, recursive=True):
- pass # Text nodes have no attributes and no children.
+ """Validate Docutils Document Tree element ("doctree")."""
+ # Text nodes have no attributes and no children.
+ def check_position(self):
+ """Hook for additional checks of the parent's content model."""
+ # no special placement requirements for Text nodes
+
class Element(Node):
"""
`Element` is the superclass to all specific elements.
@@ -1166,8 +1171,9 @@
problematic_element=child)
else: # quantifier in ('?', '*') -> optional child
continue # try same child with next part of content model
- # TODO: check additional placement constraints (if applicable)
- # child.check_position()
+ else:
+ # Check additional placement constraints (if applicable):
+ child.check_position()
# advance:
if quantifier in ('.', '?'): # go to next element
child = next(ichildren, None)
@@ -1175,6 +1181,7 @@
for child in ichildren:
if not isinstance(child, category):
break
+ child.check_position()
else:
child = None
return [] if child is None else [child, *ichildren]
@@ -1194,6 +1201,13 @@
return (f'{msg} Expecting child of type <{type}>, '
f'not {child.starttag()}.')
+ def check_position(self):
+ """Hook for additional checks of the parent's content model.
+
+ Raise ValidationError, if `self` is at an invalid position.
+ See `subtitle.check_position()` and `transition.check_position()`.
+ """
+
def validate(self, recursive=True):
"""Validate Docutils Document Tree element ("doctree").
@@ -1420,9 +1434,17 @@
valid_attributes = Element.valid_attributes + ('auto', 'refid')
-class subtitle(Titular, PreBibliographic, SubStructural, TextElement): pass
+class subtitle(Titular, PreBibliographic, SubStructural, TextElement):
+ """Sub-title of `document`, `section` and `sidebar`."""
+ def check_position(self):
+ """Check position of subtitle: must follow a title."""
+ if self.parent and self.parent.index(self) == 0:
+ raise ValidationError(f'Element {self.parent.starttag()} invalid:'
+ '\n <subtitle> only allowed after <title>.',
+ problematic_element=self)
+
class meta(PreBibliographic, SubStructural, Element):
"""Container for "invisible" bibliographic data, or meta-data."""
valid_attributes = Element.valid_attributes + (
@@ -1453,13 +1475,36 @@
class transition(SubStructural, Element):
- """Transitions are breaks between untitled text parts.
+ """Transitions__ are breaks between untitled text parts.
- A transition may not begin or end a section or document, nor may two
- transitions be immediately adjacent.
+ __ https://docutils.sourceforge.io/docs/ref/doctree.html#transition
"""
+ def check_position(self):
+ """Check additional constraints on `transition` placement.
+ A transition may not begin or end a section or document,
+ nor may two transitions be immediately adjacent.
+ """
+ messages = [f'Element {self.parent.starttag()} invalid:']
+ predecessor = self.previous_sibling()
+ if (predecessor is None # index == 0
+ or isinstance(predecessor, (title, subtitle, meta, decoration))
+ # A transition following these elements still counts as
+ # "at the beginning of a document or section".
+ ):
+ messages.append(
+ '<transition> may not begin a section or document.')
+ if self.parent.index(self) == len(self.parent) - 1:
+ messages.append('<transition> may not end a section or document.')
+ if isinstance(predecessor, transition):
+ messages.append(
+ '<transition> may not directly follow another transition.')
+ if len(messages) > 1:
+ raise ValidationError('\n '.join(messages),
+ problematic_element=self)
+
+
# Structural Elements
# ===================
@@ -1486,11 +1531,15 @@
content_model = ( # ((title, subtitle?)?, (%body.elements; | topic)+)
(title, '?'),
(subtitle, '?'),
- ((topic, Body), '+')) # TODO complex model
+ ((topic, Body), '+'))
+ # "subtitle only after title" is ensured in `subtitle.check_position()`.
class section(Structural, Element):
- """Document section. The main unit of hierarchy."""
+ """Document section__. The main unit of hierarchy.
+
+ __ https://docutils.sourceforge.io/docs/ref/doctree.html#section
+ """
# recursive content model, see below
@@ -1499,7 +1548,8 @@
(subtitle, '?'),
((Body, topic, sidebar, transition), '*'),
((section, transition), '*'),
- ) # TODO complex model
+ )
+# Correct transition placement is ensured in `transition.check_position()`.
# Root Element
@@ -1527,7 +1577,7 @@
((Body, topic, sidebar, transition), '*'),
((section, transition), '*'),
)
- # additional restrictions for `subtitle` and `transition` will be tested
+ # Additional restrictions for `subtitle` and `transition` are tested
# with the respective `check_position()` methods.
def __init__(self, settings, reporter, *args, **kwargs):
Modified: trunk/docutils/test/test_nodes.py
===================================================================
--- trunk/docutils/test/test_nodes.py 2024-06-05 15:54:16 UTC (rev 9731)
+++ trunk/docutils/test/test_nodes.py 2024-06-05 15:54:27 UTC (rev 9732)
@@ -762,8 +762,9 @@
subtitle = nodes.subtitle()
paragraph = nodes.paragraph()
sidebar = nodes.sidebar('', subtitle, paragraph)
- # TODO additional restriction "only after title"
- sidebar.validate_content()
+ with self.assertRaisesRegex(nodes.ValidationError,
+ '<subtitle> only allowed after <title>.'):
+ sidebar.validate_content()
def test_validate_content_transition(self):
"""Test additional constraints on <transition> placement:
@@ -773,8 +774,20 @@
transition = nodes.transition()
paragraph = nodes.paragraph()
section = nodes.section('', nodes.title(), transition, paragraph)
- # TODO: additional restrictions on transition placement.
- section.validate_content()
+ with self.assertRaisesRegex(nodes.ValidationError,
+ '<transition> may not begin a section '):
+ section.validate_content()
+ section = nodes.section('', nodes.title(), paragraph, transition)
+ with self.assertRaisesRegex(nodes.ValidationError,
+ '<transition> may not end a section '):
+ section.validate_content()
+ section = nodes.section('', nodes.title(), paragraph,
+ nodes.transition(), transition)
+ with self.assertRaisesRegex(nodes.ValidationError,
+ 'Element <section> invalid:\n'
+ ' <transition> may not end .*\n'
+ ' <transition> may not directly '):
+ section.validate_content()
class MiscTests(unittest.TestCase):
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-06 14:01:25
|
Revision: 9733
http://sourceforge.net/p/docutils/code/9733
Author: milde
Date: 2024-06-06 14:01:22 +0000 (Thu, 06 Jun 2024)
Log Message:
-----------
Start development of a new "xml" parser for Docutils XML.
First draft implementation, documentation update, and tests.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docs/ref/rst/directives.txt
trunk/docutils/docs/user/config.txt
trunk/docutils/docutils/parsers/__init__.py
trunk/docutils/test/test_parsers/test_get_parser_class.py
Added Paths:
-----------
trunk/docutils/docutils/parsers/docutils_xml.py
trunk/docutils/test/test_parsers/test_docutils_xml/
trunk/docutils/test/test_parsers/test_docutils_xml/__init__.py
trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-06-05 15:54:27 UTC (rev 9732)
+++ trunk/docutils/HISTORY.txt 2024-06-06 14:01:22 UTC (rev 9733)
@@ -37,6 +37,10 @@
normalize values,
raise ValueError for invalid attribute names or values.
+* docutils/parsers/docutils_xml.py
+
+ - New parser for Docutils XML sources. Provisional.
+
* docutils/parsers/recommonmark_wrapper.py
- New method `Parser.finish_parse()` to clean up (before validating).
@@ -1019,8 +1023,10 @@
* docutils/writers/pseudoxml.py
- - New option `detailled`_.
+ - New option `detailled`__.
+ __ detailed_
+
* test/DocutilsTestSupport.py
- Run python3 test like python2 against source not the build/-directory
Modified: trunk/docutils/docs/ref/rst/directives.txt
===================================================================
--- trunk/docutils/docs/ref/rst/directives.txt 2024-06-05 15:54:27 UTC (rev 9732)
+++ trunk/docutils/docs/ref/rst/directives.txt 2024-06-06 14:01:22 UTC (rev 9733)
@@ -1599,6 +1599,8 @@
``parser`` : text_ (parser name)
Parse the included content with the specified parser.
+ See the `"parser" configuration setting`_ for available parsers.
+
(New in Docutils 0.17)
``start-after`` : text_
@@ -2249,6 +2251,7 @@
.. _image_loading: ../../user/config.html#image-loading
.. _input_encoding: ../../user/config.html#input-encoding
.. _math_output: ../../user/config.html#math-output
+.. _"parser" configuration setting: ../../user/config.html#parser
.. _raw_enabled: ../../user/config.html#raw-enabled
.. _root_prefix: ../../user/config.html#root-prefix
.. _sectnum_xform: ../../user/config.html#sectnum-xform
Modified: trunk/docutils/docs/user/config.txt
===================================================================
--- trunk/docutils/docs/user/config.txt 2024-06-05 15:54:27 UTC (rev 9732)
+++ trunk/docutils/docs/user/config.txt 2024-06-06 14:01:22 UTC (rev 9733)
@@ -935,12 +935,11 @@
and it is left if the reference style is "brackets".
-.. _myst:
-
[myst parser]
-------------
-Provided by the 3rd party package `myst-docutils`_.
+Parser for Markdown (CommonMark_) with rST-compatibility extensions
+provided by the 3rd party package `myst-docutils`_.
See `MyST with Docutils`_ and MyST's `Sphinx configuration options`_
(some settings are not applicable with Docutils).
@@ -951,32 +950,49 @@
https://myst-parser.readthedocs.io/en/latest/sphinx/reference.html#sphinx-config-options
-.. _pycmark:
-
[pycmark parser]
----------------
-Provided by the 3rd party package `pycmark`__.
+Parser for Markdown (CommonMark_)
+provided by the 3rd party package `pycmark`_.
Currently no configuration settings.
-__ https://pypi.org/project/pycmark/
+.. _pycmark: https://pypi.org/project/pycmark/
-.. _recommonmark:
-
[recommonmark parser]
---------------------
+Parser for Markdown (CommonMark_)
+provided by the 3rd party package recommonmark_.
+
.. admonition:: Deprecated
- Depends on deprecated 3rd-party package recommonmark__.
+ Depends on deprecated 3rd-party package recommonmark_.
Support will be removed in Docutils 1.0.
Currently no configuration settings.
-__ https://pypi.org/project/recommonmark/
+.. _recommonmark: https://pypi.org/project/recommonmark/
+[xml parser]
+------------
+
+The `Docutils XML parser` processes an XML representation of a
+`Docutils Document Tree`_
+(e.g. the output of the `Docutils XML writer <[docutils_xml writer]_>`__).
+
+New in Docutils 0.22
+
+Parser Specific Defaults
+~~~~~~~~~~~~~~~~~~~~~~~~
+.. class:: run-in narrow
+
+:doctitle_xform_: False.
+:validate_: True.
+
+
[readers]
=========
@@ -2285,15 +2301,17 @@
parser
~~~~~~
Parser component name.
-Either "rst" (default) or the import name of a plug-in parser module.
+Either "`rst <[restructuredtext parser]_>`__" (default),
+"`xml <[xml parser]_>`__", or the import name of a plug-in parser module.
-Parsers for CommonMark_ known to work with Docutils include "pycmark_",
-"myst_", and "recommonmark_".
+Parsers for CommonMark_ known to work with Docutils include
+"`pycmark <[pycmark parser]_>`__", "`myst <[myst parser]_>`__",
+and "`recommonmark <[recommonmark parser]_>`__".
*Default*: "rst".
*Option*: ``--parser``
-.. _CommonMark: https://spec.commonmark.org/0.30/
+.. _CommonMark: https://spec.commonmark.org/current/
.. _writer [docutils application]:
@@ -2417,6 +2435,7 @@
.. References
+.. _Docutils Document Tree:
.. _Document Tree: ../ref/doctree.html
.. _Docutils Runtime Settings:
Modified: trunk/docutils/docutils/parsers/__init__.py
===================================================================
--- trunk/docutils/docutils/parsers/__init__.py 2024-06-05 15:54:27 UTC (rev 9732)
+++ trunk/docutils/docutils/parsers/__init__.py 2024-06-06 14:01:22 UTC (rev 9733)
@@ -82,6 +82,9 @@
'rest': 'docutils.parsers.rst',
'restx': 'docutils.parsers.rst',
'rtxt': 'docutils.parsers.rst',
+ # Docutils XML
+ 'docutils_xml': 'docutils.parsers.docutils_xml',
+ 'xml': 'docutils.parsers.docutils_xml',
# 3rd-party Markdown parsers
'recommonmark': 'docutils.parsers.recommonmark_wrapper',
'myst': 'myst_parser.docutils_',
Added: trunk/docutils/docutils/parsers/docutils_xml.py
===================================================================
--- trunk/docutils/docutils/parsers/docutils_xml.py (rev 0)
+++ trunk/docutils/docutils/parsers/docutils_xml.py 2024-06-06 14:01:22 UTC (rev 9733)
@@ -0,0 +1,104 @@
+#! /usr/bin/env python3
+# :Copyright: © 2024 Günter Milde.
+# :License: Released under the terms of the `2-Clause BSD license`_, in short:
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+# This file is offered as-is, without any warranty.
+#
+# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
+#
+# Revision: $Revision$
+# Date: $Date$
+
+"""A Docutils-XML parser.
+
+ Provisional: The API is not fixed yet.
+ Defined objects may be renamed or changed in any Docutils release
+ without prior notice.
+"""
+
+import xml.etree.ElementTree as ET
+
+from docutils import nodes, parsers
+
+
+class Parser(parsers.Parser):
+
+ """A Docutils-XML parser."""
+
+ supported = ('xml', 'docutils-xml')
+ """Aliases this parser supports."""
+
+ config_section = 'xml parser'
+ config_section_dependencies = ('parsers',)
+ settings_default_overrides = {'doctitle_xform': False,
+ 'validate': True,
+ }
+
+ def parse(self, inputstring, document):
+ """
+ Parse `inputstring` and populate `document`, a "document tree".
+
+ Provisional.
+ """
+ self.setup_parse(inputstring, document)
+
+ # get ElementTree
+ root = ET.fromstring(inputstring)
+ # convert ElementTree to Docutils Document Tree
+ if root.tag == 'document':
+ convert_attribs(document, root.attrib)
+ for element in root:
+ document.append(element2node(element))
+ else:
+ document.append(element2node(root))
+
+ self.finish_parse()
+
+
+def element2node(element):
+ """
+ Convert an `etree` element and its children to Docutils doctree nodes.
+
+ Return a `docutils.nodes.Element` instance.
+
+ Internal.
+ """
+ # Get the corresponding `nodes.Element` instance:
+ nodeclass = getattr(nodes, element.tag)
+ node = nodeclass()
+
+ # Attributes: convert and add to `node.attributes`.
+ convert_attribs(node, element.attrib)
+
+ # Append text (wrapped in a `nodes.Text` instance)
+ append_text(node, element.text)
+
+ # Append children and their tailing text
+ for child in element:
+ node.append(element2node(child))
+ # Text after a child node
+ append_text(node, child.tail)
+
+ return node
+
+
+def convert_attribs(node, a):
+ # Convert doctree element attribute values from string to their datatype,
+ for key, value in a.items():
+ if key.startswith('{'):
+ continue # skip duplicate attributes with namespace URL
+ node.attributes[key] = nodes.ATTRIBUTE_VALIDATORS[key](value)
+
+
+def append_text(node, text):
+ if not text:
+ return
+ if isinstance(node, nodes.TextElement):
+ node.append(nodes.Text(text))
+ elif text.strip():
+ # no TextElement: ignore formatting whitespace
+ # but append other text (node becomes invalid!)
+ node.append(nodes.Text(text.strip()))
Property changes on: trunk/docutils/docutils/parsers/docutils_xml.py
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision
\ No newline at end of property
Added: trunk/docutils/test/test_parsers/test_docutils_xml/__init__.py
===================================================================
--- trunk/docutils/test/test_parsers/test_docutils_xml/__init__.py (rev 0)
+++ trunk/docutils/test/test_parsers/test_docutils_xml/__init__.py 2024-06-06 14:01:22 UTC (rev 9733)
@@ -0,0 +1 @@
+"""Tests for `docutils.parsers.docutils_xml`."""
Property changes on: trunk/docutils/test/test_parsers/test_docutils_xml/__init__.py
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision
\ No newline at end of property
Added: trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py
===================================================================
--- trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py (rev 0)
+++ trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py 2024-06-06 14:01:22 UTC (rev 9733)
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+# :Copyright: © 2024 Günter Milde.
+# :License: Released under the terms of the `2-Clause BSD license`_, in short:
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+# This file is offered as-is, without any warranty.
+#
+# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
+
+"""Tests for `docutils.parsers.docutils_xml.Parser.parse()`."""
+
+from pathlib import Path
+import sys
+import unittest
+
+if __name__ == '__main__':
+ # prepend the "docutils root" to the Python library path
+ # so we import the local `docutils` package.
+ sys.path.insert(0, str(Path(__file__).resolve().parents[3]))
+
+from docutils.frontend import get_default_settings
+from docutils.parsers import docutils_xml
+from docutils.utils import new_document
+
+parser = docutils_xml.Parser()
+
+
+class XmlParserTestCase(unittest.TestCase):
+ def test_parser(self):
+ settings = get_default_settings(docutils_xml.Parser)
+ # settings.warning_stream = ''
+ for name, cases in totest.items():
+ for casenum, (case_input, case_expected) in enumerate(cases):
+ with self.subTest(id=f'totest[{name!r}][{casenum}]'):
+ document = new_document('test data', settings.copy())
+ parser.parse(case_input, document)
+ output = document.pformat()
+ self.assertEqual(case_expected, output)
+
+
+totest = {}
+
+totest['simple'] = [
+["""\
+<document>
+ <paragraph>A paragraph.</paragraph>
+</document>
+""",
+"""\
+<document source="test data">
+ <paragraph>
+ A paragraph.
+"""],
+["""\
+<document source="test sample">
+ <paragraph>A paragraph
+ with two lines.</paragraph>
+</document>
+""",
+# TODO: unindent second line
+"""\
+<document source="test sample">
+ <paragraph>
+ A paragraph
+ with two lines.
+"""],
+["""\
+<document>
+ <paragraph>Paragraph 1.</paragraph>
+ <paragraph>Paragraph 2.</paragraph>
+</document>
+""",
+"""\
+<document source="test data">
+ <paragraph>
+ Paragraph 1.
+ <paragraph>
+ Paragraph 2.
+"""],
+["""\
+<paragraph>Sub-trees are parsed, too.
+Line breaks are preserved.</paragraph>
+""",
+"""\
+<document source="test data">
+ <paragraph>
+ Sub-trees are parsed, too.
+ Line breaks are preserved.
+"""],
+]
+
+totest['inline'] = [
+["""\
+<paragraph><emphasis>emphatically</emphasis></paragraph>
+""",
+"""\
+<document source="test data">
+ <paragraph>
+ <emphasis>
+ emphatically
+"""],
+["""\
+<paragraph><strong>strong</strong> week</paragraph>
+""",
+"""\
+<document source="test data">
+ <paragraph>
+ <strong>
+ strong
+ week
+"""],
+]
+
+totest['attributes'] = [
+["""\
+<paragraph classes="top secret">classified text</paragraph>
+""",
+"""\
+<document source="test data">
+ <paragraph classes="top secret">
+ classified text
+"""],
+[r"""
+<paragraph ids="ref-2 ref-1" names="ref\ 2 ref\ 1">target paragraph</paragraph>
+""",
+r"""<document source="test data">
+ <paragraph ids="ref-2 ref-1" names="ref\ 2 ref\ 1">
+ target paragraph
+"""],
+]
+
+
+if __name__ == '__main__':
+ unittest.main()
Property changes on: trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision
\ No newline at end of property
Modified: trunk/docutils/test/test_parsers/test_get_parser_class.py
===================================================================
--- trunk/docutils/test/test_parsers/test_get_parser_class.py 2024-06-05 15:54:27 UTC (rev 9732)
+++ trunk/docutils/test/test_parsers/test_get_parser_class.py 2024-06-06 14:01:22 UTC (rev 9733)
@@ -29,10 +29,17 @@
class GetParserClassTestCase(unittest.TestCase):
- def test_registered_parser(self):
+ def test_registered_parsers(self):
+ get_parser_class('null')
get_parser_class('rst')
+ get_parser_class('docutils_xml')
# raises ImportError on failure
+ def test_registered_parsers_case_folding(self):
+ get_parser_class('reStructuredText')
+ get_parser_class('XML')
+ # raises ImportError on failure
+
def test_bogus_parser(self):
with self.assertRaises(ImportError):
get_parser_class('nope')
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-06 14:01:38
|
Revision: 9734
http://sourceforge.net/p/docutils/code/9734
Author: milde
Date: 2024-06-06 14:01:35 +0000 (Thu, 06 Jun 2024)
Log Message:
-----------
xml parser: new function `parse_element()`.
The module-level function `parse_element()` does not require/populate
a <document> root node.
It parses an XML representation of a "document tree" element
(with possible sub-elements) and returns a `docutils.nodes.Element`
instance (with possible child nodes).
See the unittest script for usage examples.
Modified Paths:
--------------
trunk/docutils/docutils/parsers/docutils_xml.py
trunk/docutils/test/test_nodes.py
Added Paths:
-----------
trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py
Modified: trunk/docutils/docutils/parsers/docutils_xml.py
===================================================================
--- trunk/docutils/docutils/parsers/docutils_xml.py 2024-06-06 14:01:22 UTC (rev 9733)
+++ trunk/docutils/docutils/parsers/docutils_xml.py 2024-06-06 14:01:35 UTC (rev 9734)
@@ -58,6 +58,26 @@
self.finish_parse()
+def parse_element(inputstring):
+ """
+ Parse `inputstring` as "Docutils XML", return `nodes.Element` instance.
+
+ :inputstring: XML source.
+
+ Caution:
+ The function does not detect invalid XML.
+
+ To check the validity of the returned node,
+ you may use its `validate()` method::
+
+ node = parse_element('<tip><hint>text</hint></tip>')
+ node.validate()
+
+ Provisional.
+ """
+ return element2node(ET.fromstring(inputstring))
+
+
def element2node(element):
"""
Convert an `etree` element and its children to Docutils doctree nodes.
Modified: trunk/docutils/test/test_nodes.py
===================================================================
--- trunk/docutils/test/test_nodes.py 2024-06-06 14:01:22 UTC (rev 9733)
+++ trunk/docutils/test/test_nodes.py 2024-06-06 14:01:35 UTC (rev 9734)
@@ -1121,7 +1121,13 @@
class AttributeTypeTests(unittest.TestCase):
+ """Test validator functions for the supported `attribute data types`__
+ See also test_parsers/test_docutils_xml/test_parse_element.py.
+
+ __ https://docutils.sourceforge.io/docs/ref/doctree.html#attribute-types
+ """
+
def test_validate_enumerated_type(self):
# function factory for "choice validators"
food = nodes.validate_enumerated_type('ham', 'spam')
Added: trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py
===================================================================
--- trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py (rev 0)
+++ trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py 2024-06-06 14:01:35 UTC (rev 9734)
@@ -0,0 +1,207 @@
+#!/usr/bin/env python3
+# :Copyright: © 2024 Günter Milde.
+# :License: Released under the terms of the `2-Clause BSD license`_, in short:
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+# This file is offered as-is, without any warranty.
+#
+# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
+
+"""Tests for parsers/docutils_xml.py."""
+
+from pathlib import Path
+import sys
+import unittest
+import xml.etree.ElementTree as ET
+
+if __name__ == '__main__':
+ # prepend the "docutils root" to the Python library path
+ # so we import the local `docutils` package.
+ sys.path.insert(0, str(Path(__file__).resolve().parents[3]))
+
+from docutils.parsers import docutils_xml
+
+
+class ParseElementTestCase(unittest.TestCase):
+
+ def test_element_with_child_with_text(self):
+ xml = '<tip><paragraph>some text</paragraph></tip>'
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(xml, str(node))
+
+ def test_tailing_text(self):
+ xml = '<strong>text</strong>trailing text'
+ with self.assertRaisesRegex(ET.ParseError,
+ 'junk after document element'):
+ docutils_xml.parse_element(xml)
+
+
+class XmlAttributesTestCase(unittest.TestCase):
+ """
+ Test correct parsing of the `supported element attributes`_.
+
+ See also `AttributeTypeTests` in ../../test_nodes.py.
+
+ __ https://docutils.sourceforge.io/
+ docs/ref/doctree.html#attribute-reference
+ """
+ common_attributes = {'classes': [],
+ 'dupnames': [],
+ 'ids': [],
+ 'names': []}
+
+ def test_alt(self): # CDATA (str)
+ xml = ('<image alt="a barking dog" align="left" height="3ex"'
+ ' loading="embed" scale="3" uri="dog.jpg" width="4cm"/>')
+ expected = {'alt': 'a barking dog',
+ 'align': 'left',
+ 'height': '3ex',
+ 'loading': 'embed',
+ 'scale': 3,
+ 'uri': 'dog.jpg',
+ 'width': '4cm'}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ # 'align': CDATA (str) → test_alt
+
+ def test_anonymous(self): # yesorno (int)
+ xml = '<target anonymous="1" ids="target-1" refuri="example.html" />'
+ expected = {'anonymous': 1,
+ 'ids': ['target-1'],
+ 'refuri': 'example.html'}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ def test_auto(self): # CDATA (str) number sequence: '1' or '*'
+ xml = '<footnote auto="*" backrefs="footnote-reference-2" />'
+ expected = {'auto': '*',
+ 'backrefs': ['footnote-reference-2']}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ # 'backrefs': idrefs.type (list[str]) → test_auto
+
+ def test_bullet(self): # CDATA (str)
+ xml = '<bullet_list bullet="*" classes="first x-2nd" />'
+ expected = {'bullet': '*',
+ 'classes': ['first', 'x-2nd']}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ # 'classes': classnames.type (list[str]) → test_bullet
+
+ def test_colwidth(self): # CDATA (int) sic!
+ xml = '<colspec colwidth="33" stub="1" />'
+ expected = {'colwidth': 33, 'stub': 1}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ def test_delimiter(self): # CDATA (str)
+ xml = '<option_argument delimiter="=">FILE</option_argument>'
+ expected = {'delimiter': '='}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ def test_dupnames(self): # refnames.type (list[str]).
+ xml = r'<section dupnames="title\ 1" ids="title-1" />'
+ expected = {'dupnames': ['title 1'],
+ 'ids': ['title-1']}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ def test_enumtype(self): # EnumeratedType (str)
+ xml = ('<enumerated_list enumtype="upperroman"'
+ ' prefix="(" start="2" suffix=")" />')
+ expected = {'enumtype': 'upperroman',
+ 'prefix': '(',
+ 'start': 2,
+ 'suffix': ')'}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ def test_format(self): # NMTOKENS (str) (space-delimited list of keywords)
+ xml = '<raw format="html latex" xml:space="preserve" />'
+ expected = {'format': 'html latex',
+ 'xml:space': 'preserve'}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ # 'height': measure (str) → test_alt
+ # 'ids': ids.type (list[str]) → test_names
+
+ def test_level(self): # level (int)
+ xml = ('<system_message level="3" line="21" source="string"'
+ ' type="ERROR" />')
+ expected = {'backrefs': [],
+ 'level': 3,
+ 'line': 21,
+ 'source': 'string',
+ 'type': 'ERROR'}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ def test_ltrim(self): # yesorno (int)
+ xml = '<substitution_definition ltrim="1" names="nbsp" />'
+ expected = {'ltrim': 1, 'names': ['nbsp']}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ # 'loading': EnumeratedType (str) → test_alt
+
+ def test_morecols(self): # number (int)
+ xml = '<entry morecols="1" />'
+ expected = {'morecols': 1}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ def test_names(self): # refnames.type (list[str])
+ # internal whitespace in XML escaped
+ xml = r'<section ids="title-2 title-1" names="title\ 2\\ title\ 1" />'
+ expected = {'ids': ['title-2', 'title-1'],
+ 'names': ['title 2\\', 'title 1']}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ # 'prefix': CDATA (str) → test_enumtype
+
+ def test_refid(self): # idref.type (str)
+ xml = '<target refid="title-1-1"></target>'
+ expected = {'refid': 'title-1-1'}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ def test_refname(self): # refname.type (str)
+ xml = '<target refname="title 2"></target>'
+ expected = {'refname': 'title 2'}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ # 'refuri: CDATA (str) → test_anonymous
+
+ def test_rtrim(self): # yesorno (int)
+ xml = '<substitution_definition ltrim="1" names="nbsp" />'
+ expected = {'ltrim': 1,
+ 'names': ['nbsp']}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
+
+ # 'scale': number (int) → test_alt
+ # 'source': CDATA (str) → test_title
+ # 'start': number (int) → test_enumtype
+ # 'stub': yesorno (int) → test_colwidth
+ # 'suffix': CDATA (str) → test_enumtype
+
+ def test_title(self): # CDATA (str)
+ ...
+ # TODO: <document> does not work with parse_element()
+
+ # 'uri': CDATA (str) → test_alt
+ # 'width' measure (str) → test_alt
+ # 'xml:space' EnumeratedType (str) → test_format
+
+
+if __name__ == '__main__':
+ unittest.main()
Property changes on: trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision
\ No newline at end of property
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-06 14:01:48
|
Revision: 9735
http://sourceforge.net/p/docutils/code/9735
Author: milde
Date: 2024-06-06 14:01:46 +0000 (Thu, 06 Jun 2024)
Log Message:
-----------
xml-parser: new optional argument `document` for `parse_element()`.
The default is a new "dummy" `nodes.document` instance.
* Passed to the auxiliary `element2node()` function.
* Fixes ``parse_element('<document></document>')`` (instantiating
`nodes.document` requires additional arguments).
* Provides a "reporter" instance.
* Provides a configuration settings instance.
Use `parse_element()` to simplify `Parser.parse()`.
Modified Paths:
--------------
trunk/docutils/docutils/parsers/docutils_xml.py
trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py
Modified: trunk/docutils/docutils/parsers/docutils_xml.py
===================================================================
--- trunk/docutils/docutils/parsers/docutils_xml.py 2024-06-06 14:01:35 UTC (rev 9734)
+++ trunk/docutils/docutils/parsers/docutils_xml.py 2024-06-06 14:01:46 UTC (rev 9735)
@@ -21,7 +21,7 @@
import xml.etree.ElementTree as ET
-from docutils import nodes, parsers
+from docutils import frontend, nodes, parsers, utils
class Parser(parsers.Parser):
@@ -45,24 +45,22 @@
"""
self.setup_parse(inputstring, document)
- # get ElementTree
- root = ET.fromstring(inputstring)
- # convert ElementTree to Docutils Document Tree
- if root.tag == 'document':
- convert_attribs(document, root.attrib)
- for element in root:
- document.append(element2node(element))
- else:
- document.append(element2node(root))
+ node = parse_element(inputstring, document)
+ if not isinstance(node, nodes.document):
+ document.append(node)
self.finish_parse()
-def parse_element(inputstring):
+def parse_element(inputstring, document=None):
"""
Parse `inputstring` as "Docutils XML", return `nodes.Element` instance.
:inputstring: XML source.
+ :document: `nodes.document` instance (default: a new dummy instance).
+ Provides settings and reporter.
+ Populated and returned, if the inputstring's root element
+ is <document>.
Caution:
The function does not detect invalid XML.
@@ -75,10 +73,10 @@
Provisional.
"""
- return element2node(ET.fromstring(inputstring))
+ return element2node(ET.fromstring(inputstring), document)
-def element2node(element):
+def element2node(element, document=None):
"""
Convert an `etree` element and its children to Docutils doctree nodes.
@@ -86,12 +84,22 @@
Internal.
"""
+ if document is None:
+ document = utils.new_document('xml input',
+ frontend.get_default_settings(Parser))
+
# Get the corresponding `nodes.Element` instance:
nodeclass = getattr(nodes, element.tag)
- node = nodeclass()
+ if nodeclass == nodes.document:
+ node = document
+ else:
+ node = nodeclass()
# Attributes: convert and add to `node.attributes`.
- convert_attribs(node, element.attrib)
+ for key, value in element.items():
+ if key.startswith('{'):
+ continue # skip duplicate attributes with namespace URL
+ node.attributes[key] = nodes.ATTRIBUTE_VALIDATORS[key](value)
# Append text (wrapped in a `nodes.Text` instance)
append_text(node, element.text)
@@ -98,7 +106,7 @@
# Append children and their tailing text
for child in element:
- node.append(element2node(child))
+ node.append(element2node(child, document))
# Text after a child node
append_text(node, child.tail)
@@ -105,14 +113,6 @@
return node
-def convert_attribs(node, a):
- # Convert doctree element attribute values from string to their datatype,
- for key, value in a.items():
- if key.startswith('{'):
- continue # skip duplicate attributes with namespace URL
- node.attributes[key] = nodes.ATTRIBUTE_VALIDATORS[key](value)
-
-
def append_text(node, text):
if not text:
return
Modified: trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py
===================================================================
--- trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py 2024-06-06 14:01:35 UTC (rev 9734)
+++ trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py 2024-06-06 14:01:46 UTC (rev 9735)
@@ -195,8 +195,14 @@
# 'suffix': CDATA (str) → test_enumtype
def test_title(self): # CDATA (str)
- ...
- # TODO: <document> does not work with parse_element()
+ xml = (r'<document ids="test-document" names="test\ document"'
+ r' source="/tmp/foo.rst" title="Test Document" />')
+ expected = {'ids': ['test-document'],
+ 'names': ['test document'],
+ 'source': '/tmp/foo.rst',
+ 'title': 'Test Document'}
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(node.attributes, self.common_attributes | expected)
# 'uri': CDATA (str) → test_alt
# 'width' measure (str) → test_alt
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-06 14:02:12
|
Revision: 9737
http://sourceforge.net/p/docutils/code/9737
Author: milde
Date: 2024-06-06 14:02:08 +0000 (Thu, 06 Jun 2024)
Log Message:
-----------
xml-parser: test and fix handling of invalid input
Gracefully handle invalid attributes and text inserts:
Just generate (invalid) nodes without complaining.
With, e.g., `docutils --parser=xml myfile.xml`, the document tree
is validated by the `universal.Validate` transform by default.
Nodes returned from `parse_element()` can be easily validated
via their `validate()` method.
+ Allows parsing XML documents with "extended" document model
for special applications.
+ Allows for cleanup operations between parsing and validation.
+ Simpler implementation (no duplicating of the reporting framework
of the validator).
Modified Paths:
--------------
trunk/docutils/docutils/parsers/docutils_xml.py
trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py
trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py
Modified: trunk/docutils/docutils/parsers/docutils_xml.py
===================================================================
--- trunk/docutils/docutils/parsers/docutils_xml.py 2024-06-06 14:01:54 UTC (rev 9736)
+++ trunk/docutils/docutils/parsers/docutils_xml.py 2024-06-06 14:02:08 UTC (rev 9737)
@@ -52,6 +52,11 @@
self.finish_parse()
+class Unknown(nodes.Special, nodes.Inline, nodes.Element):
+ """An unknown element found by the XML parser."""
+ content_model = (((nodes.Element, nodes.Text), '*'),) # no restrictions
+
+
def parse_element(inputstring, document=None):
"""
Parse `inputstring` as "Docutils XML", return `nodes.Element` instance.
@@ -76,11 +81,19 @@
root = None
parser = ET.XMLPullParser(events=('start',))
for i, line in enumerate(inputstring.splitlines(keepends=True)):
- parser.feed(line)
- for event, element in parser.read_events():
- if root is None:
- root = element
- element.attrib['source line'] = str(i+1)
+ try:
+ parser.feed(line)
+ for event, element in parser.read_events():
+ if root is None:
+ root = element
+ element.attrib['source line'] = str(i+1)
+ except ET.ParseError as e:
+ if document is None:
+ raise
+ document.reporter.error(f'XML parse error: {e}.',
+ source=document.settings._source,
+ line=e.position[0])
+ break
return element2node(root, document)
@@ -95,21 +108,38 @@
if document is None:
document = utils.new_document('xml input',
frontend.get_default_settings(Parser))
+ document.source == 'xml input'
# Get the corresponding `nodes.Element` instance:
- nodeclass = getattr(nodes, element.tag)
+ try:
+ nodeclass = getattr(nodes, element.tag)
+ if not issubclass(nodeclass, nodes.Element):
+ nodeclass = Unknown
+ except AttributeError:
+ nodeclass = Unknown
if nodeclass == nodes.document:
node = document
+ document.source = document.source or document.settings._source
else:
node = nodeclass()
node.line = int(element.get('source line'))
+ if isinstance(node, Unknown):
+ node.tagname = element.tag
+ document.reporter.warning(
+ f'Unknown element type <{element.tag}>.',
+ base_node=node)
# Attributes: convert and add to `node.attributes`.
for key, value in element.items():
if key.startswith('{') or key == 'source line':
continue # skip duplicate attributes with namespace URL
- node.attributes[key] = nodes.ATTRIBUTE_VALIDATORS[key](value)
+ try:
+ node.attributes[key] = nodes.ATTRIBUTE_VALIDATORS[key](value)
+ except (ValueError, KeyError):
+ if key in node.list_attributes:
+ value = value.split()
+ node.attributes[key] = value # node becomes invalid!
# Append text (wrapped in a `nodes.Text` instance)
append_text(node, element.text)
Modified: trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py
===================================================================
--- trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py 2024-06-06 14:01:54 UTC (rev 9736)
+++ trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py 2024-06-06 14:02:08 UTC (rev 9737)
@@ -131,6 +131,63 @@
"""],
]
+totest['invalid'] = [
+["""\
+<document>
+ <tip>
+ spurious text
+ <paragraph>A paragraph.</paragraph>
+ </tip>
+</document>
+""",
+"""\
+<document source="test data">
+ <tip>
+ spurious text
+ <paragraph>
+ A paragraph.
+"""],
+["""\
+<document>
+ spurious text
+ <paragraph>A paragraph.</paragraph>
+</document>
+""",
+"""\
+<document source="test data">
+ spurious text
+ <paragraph>
+ A paragraph.
+"""],
+["""\
+<document>
+ <tip>
+ <paragraph>A paragraph.</paragraph>
+ spurious tailing text
+ </tip>
+</document>
+""",
+"""\
+<document source="test data">
+ <tip>
+ <paragraph>
+ A paragraph.
+ spurious tailing text
+"""],
+["""\
+<document>
+ <paragraph>A paragraph.</paragraph>
+ spurious tailing text
+</document>
+""",
+"""\
+<document source="test data">
+ <paragraph>
+ A paragraph.
+ spurious tailing text
+"""],
+]
+
if __name__ == '__main__':
unittest.main()
Modified: trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py
===================================================================
--- trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py 2024-06-06 14:01:54 UTC (rev 9736)
+++ trunk/docutils/test/test_parsers/test_docutils_xml/test_parse_element.py 2024-06-06 14:02:08 UTC (rev 9737)
@@ -21,23 +21,82 @@
# so we import the local `docutils` package.
sys.path.insert(0, str(Path(__file__).resolve().parents[3]))
+from docutils import frontend, utils
from docutils.parsers import docutils_xml
class ParseElementTestCase(unittest.TestCase):
+ """Test the `docutils.xml.parse_element()` function."""
+ # supress warnings when passing `document` to `parse_element()`
+ settings = frontend.get_default_settings(docutils_xml.Parser)
+ settings.warning_stream = '' # comment out to see warnings
+ document = utils.new_document('xml input', settings)
+
def test_element_with_child_with_text(self):
xml = '<tip><paragraph>some text</paragraph></tip>'
node = docutils_xml.parse_element(xml)
self.assertEqual(xml, str(node))
- def test_tailing_text(self):
- xml = '<strong>text</strong>trailing text'
- with self.assertRaisesRegex(ET.ParseError,
- 'junk after document element'):
+ def test_tailing_text_after_root(self):
+ """etree.ElementTree does not accept tailing text in the input.
+ """
+ xml = '<strong>text</strong>tailing text'
+ with self.assertRaisesRegex(ET.ParseError, 'junk after document '):
docutils_xml.parse_element(xml)
+ # If a document is provided, report via a "loose" error system message
+ # comment out ``settings.warning_stream = ''`` above to see it).
+ node = docutils_xml.parse_element(xml, self.document)
+ self.assertEqual('<strong>text</strong>', str(node))
+ def test_nonexistent_element_type(self):
+ xml = '<tip><p>some text</p></tip>'
+ node = docutils_xml.parse_element(xml, self.document)
+ self.assertEqual(xml, str(node))
+ # see test_misc.py for the warning
+ def test_junk_text(self):
+ # insert text also in nodes that are not TextElement instances
+ xml = '<tip>some text</tip>'
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(xml, str(node))
+ with self.assertRaisesRegex(ValueError,
+ 'Expecting child of type <Body>,'
+ ' not text data "some text"'):
+ node.validate()
+
+ def test_tailing_junk_text(self):
+ # insert text also in nodes that are not TextElement instances
+ xml = '<tip><paragraph>some text</paragraph>tailing text</tip>'
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(xml, str(node))
+ with self.assertRaisesRegex(
+ ValueError, 'Spurious text: "tailing text"'):
+ node.validate()
+
+ def test_element_with_attributes(self):
+ xml = ('<image align="left" alt="a barking dog" height="3ex"'
+ ' loading="embed" scale="3" uri="dog.jpg" width="4cm"/>')
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(xml, str(node))
+
+ def test_element_with_invalid_attributes(self):
+ """Silently accept invalid attribute names and values.
+
+ Validation reports problems.
+ """
+ xml = ('<image breadth="3 cm" height="3 inch"/>')
+ node = docutils_xml.parse_element(xml)
+ self.assertEqual(xml, str(node))
+ with self.assertRaisesRegex(ValueError,
+ 'Element <image breadth="3 cm".*invalid:\n'
+ '.*"breadth" not one of "ids",.*\n'
+ '.*"height" has invalid value "3 inch".\n'
+ '.*Valid units: em ex px in cm mm pt '
+ ):
+ node.validate()
+
+
class XmlAttributesTestCase(unittest.TestCase):
"""
Test correct parsing of the `supported element attributes`_.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-06 14:02:22
|
Revision: 9738
http://sourceforge.net/p/docutils/code/9738
Author: milde
Date: 2024-06-06 14:02:18 +0000 (Thu, 06 Jun 2024)
Log Message:
-----------
Let `transforms.universal.Messages` handle parser messages.
Include system messages generated by the parser in the list of
messages handled by the `universal.Messages` transform
(attach to "System Messages" section).
Helps parsers when a problem occurs but it is not clear
where to attach a system message (see "xml" parser for examples).
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/transforms/universal.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-06-06 14:02:08 UTC (rev 9737)
+++ trunk/docutils/HISTORY.txt 2024-06-06 14:02:18 UTC (rev 9738)
@@ -55,6 +55,10 @@
follows a <meta> or <decoration> element as this is invalid
according to ``docutils.dtd``.
+* docutils/transforms/universal.py
+
+ - `Messages` also handles "loose" system messages generated by the parser.
+
* docutils/writers/manpage.py
- Remove code for unused emdash bullets.
Modified: trunk/docutils/docutils/transforms/universal.py
===================================================================
--- trunk/docutils/docutils/transforms/universal.py 2024-06-06 14:02:08 UTC (rev 9737)
+++ trunk/docutils/docutils/transforms/universal.py 2024-06-06 14:02:18 UTC (rev 9738)
@@ -113,15 +113,17 @@
class Messages(Transform):
+ """Handle "loose" messages.
+
+ Place system messages generated by parsing or transforms that are not
+ attached to the document tree into a dedicated section of the document.
"""
- Place any system messages generated after parsing into a dedicated section
- of the document.
- """
default_priority = 860
def apply(self):
- messages = self.document.transform_messages
+ messages = [*self.document.parse_messages,
+ *self.document.transform_messages]
loose_messages = [msg for msg in messages if not msg.parent]
if loose_messages:
section = nodes.section(classes=['system-messages'])
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-06 14:03:06
|
Revision: 9741
http://sourceforge.net/p/docutils/code/9741
Author: milde
Date: 2024-06-06 14:03:03 +0000 (Thu, 06 Jun 2024)
Log Message:
-----------
Make `references.AnonymousHyperlinks` transform idempotent.
Don't try to resolve already resolved anonymous hyperlinks again.
Fixes XML round-trip.
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/transforms/references.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-06-06 14:02:54 UTC (rev 9740)
+++ trunk/docutils/HISTORY.txt 2024-06-06 14:03:03 UTC (rev 9741)
@@ -55,6 +55,10 @@
follows a <meta> or <decoration> element as this is invalid
according to ``docutils.dtd``.
+* docutils/transforms/references.py
+
+ - Make `AnonymousHyperlinks` transform idempotent.
+
* docutils/transforms/universal.py
- `Messages` also handles "loose" system messages generated by the parser.
@@ -72,7 +76,6 @@
when formatting with `indents`_.
-
Release 0.21.2 (2024-04-23)
===========================
Modified: trunk/docutils/docutils/transforms/references.py
===================================================================
--- trunk/docutils/docutils/transforms/references.py 2024-06-06 14:02:54 UTC (rev 9740)
+++ trunk/docutils/docutils/transforms/references.py 2024-06-06 14:03:03 UTC (rev 9741)
@@ -135,11 +135,13 @@
ref.replace_self(prb)
return
for ref, target in zip(anonymous_refs, anonymous_targets):
- target.referenced = 1
+ if ref.hasattr('refid') or ref.hasattr('refuri'):
+ continue
+ target.referenced = True
while True:
if target.hasattr('refuri'):
ref['refuri'] = target['refuri']
- ref.resolved = 1
+ ref.resolved = True
break
else:
if not target['ids']:
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <mi...@us...> - 2024-06-06 14:03:26
|
Revision: 9743
http://sourceforge.net/p/docutils/code/9743
Author: milde
Date: 2024-06-06 14:03:21 +0000 (Thu, 06 Jun 2024)
Log Message:
-----------
xml-parser: Unindent continuation lines. Fixes xml round trip.
The XML writer supports formatting XML with newlines and indentation
(see the corresponding configuration settings).
In most reStructuredText markup blocks, indentation is semantically
significant and is removed before passing the block for further parsing.
As a result, only a limited set of elements may have text with
spaces after a newline.
Remove indentation of continuation lines unless the containing element
is a `FixedTextElement`, `literal`, or `Unknown` instance.
Modified Paths:
--------------
trunk/docutils/docutils/parsers/docutils_xml.py
trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py
Modified: trunk/docutils/docutils/parsers/docutils_xml.py
===================================================================
--- trunk/docutils/docutils/parsers/docutils_xml.py 2024-06-06 14:03:12 UTC (rev 9742)
+++ trunk/docutils/docutils/parsers/docutils_xml.py 2024-06-06 14:03:21 UTC (rev 9743)
@@ -19,6 +19,7 @@
without prior notice.
"""
+import re
import xml.etree.ElementTree as ET
from docutils import frontend, nodes, parsers, utils
@@ -97,10 +98,16 @@
return element2node(root, document)
-def element2node(element, document=None):
+def element2node(element, document=None, unindent=True):
"""
Convert an `etree` element and its children to Docutils doctree nodes.
+ :element: `xml.etree` element
+ :document: see `parse_element()`
+ :unindent: Remove formatting indentation of follow-up text lines?
+ Cf. `append_text()`.
+ TODO: do we need an "unindent" configuration setting?
+
Return a `docutils.nodes.Element` instance.
Internal.
@@ -141,21 +148,28 @@
value = value.split()
node.attributes[key] = value # node becomes invalid!
- # Append text (wrapped in a `nodes.Text` instance)
- append_text(node, element.text)
-
- # Append children and their tailing text
+ # Append content:
+ # update "unindent" flag: change line indentation?
+ unindent = unindent and not isinstance(
+ node, (nodes.FixedTextElement, nodes.literal, Unknown))
+ # (leading) text
+ append_text(node, element.text, unindent)
+ # children and their tailing text
for child in element:
- node.append(element2node(child, document))
+ node.append(element2node(child, document, unindent))
# Text after a child node
- append_text(node, child.tail)
+ append_text(node, child.tail, unindent)
return node
-def append_text(node, text):
+def append_text(node, text, unindent):
+ # Format `text`, wrap in a `nodes.Text` instance and append to `node`.
+ # Skip if `text` is empty or just formatting whitespace.
if not text:
return
+ if unindent:
+ text = re.sub('\n +', '\n', text)
if isinstance(node, nodes.TextElement):
node.append(nodes.Text(text))
elif text.strip():
Modified: trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py
===================================================================
--- trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py 2024-06-06 14:03:12 UTC (rev 9742)
+++ trunk/docutils/test/test_parsers/test_docutils_xml/test_parse.py 2024-06-06 14:03:21 UTC (rev 9743)
@@ -59,12 +59,11 @@
with two lines.</paragraph>
</document>
""",
-# TODO: unindent second line
"""\
<document source="test sample">
<paragraph>
A paragraph
- with two lines.
+ with two lines.
"""],
["""\
<document>
@@ -111,6 +110,25 @@
strong
week
"""],
+["""\
+<document source="test data">
+ <literal_block xml:space="preserve"> Inline element
+with <strong> space at start,
+ in the middle</strong>
+ and after end.</literal_block>
+</document>
+""",
+"""\
+<document source="test data">
+ <literal_block xml:space="preserve">
+ Inline element
+ with \n\
+ <strong>
+ space at start,
+ in the middle
+ \n\
+ and after end.
+"""],
]
totest['attributes'] = [
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|