|
From: <mi...@us...> - 2024-01-13 09:50:10
|
Revision: 9511
http://sourceforge.net/p/docutils/code/9511
Author: milde
Date: 2024-01-13 09:50:07 +0000 (Sat, 13 Jan 2024)
Log Message:
-----------
Refactor/fix HTML math handling.
Use custom exception `utils.math.MathError` instead of
abusing `SyntaxError` for LaTeX math syntax errors.
Unify interface of LaTeX -> MathML conversion functions.
Run external MathML converters with `subprocess.run()`.
Revise pre- and post-processing.
Improve error reporting.
Use WARNING instead of ERROR for TeX to MathML conversion errors.
(The source is included in the output as TeX code, so no data is lost.)
Provisional test script for external TeX to MathML converters.
(Placed in a separate directory not to be run by "alltests".
It tests a rather arcane use case, requires optional helpers, and is slow.)
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/utils/math/__init__.py
trunk/docutils/docutils/utils/math/latex2mathml.py
trunk/docutils/docutils/utils/math/tex2mathml_extern.py
trunk/docutils/docutils/writers/_html_base.py
trunk/docutils/docutils/writers/html4css1/html4css1.css
trunk/docutils/docutils/writers/html5_polyglot/minimal.css
trunk/docutils/test/test_writers/test_html5_polyglot_parts.py
Added Paths:
-----------
trunk/docutils/test/extra/
trunk/docutils/test/extra/test_math_conversion.py
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2024-01-12 11:32:31 UTC (rev 9510)
+++ trunk/docutils/HISTORY.txt 2024-01-13 09:50:07 UTC (rev 9511)
@@ -88,6 +88,13 @@
- Consider the root-prefix_ setting when including files with
"include", "raw", or "csv-table" directives.
+* docutils/utils/math/*
+
+ - Use custom exception `utils.math.MathError` instead of
+ abusing `SyntaxError` for LaTeX math syntax errors.
+ - Unify interface of LaTeX -> MathML conversion functions.
+ Improve error reporting.
+
* docutils/utils/roman.py
- Update to version `1.4 <https://pypi.org/project/roman/4.1/>`__.
Modified: trunk/docutils/docutils/utils/math/__init__.py
===================================================================
--- trunk/docutils/docutils/utils/math/__init__.py 2024-01-12 11:32:31 UTC (rev 9510)
+++ trunk/docutils/docutils/utils/math/__init__.py 2024-01-13 09:50:07 UTC (rev 9511)
@@ -26,6 +26,17 @@
# =================================
+class MathError(ValueError):
+ """Exception for math syntax and math conversion errors.
+
+ The additional attribute `details` may hold a list of Docutils
+ nodes suitable as children for a ``<system_message>``.
+ """
+ def __init__(self, msg, details=[]):
+ super().__init__(msg)
+ self.details = details
+
+
def toplevel_code(code):
"""Return string (LaTeX math) `code` with environments stripped out."""
chunks = code.split(r'\begin{')
Modified: trunk/docutils/docutils/utils/math/latex2mathml.py
===================================================================
--- trunk/docutils/docutils/utils/math/latex2mathml.py 2024-01-12 11:32:31 UTC (rev 9510)
+++ trunk/docutils/docutils/utils/math/latex2mathml.py 2024-01-13 09:50:07 UTC (rev 9511)
@@ -26,7 +26,7 @@
import re
import unicodedata
-from docutils.utils.math import tex2unichar, toplevel_code
+from docutils.utils.math import MathError, tex2unichar, toplevel_code
# Character data
@@ -399,7 +399,7 @@
`None`.
"""
if self.full():
- raise SyntaxError('Node %s already full!' % self)
+ raise MathError(f'Node {self} already full!')
self.children.append(child)
child.parent = self
if self.full():
@@ -620,7 +620,7 @@
# munder(mi('lim'), mo('-'), accent=False)
# >>> mu.append(mi('lim'))
# Traceback (most recent call last):
-# SyntaxError: Node munder(mi('lim'), mo('-'), accent=False) already full!
+# docutils.utils.math.MathError: Node munder(mi('lim'), mo('-'), accent=False) already full!
# >>> munder(mo('-'), mi('lim'), accent=False, switch=True).toprettyxml()
# '<munder accent="false">\n <mi>lim</mi>\n <mo>-</mo>\n</munder>'
@@ -759,7 +759,7 @@
if nest_level == 0:
break
else:
- raise SyntaxError('Group without closing bracket')
+ raise MathError('Group without closing bracket!')
return string[1:split_index-1], string[split_index:]
@@ -821,7 +821,7 @@
try:
return m.group('optarg'), m.group('remainder')
except AttributeError:
- raise SyntaxError('Could not extract optional argument from %r' % string)
+ raise MathError(f'Could not extract optional argument from "{string}"!')
# Test:
# >>> tex_optarg(' [optional argument] after whitespace')
@@ -828,10 +828,12 @@
# ('optional argument', ' after whitespace')
# >>> tex_optarg('[missing right bracket')
# Traceback (most recent call last):
-# SyntaxError: Could not extract optional argument from '[missing right bracket'
+# ...
+# docutils.utils.math.MathError: Could not extract optional argument from "[missing right bracket"!
# >>> tex_optarg('[group with [nested group]]')
# Traceback (most recent call last):
-# SyntaxError: Could not extract optional argument from '[group with [nested group]]'
+# ...
+# docutils.utils.math.MathError: Could not extract optional argument from "[group with [nested group]]"!
def parse_latex_math(node, string):
@@ -888,7 +890,8 @@
elif c in "+*=<>,.!?`';@":
node = node.append(mo(c))
else:
- raise SyntaxError('Unsupported character: "%s"' % c)
+ raise MathError(f'Unsupported character: "{c}"!')
+ # TODO: append as <mi>?
return tree
# Test:
@@ -915,6 +918,9 @@
# math(msub(mi('x'), mi('α')))
# >>> parse_latex_math(math(), 'x_\\text{in}')
# math(msub(mi('x'), mtext('in')))
+# >>> parse_latex_math(math(), '2⌘')
+# Traceback (most recent call last):
+# docutils.utils.math.MathError: Unsupported character: "⌘"!
def handle_cmd(name, node, string): # noqa: C901 TODO make this less complex
@@ -1053,8 +1059,8 @@
try:
delimiter = stretchables[delimiter.lstrip('\\')]
except KeyError:
- raise SyntaxError('Unsupported "\\%s" delimiter "%s"!'
- % (name, delimiter))
+ raise MathError(f'Unsupported "\\{name}" delimiter '
+ f'"{delimiter}"!')
if size:
delimiter_attributes['maxsize'] = size
delimiter_attributes['minsize'] = size
@@ -1070,6 +1076,7 @@
return node, string
if name == 'not':
+ # negation: LaTeX just overlays next symbol with "/".
arg, string = tex_token(string)
if arg == '{':
return node, '{\\not ' + string
@@ -1077,7 +1084,7 @@
try:
arg = operators[arg[1:]]
except KeyError:
- raise SyntaxError('\\not: Cannot negate: "%s"!'%arg)
+ raise MathError(rf'"\not" cannot negate: "{arg}"!')
arg = unicodedata.normalize('NFC', arg+'\u0338')
node = node.append(mo(arg))
return node, string
@@ -1213,8 +1220,8 @@
elif node.__class__.__name__ == 'math':
node.append(new_node)
else:
- raise SyntaxError('Declaration "\\%s" must be first command '
- 'in a group.' % name)
+ raise MathError(rf'Declaration "\{name}" must be first command '
+ 'in a group!')
return new_node, string
if name.endswith('limits'):
@@ -1232,7 +1239,7 @@
if name == 'end':
return end_environment(node, string)
- raise SyntaxError('Unknown LaTeX command: \\' + name)
+ raise MathError(rf'Unknown LaTeX command "\{name}".')
# >>> handle_cmd('left', math(), '[a\\right]')
# (mrow(mo('[')), 'a\\right]')
@@ -1268,6 +1275,11 @@
# (munderover(mo('⟵'), mi('α')), '{10}')
# >>> handle_cmd('xleftarrow', math(), r'[\alpha=5]{10}')
# (munderover(mo('⟵'), mrow(mi('α'), mo('='), mn('5'))), '{10}')
+# >>> handle_cmd('left', math(), '< a)')
+# Traceback (most recent call last):
+# docutils.utils.math.MathError: Unsupported "\left" delimiter "<"!
+# >>> handle_cmd('not', math(), '{< b} c') # LaTeX ignores the braces, too.
+# (math(), '{\\not < b} c')
def handle_script_or_limit(node, c, limits=''):
@@ -1327,7 +1339,7 @@
node.append(mtable(mtr(entry), **attributes))
node = entry
else:
- raise SyntaxError('Environment not supported!')
+ raise MathError(f'Environment "{name}" not supported!')
return node, string
@@ -1342,7 +1354,7 @@
elif name == 'cases':
node = node.close()
else:
- raise SyntaxError('Environment not supported!')
+ raise MathError(f'Environment "{name}" not supported!')
return node, string
@@ -1386,15 +1398,15 @@
# {'class': 'align', 'displaystyle': True, 'columnalign': 'right left right left', 'columnspacing': '0 2em 0'}
-def tex2mathml(tex_math, inline=True):
+def tex2mathml(tex_math, as_block=False):
"""Return string with MathML code corresponding to `tex_math`.
- Set `inline` to False for displayed math.
+ Set `as_block` to ``True`` for displayed formulas.
"""
# Set up tree
math_tree = math(xmlns='http://www.w3.org/1998/Math/MathML')
node = math_tree
- if not inline:
+ if as_block:
math_tree['display'] = 'block'
rows = toplevel_code(tex_math).split(r'\\')
if len(rows) > 1:
@@ -1409,11 +1421,11 @@
# <math xmlns="http://www.w3.org/1998/Math/MathML">
# <mn>3</mn>
# </math>
-# >>> print(tex2mathml('3', inline=False))
+# >>> print(tex2mathml('3', as_block=True))
# <math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
# <mn>3</mn>
# </math>
-# >>> print(tex2mathml(r'a & b \\ c & d', inline=False))
+# >>> print(tex2mathml(r'a & b \\ c & d', as_block=True))
# <math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
# <mtable class="align" displaystyle="true" columnalign="right left" columnspacing="0">
# <mtr>
@@ -1434,7 +1446,7 @@
# </mtr>
# </mtable>
# </math>
-# >>> print(tex2mathml(r'a \\ b', inline=False))
+# >>> print(tex2mathml(r'a \\ b', as_block=True))
# <math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
# <mtable class="align" displaystyle="true">
# <mtr>
@@ -1451,6 +1463,10 @@
# </math>
+# TODO: raise error if <sqrt> does not have a base character (missing children)
+# >> '\sqrt[3]'
+# Maybe also other nodes...
+
# TODO: look up more symbols from tr25, e.g.
#
#
Modified: trunk/docutils/docutils/utils/math/tex2mathml_extern.py
===================================================================
--- trunk/docutils/docutils/utils/math/tex2mathml_extern.py 2024-01-12 11:32:31 UTC (rev 9510)
+++ trunk/docutils/docutils/utils/math/tex2mathml_extern.py 2024-01-13 09:50:07 UTC (rev 9511)
@@ -1,6 +1,6 @@
# :Id: $Id$
# :Copyright: © 2015 Günter Milde.
-# :License: Released under the terms of the `2-Clause BSD license`_, in short:
+# :License: Released under the terms of the `2-Clause BSD license`__, in short:
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
@@ -7,7 +7,7 @@
# notice and this notice are preserved.
# This file is offered as-is, without any warranty.
#
-# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
+# __ https://opensource.org/licenses/BSD-2-Clause
"""Wrappers for TeX->MathML conversion by external tools
@@ -17,8 +17,11 @@
import subprocess
+from docutils import nodes
+from docutils.utils.math import MathError, pick_math_environment
+
+# `latexml` expects a complete document:
document_template = r"""\documentclass{article}
-\usepackage{amsmath}
\begin{document}
%s
\end{document}
@@ -25,161 +28,206 @@
"""
-def latexml(math_code, reporter=None):
- """Convert LaTeX math code to MathML with LaTeXML_
+def _check_result(result, details=[]):
+ # raise MathError if the conversion went wrong
+ # :details: list of doctree nodes with additional info
+ msg = ''
+ if not details and result.stderr:
+ details = [nodes.paragraph('', result.stderr, classes=['pre-wrap'])]
+ if details:
+ msg = f'TeX to MathML converter `{result.args[0]}` failed:'
+ elif result.returncode:
+ msg = (f'TeX to MathMl converter `{result.args[0]}` '
+ f'exited with Errno {result.returncode}.')
+ elif not result.stdout:
+ msg = f'TeX to MathML converter `{result.args[0]}` returned no MathML.'
+ if msg:
+ raise MathError(msg, details=details)
- .. _LaTeXML: http://dlmf.nist.gov/LaTeXML/
+
+def blahtexml(math_code, as_block=False):
+ """Convert LaTeX math code to MathML with blahtexml__.
+
+ __ http://gva.noekeon.org/blahtexml/
"""
- p = subprocess.Popen(['latexml',
- '-', # read from stdin
- '--preload=amsfonts',
- '--preload=amsmath',
- '--inputencoding=utf8',
- ],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- close_fds=True)
- p.stdin.write((document_template % math_code).encode('utf-8'))
- p.stdin.close()
- latexml_code = p.stdout.read()
- latexml_err = p.stderr.read().decode('utf-8')
- if reporter and (latexml_err.find('Error') >= 0 or not latexml_code):
- reporter.error(latexml_err)
+ args = ['blahtexml',
+ '--mathml',
+ '--indented',
+ '--spacing', 'moderate',
+ '--mathml-encoding', 'raw',
+ '--other-encoding', 'raw',
+ '--doctype-xhtml+mathml',
+ '--annotate-TeX',
+ ]
+ mathml_args = ' display="block"' if as_block else ''
- post_p = subprocess.Popen(['latexmlpost',
- '-',
- '--nonumbersections',
- '--format=xhtml',
- # '--linelength=78', # experimental
- '--'
- ],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- close_fds=True)
- post_p.stdin.write(latexml_code)
- post_p.stdin.close()
- result = post_p.stdout.read().decode('utf-8')
- post_p_err = post_p.stderr.read().decode('utf-8')
- if reporter and (post_p_err.find('Error') >= 0 or not result):
- reporter.error(post_p_err)
+ if pick_math_environment(math_code).startswith('align'):
+ math_code = r'\begin{aligned}%s\end{aligned}' % math_code
- # extract MathML code:
- start, end = result.find('<math'), result.find('</math>')+7
- result = result[start:end]
- if 'class="ltx_ERROR' in result:
- raise SyntaxError(result)
- return result
+ result = subprocess.run(args, input=math_code,
+ capture_output=True, text=True)
+ # blahtexml writes <error> messages to stdout
+ if '<error>' in result.stdout:
+ result.stderr = result.stdout[result.stdout.find('<message>')+9:
+ result.stdout.find('</message>')]
+ else:
+ result.stdout = result.stdout[result.stdout.find('<markup>')+9:
+ result.stdout.find('</markup>')]
+ _check_result(result)
+ return (f'<math xmlns="http://www.w3.org/1998/Math/MathML"{mathml_args}>'
+ f'\n{result.stdout}</math>')
-def ttm(math_code, reporter=None):
- """Convert LaTeX math code to MathML with TtM_
- .. _TtM: http://hutchinson.belmont.ma.us/tth/mml/
+def latexml(math_code, as_block=False):
+ """Convert LaTeX math code to MathML with LaTeXML__.
+
+ Comprehensive macro support but **very** slow.
+
+ __ http://dlmf.nist.gov/LaTeXML/
"""
- p = subprocess.Popen(['ttm',
- # '-i', # italic font for equations. Default roman.
- '-u', # unicode encoding. (Default iso-8859-1).
- '-r', # output raw MathML (no wrapper)
- ],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- close_fds=True)
- p.stdin.write((document_template % math_code).encode('utf-8'))
- p.stdin.close()
- result = p.stdout.read()
- err = p.stderr.read().decode('utf-8')
- if err.find('**** Unknown') >= 0:
- msg = '\n'.join(line for line in err.splitlines()
- if line.startswith('****'))
- raise SyntaxError('\nMessage from external converter TtM:\n'+msg)
- if reporter and err.find('**** Error') >= 0 or not result:
- reporter.error(err)
- start, end = result.find('<math'), result.find('</math>')+7
- return result[start:end]
+ # LaTeXML works in 2 stages, expects complete documents.
+ #
+ # The `latexmlmath`__ convenience wrapper does not support block-level
+ # (displayed) equations.
+ #
+ # __ https://metacpan.org/dist/LaTeXML/view/bin/latexmlmath
+ args1 = ['latexml',
+ '-', # read from stdin
+ '--preload=amsmath',
+ '--preload=amssymb', # also loads amsfonts
+ '--inputencoding=utf8',
+ '--',
+ ]
+ result1 = subprocess.run(args1, input=document_template % math_code,
+ capture_output=True, text=True)
+ if result1.stdout:
+ result1.stderr = '\n'.join(line for line in result1.stderr.splitlines()
+ if line.startswith('Error:')
+ or line.startswith('Warning:')
+ or line.startswith('Fatal:'))
+ _check_result(result1)
-def blahtexml(math_code, inline=True, reporter=None):
- """Convert LaTeX math code to MathML with blahtexml_
+ args2 = ['latexmlpost',
+ '-',
+ '--nonumbersections',
+ '--format=html5', # maths included as MathML
+ '--omitdoctype', # Make it simple, we only need the maths.
+ '--noscan', # ...
+ '--nocrossref',
+ '--nographicimages',
+ '--nopictureimages',
+ '--'
+ ]
+ result2 = subprocess.run(args2, input=result1.stdout,
+ capture_output=True, text=True)
+ # Extract MathML from HTML document:
+ # <table> with <math> in cells for "align", <math> element else.
+ start = result2.stdout.find('<table class="ltx_equationgroup')
+ if start != -1:
+ stop = result2.stdout.find('</table>', start)+8
+ result2.stdout = result2.stdout[start:stop].replace(
+ 'ltx_equationgroup', 'borderless align-center')
+ else:
+ result2.stdout = result2.stdout[result2.stdout.find('<math'):
+ result2.stdout.find('</math>')+7]
+ # Search for error messages
+ if result2.stdout:
+ _msg_source = result2.stdout # latexmlpost reports errors in output
+ else:
+ _msg_source = result2.stderr # just in case
+ result2.stderr = '\n'.join(line for line in _msg_source.splitlines()
+ if line.startswith('Error:')
+ or line.startswith('Warning:')
+ or line.startswith('Fatal:'))
+ _check_result(result2)
+ return result2.stdout
- .. _blahtexml: http://gva.noekeon.org/blahtexml/
+
+def pandoc(math_code, as_block=False):
+ """Convert LaTeX math code to MathML with pandoc__.
+
+ __ https://pandoc.org/
"""
- options = ['--mathml',
- '--indented',
- '--spacing', 'moderate',
- '--mathml-encoding', 'raw',
- '--other-encoding', 'raw',
- '--doctype-xhtml+mathml',
- '--annotate-TeX',
- ]
- if inline:
- mathmode_arg = ''
- else:
- mathmode_arg = ' display="block"'
- options.append('--displaymath')
+ args = ['pandoc',
+ '--mathml',
+ '--from=latex',
+ ]
+ result = subprocess.run(args, input=math_code,
+ capture_output=True, text=True)
- p = subprocess.Popen(['blahtexml']+options,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- close_fds=True)
- p.stdin.write(math_code.encode('utf-8'))
- p.stdin.close()
- result = p.stdout.read().decode('utf-8')
- err = p.stderr.read().decode('utf-8')
+ result.stdout = result.stdout[result.stdout.find('<math'):
+ result.stdout.find('</math>')+7]
+ # Pandoc (2.9.2.1) messages are pre-formatted for the terminal:
+ # 1. summary
+ # 2. math source (part)
+ # 3. error spot indicator '^' (works only in a literal block)
+ # 4. assumed problem
+ # 5. assumed solution (may be wrong or confusing)
+ # Construct a "details" list:
+ details = []
+ if result.stderr:
+ lines = result.stderr.splitlines()
+ details.append(nodes.paragraph('', lines[0]))
+ details.append(nodes.literal_block('', '\n'.join(lines[1:3])))
+ details.append(nodes.paragraph('', '\n'.join(lines[3:]),
+ classes=['pre-wrap']))
+ _check_result(result, details=details)
+ return result.stdout
- if result.find('<error>') >= 0:
- msg = result[result.find('<message>')+9:result.find('</message>')]
- raise SyntaxError('\nMessage from external converter blahtexml:\n%s'
- % msg)
- if reporter and (err.find('**** Error') >= 0 or not result):
- reporter.error(err)
- start, end = result.find('<markup>')+9, result.find('</markup>')
- result = ('<math xmlns="http://www.w3.org/1998/Math/MathML"%s>\n'
- '%s</math>\n') % (mathmode_arg, result[start:end])
- return result
+def ttm(math_code, as_block=False):
+ """Convert LaTeX math code to MathML with TtM__.
-def pandoc(math_code, reporter=None):
- """Convert LaTeX math code to MathML with pandoc_
+ Aged, limited, but fast.
- .. _pandoc: https://pandoc.org/
+ __ http://silas.psfc.mit.edu/tth/mml/
"""
- p = subprocess.Popen(['pandoc',
- '--mathml',
- '--from=latex',
- ],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- close_fds=True)
- p.stdin.write(math_code.encode('utf-8'))
- p.stdin.close()
- result = p.stdout.read().decode('utf-8')
- err = p.stderr.read().decode('utf-8').strip()
- x = p.wait()
+ args = ['ttm',
+ '-L', # source is LaTeX snippet
+ '-r'] # output MathML snippet
- if err:
- if reporter:
- reporter.error(err)
- raise SyntaxError('\nError message from external converter pandoc:\n%s'
- % err)
- if x != 0:
- raise SyntaxError('\nError code from external converter pandoc:\n%s'
- % x)
+ # Supports only ASCII and "latin extended" characters (Docutils converts
+ # most math characters to LaTeX commands before calling ttm).
+ try:
+ result = subprocess.run(args, input=math_code, capture_output=True,
+ text=True, encoding='ISO-8859-1')
+ except UnicodeEncodeError as err:
+ raise MathError(err)
- start, end = result.find('<math'), result.find('</math>')+7
- return result[start:end]
+ result.stdout = result.stdout[result.stdout.find('<math'):
+ result.stdout.find('</math>')+7]
+ if as_block:
+ result.stdout = result.stdout.replace('<math xmlns=',
+ '<math display="block" xmlns=')
+ result.stderr = '\n'.join(line[5:] + '.'
+ for line in result.stderr.splitlines()
+ if line.startswith('**** '))
+ _check_result(result)
+ return result.stdout
# self-test
if __name__ == "__main__":
- example = ('\\frac{\\partial \\sin^2(\\alpha)}{\\partial \\vec r}'
- '\\varpi \\mathbb{R} \\, \\text{Grüße}')
+ example = (r'\frac{\partial \sin^2(\alpha)}{\partial \vec r}'
+ r'\varpi \mathbb{R} \, \text{Grüße}')
+ # print(blahtexml(example, as_block=True))
+ # print(blahtexml(example))
# print(latexml('$'+example+'$'))
- # print(ttm('$'+example.replace('\\mathbb{R}', '')+'$'))
- print(blahtexml(example))
- # print(pandoc('$'+example+'$'))
+ print(pandoc('$'+example+'$'))
+ # print(ttm('$'+example.replace(r'\mathbb', r'\mathbf')+'$'))
+
+ buggy = r'\sinc \phy'
+ # buggy = '\sqrt[e]'
+ try:
+ # print(blahtexml(buggy))
+ # print(latexml(f'${buggy}$'))
+ print(pandoc(f'${buggy}$'))
+ # print(ttm(f'${buggy}$'))
+ except MathError as err:
+ print(err)
+ print(err.details)
+ for node in err.details:
+ print(node.astext())
Modified: trunk/docutils/docutils/writers/_html_base.py
===================================================================
--- trunk/docutils/docutils/writers/_html_base.py 2024-01-12 11:32:31 UTC (rev 9510)
+++ trunk/docutils/docutils/writers/_html_base.py 2024-01-13 09:50:07 UTC (rev 9511)
@@ -30,8 +30,9 @@
from docutils.parsers.rst.directives import length_or_percentage_or_unitless
from docutils.parsers.rst.directives.images import PIL
from docutils.transforms import writer_aux
-from docutils.utils.math import (unichar2tex, pick_math_environment,
- math2html, latex2mathml, tex2mathml_extern)
+from docutils.utils.math import (MathError, math2html, latex2mathml,
+ pick_math_environment,
+ tex2mathml_extern, unichar2tex)
class Writer(writers.Writer):
@@ -1271,17 +1272,19 @@
# As there is no native HTML math support, we provide alternatives
# for the math-output: LaTeX and MathJax simply wrap the content,
# HTML and MathML also convert the math_code.
- # HTML container
- math_tags = {
- # math_output: (block, inline, class-arguments)
- 'html': ('div', 'span', 'formula'),
- 'latex': ('pre', 'tt', 'math'),
- 'mathml': ('div', '', ''),
- 'mathjax': ('div', 'span', 'math'),
- }
+ # HTML element:
+ math_tags = { # format: (inline, block, [class arguments])
+ 'html': ('span', 'div', ['formula']),
+ 'latex': ('tt', 'pre', ['math']),
+ 'mathjax': ('span', 'div', ['math']),
+ 'mathml': ('', 'div', []),
+ 'problematic': ('span', 'pre', ['math', 'problematic']),
+ }
def visit_math(self, node, math_env=''):
# Also called from `visit_math_block()` (with math_env != '').
+ is_block = isinstance(node, nodes.math_block)
+ format = self.math_output
# LaTeX container
wrappers = {
# math_mode: (inline, block)
@@ -1289,6 +1292,7 @@
'latex': (None, None),
'mathml': ('$%s$', '\\begin{%s}\n%s\n\\end{%s}'),
'mathjax': (r'\(%s\)', '\\begin{%s}\n%s\n\\end{%s}'),
+ 'problematic': (None, None),
}
wrapper = wrappers[self.math_output][math_env != '']
if (self.math_output == 'mathml'
@@ -1302,73 +1306,65 @@
math_code = wrapper % (math_env, math_code, math_env)
except TypeError: # wrapper with one "%s"
math_code = wrapper % math_code
- # settings and conversion
- if self.math_output in ('latex', 'mathjax'):
- math_code = self.encode(math_code)
- if self.math_output == 'mathjax' and not self.math_header:
- if self.math_options:
- self.mathjax_url = self.math_options
- else:
- self.document.reporter.warning(
- 'No MathJax URL specified, using local fallback '
- '(see config.html).', base_node=node)
- # append configuration, if not already present in the URL:
- # input LaTeX with AMS, output common HTML
- if '?' not in self.mathjax_url:
- self.mathjax_url += '?config=TeX-AMS_CHTML'
- self.math_header = [self.mathjax_script % self.mathjax_url]
- elif self.math_output == 'html':
+
+ # preamble code and conversion
+ if format == 'html':
if self.math_options and not self.math_header:
- self.math_header = [self.stylesheet_call(
- utils.find_file_in_dirs(s, self.settings.stylesheet_dirs),
- adjust_path=True)
+ self.math_header = [
+ self.stylesheet_call(utils.find_file_in_dirs(
+ s, self.settings.stylesheet_dirs), adjust_path=True)
for s in self.math_options.split(',')]
# TODO: fix display mode in matrices and fractions
- math2html.DocumentParameters.displaymode = (math_env != '')
+ math2html.DocumentParameters.displaymode = is_block
math_code = math2html.math2html(math_code)
- elif self.math_output == 'mathml':
+ elif format == 'latex':
+ math_code = self.encode(math_code)
+ elif format == 'mathjax':
+ if not self.math_header:
+ if self.math_options:
+ self.mathjax_url = self.math_options
+ else:
+ self.document.reporter.warning(
+ 'No MathJax URL specified, using local fallback '
+ '(see config.html).', base_node=node)
+ # append MathJax configuration
+ # (input LaTeX with AMS, output common HTML):
+ if '?' not in self.mathjax_url:
+ self.mathjax_url += '?config=TeX-AMS_CHTML'
+ self.math_header = [self.mathjax_script % self.mathjax_url]
+ math_code = self.encode(math_code)
+ elif format == 'mathml':
if 'XHTML 1' in self.doctype:
self.doctype = self.doctype_mathml
self.content_type = self.content_type_mathml
- converter = self.math_options
+ if self.math_options:
+ converter = getattr(tex2mathml_extern, self.math_options)
+ else:
+ converter = latex2mathml.tex2mathml
try:
- if converter == 'latexml':
- math_code = tex2mathml_extern.latexml(
- math_code, self.document.reporter)
- elif converter == 'ttm':
- math_code = tex2mathml_extern.ttm(
- math_code, self.document.reporter)
- elif converter == 'blahtexml':
- math_code = tex2mathml_extern.blahtexml(
- math_code,
- inline=(not math_env),
- reporter=self.document.reporter)
- elif converter == 'pandoc':
- math_code = tex2mathml_extern.pandoc(
- math_code,
- reporter=self.document.reporter)
- elif not converter:
- math_code = latex2mathml.tex2mathml(
- math_code, inline=(not math_env))
+ math_code = converter(math_code, as_block=is_block)
+ except (MathError, OSError) as err:
+ details = getattr(err, 'details', [])
+ self.messages.append(self.document.reporter.warning(
+ err, *details, base_node=node))
+ math_code = self.encode(node.astext())
+ if self.settings.report_level <= 2:
+ format = 'problematic'
else:
- self.document.reporter.error('option "%s" not supported '
- 'with math-output "MathML"')
- except (OSError, SyntaxError) as err:
- self.messages.append(
- self.document.reporter.error(err, base_node=node))
- math_code = self.encode(math_code)
+ format = 'latex'
+ if isinstance(err, OSError):
+ # report missing converter only once
+ self.math_output = format
+
# append to document body
- tag = self.math_tags[self.math_output][math_env == '']
- clsarg = self.math_tags[self.math_output][2]
+ tag = self.math_tags[format][is_block]
+ suffix = '\n' if is_block else ''
if tag:
- self.body.append(self.starttag(node, tag,
- suffix='\n'*bool(math_env),
- CLASS=clsarg))
- self.body.append(math_code)
- if math_env: # block mode (equation, display)
- self.body.append('\n')
+ self.body.append(self.starttag(node, tag, suffix=suffix,
+ classes=self.math_tags[format][2]))
+ self.body.extend([math_code, suffix])
if tag:
- self.body.append('</%s>' % tag)
+ self.body.append(f'</{tag}>{suffix}')
# Content already processed:
raise nodes.SkipChildren
@@ -1380,7 +1376,6 @@
self.visit_math(node, math_env=math_env)
def depart_math_block(self, node):
- self.body.append('\n')
self.report_messages(node)
# Meta tags: 'lang' attribute replaced by 'xml:lang' in XHTML 1.1
Modified: trunk/docutils/docutils/writers/html4css1/html4css1.css
===================================================================
--- trunk/docutils/docutils/writers/html4css1/html4css1.css 2024-01-12 11:32:31 UTC (rev 9510)
+++ trunk/docutils/docutils/writers/html4css1/html4css1.css 2024-01-13 09:50:07 UTC (rev 9511)
@@ -293,7 +293,7 @@
span.pre {
white-space: pre }
-span.problematic {
+span.problematic, pre.problematic {
color: red }
span.section-subtitle {
Modified: trunk/docutils/docutils/writers/html5_polyglot/minimal.css
===================================================================
--- trunk/docutils/docutils/writers/html5_polyglot/minimal.css 2024-01-12 11:32:31 UTC (rev 9510)
+++ trunk/docutils/docutils/writers/html5_polyglot/minimal.css 2024-01-13 09:50:07 UTC (rev 9511)
@@ -50,6 +50,7 @@
/* Warnings, Errors */
.system-messages h2,
.system-message-title,
+pre.problematic,
span.problematic {
color: red;
}
@@ -62,6 +63,9 @@
/* do not wrap at hyphens and similar: */
.literal > span.pre { white-space: nowrap; }
+/* keep line-breaks (\n) visible */
+.pre-wrap { white-space: pre-wrap; }
+
/* Lists */
/* compact and simple lists: no margin between items */
Added: trunk/docutils/test/extra/test_math_conversion.py
===================================================================
--- trunk/docutils/test/extra/test_math_conversion.py (rev 0)
+++ trunk/docutils/test/extra/test_math_conversion.py 2024-01-13 09:50:07 UTC (rev 9511)
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+# :Copyright: © 2024 Günter Milde.
+
+# Released without warranty under the terms of the
+# GNU General Public License (v. 2 or later)
+
+# :License: Released under the terms of the `2-Clause BSD license`_, in short:
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+# This file is offered as-is, without any warranty.
+#
+# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
+
+from pathlib import Path
+
+from docutils.core import publish_string, publish_file
+
+TEST_ROOT = Path(__file__).parent.parent
+EXTRA = TEST_ROOT / 'extra'
+FUNCTIONAL = TEST_ROOT / 'functional'
+# EXPECTED = FUNCTIONAL / 'expected'
+INPUT = FUNCTIONAL / 'input'
+OUTPUT = EXTRA / 'output'
+
+sample = r"""
+There is ":math:`\lambda\omega\tau\varsigma`" to math.
+
+====================== ========== =======
+Das ist nicht lustig. Dafür das hier
+
+.. math:: So \isses
+====================== ========== =======
+
+
+Das ist :math:`\ein` Fehler.
+
+.. math:: \int_{-\infty} 3 \sinc x \phy \, d\gamma
+
+.. sidebar:: nebenbemerkung
+
+ noch was :math:`\sqrt[2]`
+
+.. math:: \sqrt[2]
+
+2-zeilig mit align:
+
+.. math::
+ s_i & = 3 \\
+ s_j + 3 &
+
+"""
+
+source_path = INPUT / 'data' / 'math.txt'
+
+settings = {'_disable_config': True,
+ 'report_level': 2, # warning
+ # 'report_level': 3, # error
+ # 'report_level': 4, # severe
+ }
+
+math_options = (['mathml', ''],
+ ['mathml', 'ttm'],
+ ['mathml', 'blahtexml'],
+ ['mathml', 'pandoc'],
+ # ['mathml', 'latexml'], # VERY slow
+ )
+
+for math_output in math_options:
+ settings['math_output'] = math_output
+ out_path = OUTPUT / f'math_output_{"_".join(math_output).strip("_")}.html'
+ html = publish_file(source_path=str(source_path),
+ destination_path=str(out_path),
+ writer_name='html5', settings_overrides=settings)
+
+ out_path = OUTPUT / f'buggy_{"_".join(math_output).strip("_")}.html'
+ html = publish_string(sample, 'mathml-test', writer_name='html5',
+ settings_overrides=settings)
+
+ with open(out_path, "wb") as fd:
+ fd.write(html)
Property changes on: trunk/docutils/test/extra/test_math_conversion.py
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision
\ No newline at end of property
Modified: trunk/docutils/test/test_writers/test_html5_polyglot_parts.py
===================================================================
--- trunk/docutils/test/test_writers/test_html5_polyglot_parts.py 2024-01-12 11:32:31 UTC (rev 9510)
+++ trunk/docutils/test/test_writers/test_html5_polyglot_parts.py 2024-01-13 09:50:07 UTC (rev 9511)
@@ -694,10 +694,10 @@
[r"""Broken :math:`\sin \my`.
""",
{'fragment': """\
-<p>Broken \\sin \\my.</p>
+<p>Broken <span class="math problematic">\\sin \\my</span>.</p>
<aside class="system-message">
-<p class="system-message-title">System Message: ERROR/3 (<span class="docutils literal"><string></span>, line 1)</p>
-<p>Unknown LaTeX command: \\my</p>
+<p class="system-message-title">System Message: WARNING/2 (<span class="docutils literal"><string></span>, line 1)</p>
+<p>Unknown LaTeX command "\\my".</p>
</aside>
"""}],
])
@@ -710,7 +710,7 @@
}, [
[r"""Broken :math:`\sin \my`.
""",
-{'fragment': '<p>Broken \\sin \\my.</p>\n'
+{'fragment': '<p>Broken <tt class="math">\\sin \\my</tt>.</p>\n'
}],
])
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|