|
From: <mi...@us...> - 2021-06-17 09:59:22
|
Revision: 8763
http://sourceforge.net/p/docutils/code/8763
Author: milde
Date: 2021-06-17 09:59:21 +0000 (Thu, 17 Jun 2021)
Log Message:
-----------
MathML: Refactor negating with ``\not``, simplify code.
Reorganise character data.
Allow all operators to be negated by preceding ``\not``.
Use Unicode normalization to get available pre-composed characters.
Modified Paths:
--------------
trunk/docutils/docs/ref/rst/mathematics.txt
trunk/docutils/docutils/utils/math/latex2mathml.py
Modified: trunk/docutils/docs/ref/rst/mathematics.txt
===================================================================
--- trunk/docutils/docs/ref/rst/mathematics.txt 2021-06-17 09:59:05 UTC (rev 8762)
+++ trunk/docutils/docs/ref/rst/mathematics.txt 2021-06-17 09:59:21 UTC (rev 8763)
@@ -489,9 +489,11 @@
LateX2MathML, as there is no corresponding Unicode character.
Synonyms: `\ne` ``\ne``, `\le` ``\le``, `\ge` ``\ge``,
-`\Doteq` ``\Doteq``, `\llless` ``\llless``, `\gggtr` ``\gggtr``,
-`\not=` ``\not=``, `\not\equiv` ``\not\equiv``.
+`\Doteq` ``\Doteq``, `\llless` ``\llless``, `\gggtr` ``\gggtr``.
+Symbols can be negated prepending ``\not``, e.g.
+`\not=` ``\not=``, `\not\equiv` ``\not\equiv``,
+`\not\gtrless` ``\not\gtrless``, `\not\lessgtr` ``\not\lessgtr``.
Miscellaneous relations
~~~~~~~~~~~~~~~~~~~~~~~
@@ -522,8 +524,11 @@
`\ntriangleleft` ``\ntriangleleft`` `\Subset` ``\Subset``
===================== ======================= =================== ===================== =================== =====================
-Synonyms: `\owns` ``\owns``, `\not\in` ``\not\in``.
+Synonyms: `\owns` ``\owns``.
+Symbols can be negated prepending ``\not``, e.g.
+`\not\in` ``\not\in``, `\not\ni` ``\not\ni``.
+
The commands ``\varsubsetneq``, ``\varsubsetneqq``, ``\varsupsetneq``,
and ``\varsupsetneqq`` are not supported by LateX2MathML, as there is no
corresponding Unicode character.
@@ -790,13 +795,13 @@
The text may contain math commands wrapped in ``$`` signs, e.g.
-.. math:: (-1)^{n_i} = \begin{cases} -1 \quad \text{if $n_i$ is odd} \\
+.. math:: (-1)^{n_i} = \begin{cases} -1 \quad \text{if $n_i$ is odd,} \\
+1 \quad \text{if $n_i$ is even.}
\end{cases}
.. TODO ignore {}, handle text-mode commands
-
+
.. TODO: ``\mod`` and its relatives
--------------------------
@@ -917,7 +922,9 @@
<mtd>, <mscarry>, and <math> treat their contents as a single inferred mrow
formed from all their children.
-.. math:: a = \sqrt 2 + x, b = \sqrt{1+x^2}, c = \sqrt\frac{\sin(x)}{23},
+.. math:: a = \sqrt 2 + x,\quad
+ b = \sqrt{1+x^2},\quad
+ c = \sqrt\frac{\sin(x)}{23},
inline: :math:`a = \sqrt 2 + x, b = \sqrt{1+x^2}, c = \sqrt\frac{\sin(x)}{23}`.
@@ -942,8 +949,8 @@
.. math:: \text{das ist ein {toller} text (unescaped \{ and \} is
ignored by LaTeX)}
-Big delimiters
-~~~~~~~~~~~~~~
+Big delimiters and symbols
+~~~~~~~~~~~~~~~~~~~~~~~~~~
Test ``\left``, ``\right``, and the \bigl/\bigr, … size commands
with extensible delimiters.
@@ -981,3 +988,20 @@
\quad
\left.\vert \frac{b}{a}\right\Vert\ \bigl\vert b\Bigr\Vert\ \biggl\vert b\Biggr\Vert
\quad
+
+
+Variable-sized operators:
+
+Inline: `\sum\ \int\ \oint\ \smallint\ \prod\ \coprod\ \bigwedge\
+\bigvee\ \bigcap\ \bigcup\ \biguplus\ \bigsqcup\ \bigodot\ \bigoplus\
+\bigotimes` and Display:
+
+.. math:: \sum\ \int\ \oint\ \smallint\ \prod\ \coprod\ \bigwedge
+ \ \bigvee\ \bigcap\ \bigcup\ \biguplus\ \bigsqcup\ \bigodot
+ \ \bigoplus\ \bigotimes
+ \ \iiint\ \iiiint
+
+Text
+~~~~
+
+The text may contain non-ASCII characters: `n_\text{Stoß}`.
Modified: trunk/docutils/docutils/utils/math/latex2mathml.py
===================================================================
--- trunk/docutils/docutils/utils/math/latex2mathml.py 2021-06-17 09:59:05 UTC (rev 8762)
+++ trunk/docutils/docutils/utils/math/latex2mathml.py 2021-06-17 09:59:21 UTC (rev 8763)
@@ -26,6 +26,7 @@
import collections
import re
import sys
+import unicodedata
if sys.version_info >= (3, 0):
unicode = str # noqa
@@ -35,13 +36,6 @@
# Character data
# --------------
-# Named XML entities for invalid and invisible characters
-xml_entities = {ord('<'): u'&lt;',
- ord('>'): u'&gt;',
- ord('&'): u'&amp;',
- 0x2061: u'&ApplyFunction;',
- }
-
# LaTeX math macro to Unicode mappings.
# Character categories.
@@ -103,43 +97,60 @@
# operator, fence, or separator -> <mo>
-math_fences = {# mathfence aliases with adapted spacing
- 'lvert': u'|', # left |
- 'lVert': u'\u2016', # left ‖
- 'rvert': u'|', # right |
- 'rVert': u'\u2016', # right ‖
- 'Arrowvert': u'\u2016', # ‖
- }
+stretchables = {# extensible delimiters allowed in left/right cmds
+ 'backslash': '\\',
+ 'uparrow': u'\u2191', # ↑ UPWARDS ARROW
+ 'downarrow': u'\u2193', # ↓ DOWNWARDS ARROW
+ 'updownarrow': u'\u2195', # ↕ UP DOWN ARROW
+ 'Uparrow': u'\u21d1', # ⇑ UPWARDS DOUBLE ARROW
+ 'Downarrow': u'\u21d3', # ⇓ DOWNWARDS DOUBLE ARROW
+ 'Updownarrow': u'\u21d5', # ⇕ UP DOWN DOUBLE ARROW
+ 'lmoustache': u'\u23b0', # ⎰ UPPER LEFT OR LOWER RIGHT CURLY BRACKET SECTION
+ 'rmoustache': u'\u23b1', # ⎱ UPPER RIGHT OR LOWER LEFT CURLY BRACKET SECTION
+ 'arrowvert': u'\u23d0', # ⏐ VERTICAL LINE EXTENSION
+ 'bracevert': u'\u23aa', # ⎪ CURLY BRACKET EXTENSION
+ 'lvert': u'|', # left |
+ 'lVert': u'\u2016', # left ‖
+ 'rvert': u'|', # right |
+ 'rVert': u'\u2016', # right ‖
+ 'Arrowvert': u'\u2016', # ‖
+ }
+stretchables.update(tex2unichar.mathfence)
+stretchables.update(tex2unichar.mathopen) # Braces
+stretchables.update(tex2unichar.mathclose) # Braces
-operators = tex2unichar.mathbin # Binary symbols
+# >>> print(' '.join(sorted(set(stretchables.values()))))
+# [ \ ] { | } ‖ ↑ ↓ ↕ ⇑ ⇓ ⇕ ⌈ ⌉ ⌊ ⌋ ⌜ ⌝ ⌞ ⌟ ⎪ ⎰ ⎱ ⏐ ⟅ ⟆ ⟦ ⟧ ⟨ ⟩ ⟮ ⟯ ⦇ ⦈
+
+operators = {# negated symbols without pre-composed Unicode character
+ 'nleqq': u'\u2266\u0338', # ≦̸
+ 'ngeqq': u'\u2267\u0338', # ≧̸
+ 'nleqslant': u'\u2a7d\u0338', # ⩽̸
+ 'ngeqslant': u'\u2a7e\u0338', # ⩾̸
+ 'ngtrless': u'\u2277\u0338', # txfonts
+ 'nlessgtr': u'\u2276\u0338', # txfonts
+ 'nsubseteqq': u'\u2AC5\u0338', # ⫅̸
+ 'nsupseteqq': u'\u2AC6\u0338', # ⫆̸
+ # alias commands:
+ 'dotsb': u'\u22ef', # ⋯ with binary operators/relations
+ 'dotsc': u'\u2026', # … with commas
+ 'dotsi': u'\u22ef', # ⋯ with integrals
+ 'dotsm': u'\u22ef', # ⋯ multiplication dots
+ 'dotso': u'\u2026', # … other dots
+ # functions with movable limits (requires <mo>)
+ 'lim': 'lim',
+ 'sup': 'sup',
+ 'inf': 'inf',
+ 'max': 'max',
+ 'min': 'min',
+ }
+operators.update(tex2unichar.mathbin) # Binary symbols
operators.update(tex2unichar.mathrel) # Relation symbols, arrow symbols
operators.update(tex2unichar.mathord) # Miscellaneous symbols
operators.update(tex2unichar.mathop) # Variable-sized symbols
-operators.update(tex2unichar.mathopen) # Braces
-operators.update(tex2unichar.mathclose) # Braces
-operators.update(tex2unichar.mathfence)
-operators.update(math_fences)
-operators.update({# negated symbols without pre-composed Unicode character
- 'nleqq': u'\u2266\u0338', # ≦̸
- 'ngeqq': u'\u2267\u0338', # ≧̸
- 'nleqslant': u'\u2a7d\u0338', # ⩽̸
- 'ngeqslant': u'\u2a7e\u0338', # ⩾̸
- 'nsubseteqq': u'\u2AC5\u0338', # ⫅̸
- 'nsupseteqq': u'\u2AC6\u0338', # ⫆̸
- # alias commands:
- 'dotsb': u'\u22ef', # ⋯ with binary operators/relations
- 'dotsc': u'\u2026', # … with commas
- 'dotsi': u'\u22ef', # ⋯ with integrals
- 'dotsm': u'\u22ef', # ⋯ multiplication dots
- 'dotso': u'\u2026', # … other dots
- # functions with movable limits (requires <mo>)
- 'lim': 'lim',
- 'sup': 'sup',
- 'inf': 'inf',
- 'max': 'max',
- 'min': 'min',
- })
+operators.update(stretchables)
+
# special cases
thick_operators = {# style='font-weight: bold;'
@@ -171,38 +182,10 @@
# 'varointclockwise',))
-# pre-composed characters for negated symbols
-# see https://www.w3.org/TR/xml-entity-names/#combining
-negatables = {'=': u'\u2260',
- r'\in': u'\u2209',
- r'\equiv': u'\u2262'}
-
-# extensible delimiters allowed in left/right cmds
-stretchables = {'backslash': '\\',
- 'uparrow': u'\u2191', # ↑ UPWARDS ARROW
- 'downarrow': u'\u2193', # ↓ DOWNWARDS ARROW
- 'updownarrow': u'\u2195', # ↕ UP DOWN ARROW
- 'Uparrow': u'\u21d1', # ⇑ UPWARDS DOUBLE ARROW
- 'Downarrow': u'\u21d3', # ⇓ DOWNWARDS DOUBLE ARROW
- 'Updownarrow': u'\u21d5', # ⇕ UP DOWN DOUBLE ARROW
- 'lmoustache': u'\u23b0', # ⎰ UPPER LEFT OR LOWER RIGHT CURLY BRACKET SECTION
- 'rmoustache': u'\u23b1', # ⎱ UPPER RIGHT OR LOWER LEFT CURLY BRACKET SECTION
- 'arrowvert': u'\u23d0', # ⏐ VERTICAL LINE EXTENSION
- 'bracevert': u'\u23aa', # ⎪ CURLY BRACKET EXTENSION
- }
-stretchables.update(tex2unichar.mathfence)
-stretchables.update(tex2unichar.mathopen)
-stretchables.update(tex2unichar.mathclose)
-stretchables.update(math_fences)
-
-# >>> print(' '.join(sorted(set(stretchables.values()))))
-# [ \ ] { | } ‖ ↑ ↓ ↕ ⇑ ⇓ ⇕ ⌈ ⌉ ⌊ ⌋ ⌜ ⌝ ⌞ ⌟ ⎪ ⎰ ⎱ ⏐ ⟅ ⟆ ⟦ ⟧ ⟨ ⟩ ⟮ ⟯ ⦇ ⦈
-
# horizontal space -> <mspace>
spaces = {'qquad': '2em', # two \quad
'quad': '1em', # 18 mu
- 'qquad': '2em', # two \quad
'thickspace': '0.2778em', # 5mu = 5/18em
'medspace': '0.2222em', # 4mu = 2/9em
'thinspace': '0.1667em', # 3mu = 1/6em
@@ -210,9 +193,9 @@
'negmedspace': '-0.2222em', # -4mu = -2/9em
'negthickspace': '-0.2778em', # -5mu = -5/18em
' ': '0.25em', # inter word space
- ';': '0.2778em', # thickspace
- ':': '0.2222em', # medspace
- ',': '0.1667em', # thinspace
+ ';': '0.2778em', # 5mu thickspace
+ ':': '0.2222em', # 4mu medspace
+ ',': '0.1667em', # 3mu thinspace
'!': '-0.1667em', # negthinspace
}
@@ -320,6 +303,12 @@
parent = None
"""Parent node in MathML DOM tree."""
_level = 0 # indentation level (static class variable)
+ xml_entities = { # for invalid and invisible characters
+ ord('<'): u'&lt;',
+ ord('>'): u'&gt;',
+ ord('&'): u'&amp;',
+ 0x2061: u'&ApplyFunction;',
+ }
def __init__(self, *children, **attributes):
"""Set up node with `children` and `attributes`.
@@ -495,7 +484,7 @@
super(MathToken, self).__init__(**attributes)
def _xml_body(self, level=0):
- return [unicode(self.data).translate(xml_entities)]
+ return [unicode(self.data).translate(self.xml_entities)]
class mi(MathToken): pass
class mn(MathToken): pass
@@ -773,7 +762,7 @@
tree = node
while len(string) > 0:
- # Take of first character:
+ # Take off first character:
c, string = string[0], string[1:]
if c == ' ':
@@ -964,11 +953,16 @@
return node, string
if name == 'not':
- arg, string = tex_token_or_group(string)
- try:
- node = node.append(mo(negatables[arg]))
- except KeyError:
- raise SyntaxError(u'"\\not: Cannot negate: %s!'%arg)
+ arg, string = tex_token(string)
+ if arg == '{':
+ return node, '{\\not ' + string
+ if arg.startswith('\\'): # LaTeX macro
+ try:
+ arg = operators[arg[1:]]
+ except KeyError:
+ raise SyntaxError(u'\\not: Cannot negate: "%s"!'%arg)
+ arg = unicodedata.normalize('NFC', arg+u'\u0338')
+ node = node.append(mo(arg))
return node, string
# arbitrary text (usually comments) -> <mtext>
@@ -1334,16 +1328,16 @@
# TODO: look up more symbols from tr25, e.g.
-#
-#
+#
+#
# Table 2.8 Using Vertical Line or Solidus Overlay
# some of the negated forms of mathematical relations that can only be
# encoded by using either U+0338 COMBINING LONG SOLIDUS OVERLAY or U+20D2
# COMBINING LONG VERTICAL LINE OVERLAY . (For issues with using 0338 in
# MathML, see Section 3.2.7, Combining Marks.
-#
+#
# Table 2.9 Variants of Mathematical Symbols using VS1?
-#
+#
# Sequence Description
# 0030 + VS1 DIGIT ZERO - short diagonal stroke form
# 2205 + VS1 EMPTY SET - zero with long diagonal stroke overlay form
@@ -1370,4 +1364,3 @@
# 2AAD + VS1 LARGER THAN OR slanted EQUAL
# 2ACB + VS1 SUBSET OF ABOVE NOT EQUAL TO - variant with stroke through bottom members
# 2ACC + VS1 SUPERSET OF ABOVE NOT EQUAL TO - variant with stroke through bottom members
-
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|