From: Stephen F. <st...@th...> - 2019-08-25 15:57:37
|
While the 'u' string prefix was reintroduced in Python 3.3 (PEP 414), the 'ur' prefix remains invalid in Python 3. Escape all backslashes and use the plain 'u' prefix instead. Signed-off-by: Stephen Finucane <st...@th...ru> --- docutils/docutils/utils/math/latex2mathml.py | 28 +- .../docutils/utils/math/tex2mathml_extern.py | 2 +- docutils/docutils/utils/smartquotes.py | 24 +- docutils/docutils/writers/latex2e/__init__.py | 280 +++++++++--------- docutils/docutils/writers/manpage.py | 8 +- .../test_rst/test_east_asian_text.py | 4 +- .../tools/dev/generate_punctuation_chars.py | 10 +- 7 files changed, 181 insertions(+), 175 deletions(-) diff --git a/docutils/docutils/utils/math/latex2mathml.py b/docutils/docutils/utils/math/latex2mathml.py index 09b9c8e02..d97005221 100644 --- a/docutils/docutils/utils/math/latex2mathml.py +++ b/docutils/docutils/utils/math/latex2mathml.py @@ -6,12 +6,12 @@ # Based on rst2mathml.py from the latex_math sandbox project # © 2005 Jens Jørgen Mortensen # :License: Released under the terms of the `2-Clause BSD license`_, in short: -# +# # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. # This file is offered as-is, without any warranty. -# +# # .. 
_2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause @@ -412,7 +412,7 @@ def parse_latex_math(string, inline=True): node = entry skip = 2 else: - raise SyntaxError(ur'Syntax error: "%s%s"' % (c, c2)) + raise SyntaxError(u'Syntax error: "%s%s"' % (c, c2)) elif c.isalpha(): node = node.append(mi(c)) elif c.isdigit(): @@ -453,7 +453,7 @@ def parse_latex_math(string, inline=True): node.close().append(entry) node = entry else: - raise SyntaxError(ur'Illegal character: "%s"' % c) + raise SyntaxError(u'Illegal character: "%s"' % c) string = string[skip:] return tree @@ -474,15 +474,15 @@ def handle_keyword(name, node, string): node = entry elif name == 'end': if not string.startswith('{matrix}'): - raise SyntaxError(ur'Expected "\end{matrix}"!') + raise SyntaxError(u'Expected "\\end{matrix}"!') skip += 8 node = node.close().close().close() elif name in ('text', 'mathrm'): if string[0] != '{': - raise SyntaxError(ur'Expected "\text{...}"!') + raise SyntaxError(u'Expected "\\text{...}"!') i = string.find('}') if i == -1: - raise SyntaxError(ur'Expected "\text{...}"!') + raise SyntaxError(u'Expected "\\text{...}"!') node = node.append(mtext(string[1:i])) skip += i + 1 elif name == 'sqrt': @@ -520,7 +520,7 @@ def handle_keyword(name, node, string): if string.startswith(operator): break else: - raise SyntaxError(ur'Expected something to negate: "\not ..."!') + raise SyntaxError(u'Expected something to negate: "\\not ..."!') node = node.append(mo(negatables[operator])) skip += len(operator) elif name == 'mathbf': @@ -529,12 +529,12 @@ def handle_keyword(name, node, string): node = style elif name == 'mathbb': if string[0] != '{' or not string[1].isupper() or string[2] != '}': - raise SyntaxError(ur'Expected something like "\mathbb{A}"!') + raise SyntaxError(u'Expected something like "\\mathbb{A}"!') node = node.append(mi(mathbb[string[1]])) skip += 3 elif name in ('mathscr', 'mathcal'): if string[0] != '{' or string[2] != '}': - raise SyntaxError(ur'Expected 
something like "\mathscr{A}"!') + raise SyntaxError(u'Expected something like "\\mathscr{A}"!') node = node.append(mi(mathscr[string[1]])) skip += 3 elif name == 'colon': # "normal" colon, not binary operator @@ -559,12 +559,10 @@ def handle_keyword(name, node, string): return node, skip def tex2mathml(tex_math, inline=True): - """Return string with MathML code corresponding to `tex_math`. - + """Return string with MathML code corresponding to `tex_math`. + `inline`=True is for inline math and `inline`=False for displayed math. """ - + mathml_tree = parse_latex_math(tex_math, inline=inline) return ''.join(mathml_tree.xml()) - - diff --git a/docutils/docutils/utils/math/tex2mathml_extern.py b/docutils/docutils/utils/math/tex2mathml_extern.py index 5bd6ad703..03bd4c1f8 100644 --- a/docutils/docutils/utils/math/tex2mathml_extern.py +++ b/docutils/docutils/utils/math/tex2mathml_extern.py @@ -141,5 +141,5 @@ def blahtexml(math_code, inline=True, reporter=None): # self-test if __name__ == "__main__": - example = ur'\frac{\partial \sin^2(\alpha)}{\partial \vec r} \varpi \, \text{Grüße}' + example = u'\\frac{\\partial \\sin^2(\\alpha)}{\\partial \\vec r} \\varpi \\, \\text{Grüße}' # noqa print(blahtexml(example).encode('utf8')) diff --git a/docutils/docutils/utils/smartquotes.py b/docutils/docutils/utils/smartquotes.py index 0f9b74007..4a0174552 100644 --- a/docutils/docutils/utils/smartquotes.py +++ b/docutils/docutils/utils/smartquotes.py @@ -658,20 +658,21 @@ def educateQuotes(text, language='en'): text = re.sub(r"'(?=\d{2}s)", smart.apostrophe, text) # Get most opening single quotes: - opening_single_quotes_regex = re.compile(ur""" + opening_single_quotes_regex = re.compile(u""" (# ?<= # look behind fails: requires fixed-width pattern - \s | # a whitespace char, or + \\s | # a whitespace char, or %s | # another separating char, or | # a non-breaking space entity, or - [–—] | # literal dashes, or + [\u2013 \u2014 ] | # literal dashes, or -- | # dumb dashes, or 
&[mn]dash; | # dash entities (named or %s | # decimal or - &\#x201[34]; # hex) + &\\#x201[34]; # hex) ) ' # the quote - (?=\w) # followed by a word character - """ % (open_class,dec_dashes), re.VERBOSE | re.UNICODE) + (?=\\w) # followed by a word character + """ % (open_class, dec_dashes), re.VERBOSE | re.UNICODE) + text = opening_single_quotes_regex.sub(r'\1'+smart.osquote, text) # In many locales, single closing quotes are different from apostrophe: @@ -691,20 +692,21 @@ def educateQuotes(text, language='en'): text = re.sub(r"""'""", smart.osquote, text) # Get most opening double quotes: - opening_double_quotes_regex = re.compile(ur""" + opening_double_quotes_regex = re.compile(u""" ( - \s | # a whitespace char, or + \\s | # a whitespace char, or %s | # another separating char, or | # a non-breaking space entity, or - [–—] | # literal dashes, or + [\u2013 \u2014 ] | # literal dashes, or -- | # dumb dashes, or &[mn]dash; | # dash entities (named or %s | # decimal or - &\#x201[34]; # hex) + &\\#x201[34]; # hex) ) " # the quote - (?=\w) # followed by a word character + (?=\\w) # followed by a word character """ % (open_class,dec_dashes), re.VERBOSE | re.UNICODE) + text = opening_double_quotes_regex.sub(r'\1'+smart.opquote, text) # Double closing quotes: diff --git a/docutils/docutils/writers/latex2e/__init__.py b/docutils/docutils/writers/latex2e/__init__.py index 62b52731c..abb38e455 100644 --- a/docutils/docutils/writers/latex2e/__init__.py +++ b/docutils/docutils/writers/latex2e/__init__.py @@ -705,165 +705,165 @@ class CharMaps(object): # characters that need escaping even in `alltt` environments: alltt = { - ord('\\'): ur'\textbackslash{}', - ord('{'): ur'\{', - ord('}'): ur'\}', + ord('\\'): u'\\textbackslash{}', + ord('{'): u'\\{', + ord('}'): u'\\}', } # characters that normally need escaping: special = { - ord('#'): ur'\#', - ord('$'): ur'\$', - ord('%'): ur'\%', - ord('&'): ur'\&', - ord('~'): ur'\textasciitilde{}', - ord('_'): ur'\_', - ord('^'): 
ur'\textasciicircum{}', + ord('#'): u'\\#', + ord('$'): u'\\$', + ord('%'): u'\\%', + ord('&'): u'\\&', + ord('~'): u'\\textasciitilde{}', + ord('_'): u'\\_', + ord('^'): u'\\textasciicircum{}', # straight double quotes are 'active' in many languages - ord('"'): ur'\textquotedbl{}', + ord('"'): u'\\textquotedbl{}', # Square brackets are ordinary chars and cannot be escaped with '\', # so we put them in a group '{[}'. (Alternative: ensure that all # macros with optional arguments are terminated with {} and text # inside any optional argument is put in a group ``[{text}]``). # Commands with optional args inside an optional arg must be put in a # group, e.g. ``\item[{\hyperref[label]{text}}]``. - ord('['): ur'{[}', - ord(']'): ur'{]}', + ord('['): u'{[}', + ord(']'): u'{]}', # the soft hyphen is unknown in 8-bit text # and not properly handled by XeTeX - 0x00AD: ur'\-', # SOFT HYPHEN + 0x00AD: u'\\-', # SOFT HYPHEN } # Unicode chars that are not recognized by LaTeX's utf8 encoding unsupported_unicode = { # TODO: ensure white space also at the beginning of a line? 
- # 0x00A0: ur'\leavevmode\nobreak\vadjust{}~' - 0x2000: ur'\enskip', # EN QUAD - 0x2001: ur'\quad', # EM QUAD - 0x2002: ur'\enskip', # EN SPACE - 0x2003: ur'\quad', # EM SPACE - 0x2008: ur'\,', # PUNCTUATION SPACE - 0x200b: ur'\hspace{0pt}', # ZERO WIDTH SPACE - 0x202F: ur'\,', # NARROW NO-BREAK SPACE - # 0x02d8: ur'\\u{ }', # BREVE - 0x2011: ur'\hbox{-}', # NON-BREAKING HYPHEN - 0x212b: ur'\AA', # ANGSTROM SIGN - 0x21d4: ur'\ensuremath{\Leftrightarrow}', + # 0x00A0: u'\\leavevmode\\nobreak\\vadjust{}~' + 0x2000: u'\\enskip', # EN QUAD + 0x2001: u'\\quad', # EM QUAD + 0x2002: u'\\enskip', # EN SPACE + 0x2003: u'\\quad', # EM SPACE + 0x2008: u'\\,', # PUNCTUATION SPACE + 0x200b: u'\\hspace{0pt}', # ZERO WIDTH SPACE + 0x202F: u'\\,', # NARROW NO-BREAK SPACE + # 0x02d8: u'\\\u{ }', # BREVE + 0x2011: u'\\hbox{-}', # NON-BREAKING HYPHEN + 0x212b: u'\\AA', # ANGSTROM SIGN + 0x21d4: u'\\ensuremath{\\Leftrightarrow}', # Docutils footnote symbols: - 0x2660: ur'\ensuremath{\spadesuit}', - 0x2663: ur'\ensuremath{\clubsuit}', - 0xfb00: ur'ff', # LATIN SMALL LIGATURE FF - 0xfb01: ur'fi', # LATIN SMALL LIGATURE FI - 0xfb02: ur'fl', # LATIN SMALL LIGATURE FL - 0xfb03: ur'ffi', # LATIN SMALL LIGATURE FFI - 0xfb04: ur'ffl', # LATIN SMALL LIGATURE FFL + 0x2660: u'\\ensuremath{\\spadesuit}', + 0x2663: u'\\ensuremath{\\clubsuit}', + 0xfb00: u'ff', # LATIN SMALL LIGATURE FF + 0xfb01: u'fi', # LATIN SMALL LIGATURE FI + 0xfb02: u'fl', # LATIN SMALL LIGATURE FL + 0xfb03: u'ffi', # LATIN SMALL LIGATURE FFI + 0xfb04: u'ffl', # LATIN SMALL LIGATURE FFL } # Unicode chars that are recognized by LaTeX's utf8 encoding utf8_supported_unicode = { - 0x00A0: ur'~', # NO-BREAK SPACE - 0x00AB: ur'\guillemotleft{}', # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bb: ur'\guillemotright{}', # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x200C: ur'\textcompwordmark{}', # ZERO WIDTH NON-JOINER - 0x2013: ur'\textendash{}', - 0x2014: ur'\textemdash{}', - 0x2018: ur'\textquoteleft{}', - 0x2019: 
ur'\textquoteright{}', - 0x201A: ur'\quotesinglbase{}', # SINGLE LOW-9 QUOTATION MARK - 0x201C: ur'\textquotedblleft{}', - 0x201D: ur'\textquotedblright{}', - 0x201E: ur'\quotedblbase{}', # DOUBLE LOW-9 QUOTATION MARK - 0x2030: ur'\textperthousand{}', # PER MILLE SIGN - 0x2031: ur'\textpertenthousand{}', # PER TEN THOUSAND SIGN - 0x2039: ur'\guilsinglleft{}', - 0x203A: ur'\guilsinglright{}', - 0x2423: ur'\textvisiblespace{}', # OPEN BOX - 0x2020: ur'\dag{}', - 0x2021: ur'\ddag{}', - 0x2026: ur'\dots{}', - 0x2122: ur'\texttrademark{}', + 0x00A0: u'~', # NO-BREAK SPACE + 0x00AB: u'\\guillemotleft{}', # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bb: u'\\guillemotright{}', # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x200C: u'\\textcompwordmark{}', # ZERO WIDTH NON-JOINER + 0x2013: u'\\textendash{}', + 0x2014: u'\\textemdash{}', + 0x2018: u'\\textquoteleft{}', + 0x2019: u'\\textquoteright{}', + 0x201A: u'\\quotesinglbase{}', # SINGLE LOW-9 QUOTATION MARK + 0x201C: u'\\textquotedblleft{}', + 0x201D: u'\\textquotedblright{}', + 0x201E: u'\\quotedblbase{}', # DOUBLE LOW-9 QUOTATION MARK + 0x2030: u'\\textperthousand{}', # PER MILLE SIGN + 0x2031: u'\\textpertenthousand{}', # PER TEN THOUSAND SIGN + 0x2039: u'\\guilsinglleft{}', + 0x203A: u'\\guilsinglright{}', + 0x2423: u'\\textvisiblespace{}', # OPEN BOX + 0x2020: u'\\dag{}', + 0x2021: u'\\ddag{}', + 0x2026: u'\\dots{}', + 0x2122: u'\\texttrademark{}', } # recognized with 'utf8', if textcomp is loaded textcomp = { # Latin-1 Supplement - 0x00a2: ur'\textcent{}', # ¢ CENT SIGN - 0x00a4: ur'\textcurrency{}', # ¤ CURRENCY SYMBOL - 0x00a5: ur'\textyen{}', # ¥ YEN SIGN - 0x00a6: ur'\textbrokenbar{}', # ¦ BROKEN BAR - 0x00a7: ur'\textsection{}', # § SECTION SIGN - 0x00a8: ur'\textasciidieresis{}', # ¨ DIAERESIS - 0x00a9: ur'\textcopyright{}', # © COPYRIGHT SIGN - 0x00aa: ur'\textordfeminine{}', # ª FEMININE ORDINAL INDICATOR - 0x00ac: ur'\textlnot{}', # ¬ NOT SIGN - 0x00ae: ur'\textregistered{}', # ® REGISTERED SIGN - 
0x00af: ur'\textasciimacron{}', # ¯ MACRON - 0x00b0: ur'\textdegree{}', # ° DEGREE SIGN - 0x00b1: ur'\textpm{}', # ± PLUS-MINUS SIGN - 0x00b2: ur'\texttwosuperior{}', # ² SUPERSCRIPT TWO - 0x00b3: ur'\textthreesuperior{}', # ³ SUPERSCRIPT THREE - 0x00b4: ur'\textasciiacute{}', # ´ ACUTE ACCENT - 0x00b5: ur'\textmu{}', # µ MICRO SIGN - 0x00b6: ur'\textparagraph{}', # ¶ PILCROW SIGN # != \textpilcrow - 0x00b9: ur'\textonesuperior{}', # ¹ SUPERSCRIPT ONE - 0x00ba: ur'\textordmasculine{}', # º MASCULINE ORDINAL INDICATOR - 0x00bc: ur'\textonequarter{}', # 1/4 FRACTION - 0x00bd: ur'\textonehalf{}', # 1/2 FRACTION - 0x00be: ur'\textthreequarters{}', # 3/4 FRACTION - 0x00d7: ur'\texttimes{}', # × MULTIPLICATION SIGN - 0x00f7: ur'\textdiv{}', # ÷ DIVISION SIGN + 0x00a2: u'\\textcent{}', # ¢ CENT SIGN + 0x00a4: u'\\textcurrency{}', # ¤ CURRENCY SYMBOL + 0x00a5: u'\\textyen{}', # ¥ YEN SIGN + 0x00a6: u'\\textbrokenbar{}', # ¦ BROKEN BAR + 0x00a7: u'\\textsection{}', # § SECTION SIGN + 0x00a8: u'\\textasciidieresis{}', # ¨ DIAERESIS + 0x00a9: u'\\textcopyright{}', # © COPYRIGHT SIGN + 0x00aa: u'\\textordfeminine{}', # ª FEMININE ORDINAL INDICATOR + 0x00ac: u'\\textlnot{}', # ¬ NOT SIGN + 0x00ae: u'\\textregistered{}', # ® REGISTERED SIGN + 0x00af: u'\\textasciimacron{}', # ¯ MACRON + 0x00b0: u'\\textdegree{}', # ° DEGREE SIGN + 0x00b1: u'\\textpm{}', # ± PLUS-MINUS SIGN + 0x00b2: u'\\texttwosuperior{}', # ² SUPERSCRIPT TWO + 0x00b3: u'\\textthreesuperior{}', # ³ SUPERSCRIPT THREE + 0x00b4: u'\\textasciiacute{}', # ´ ACUTE ACCENT + 0x00b5: u'\\textmu{}', # µ MICRO SIGN + 0x00b6: u'\\textparagraph{}', # ¶ PILCROW SIGN # != \textpilcrow + 0x00b9: u'\\textonesuperior{}', # ¹ SUPERSCRIPT ONE + 0x00ba: u'\\textordmasculine{}', # º MASCULINE ORDINAL INDICATOR + 0x00bc: u'\\textonequarter{}', # 1/4 FRACTION + 0x00bd: u'\\textonehalf{}', # 1/2 FRACTION + 0x00be: u'\\textthreequarters{}', # 3/4 FRACTION + 0x00d7: u'\\texttimes{}', # × MULTIPLICATION SIGN + 0x00f7: u'\\textdiv{}', # ÷ 
DIVISION SIGN # others - 0x0192: ur'\textflorin{}', # LATIN SMALL LETTER F WITH HOOK - 0x02b9: ur'\textasciiacute{}', # MODIFIER LETTER PRIME - 0x02ba: ur'\textacutedbl{}', # MODIFIER LETTER DOUBLE PRIME - 0x2016: ur'\textbardbl{}', # DOUBLE VERTICAL LINE - 0x2022: ur'\textbullet{}', # BULLET - 0x2032: ur'\textasciiacute{}', # PRIME - 0x2033: ur'\textacutedbl{}', # DOUBLE PRIME - 0x2035: ur'\textasciigrave{}', # REVERSED PRIME - 0x2036: ur'\textgravedbl{}', # REVERSED DOUBLE PRIME - 0x203b: ur'\textreferencemark{}', # REFERENCE MARK - 0x203d: ur'\textinterrobang{}', # INTERROBANG - 0x2044: ur'\textfractionsolidus{}', # FRACTION SLASH - 0x2045: ur'\textlquill{}', # LEFT SQUARE BRACKET WITH QUILL - 0x2046: ur'\textrquill{}', # RIGHT SQUARE BRACKET WITH QUILL - 0x2052: ur'\textdiscount{}', # COMMERCIAL MINUS SIGN - 0x20a1: ur'\textcolonmonetary{}', # COLON SIGN - 0x20a3: ur'\textfrenchfranc{}', # FRENCH FRANC SIGN - 0x20a4: ur'\textlira{}', # LIRA SIGN - 0x20a6: ur'\textnaira{}', # NAIRA SIGN - 0x20a9: ur'\textwon{}', # WON SIGN - 0x20ab: ur'\textdong{}', # DONG SIGN - 0x20ac: ur'\texteuro{}', # EURO SIGN - 0x20b1: ur'\textpeso{}', # PESO SIGN - 0x20b2: ur'\textguarani{}', # GUARANI SIGN - 0x2103: ur'\textcelsius{}', # DEGREE CELSIUS - 0x2116: ur'\textnumero{}', # NUMERO SIGN - 0x2117: ur'\textcircledP{}', # SOUND RECORDING COYRIGHT - 0x211e: ur'\textrecipe{}', # PRESCRIPTION TAKE - 0x2120: ur'\textservicemark{}', # SERVICE MARK - 0x2122: ur'\texttrademark{}', # TRADE MARK SIGN - 0x2126: ur'\textohm{}', # OHM SIGN - 0x2127: ur'\textmho{}', # INVERTED OHM SIGN - 0x212e: ur'\textestimated{}', # ESTIMATED SYMBOL - 0x2190: ur'\textleftarrow{}', # LEFTWARDS ARROW - 0x2191: ur'\textuparrow{}', # UPWARDS ARROW - 0x2192: ur'\textrightarrow{}', # RIGHTWARDS ARROW - 0x2193: ur'\textdownarrow{}', # DOWNWARDS ARROW - 0x2212: ur'\textminus{}', # MINUS SIGN - 0x2217: ur'\textasteriskcentered{}', # ASTERISK OPERATOR - 0x221a: ur'\textsurd{}', # SQUARE ROOT - 0x2422: 
ur'\textblank{}', # BLANK SYMBOL - 0x25e6: ur'\textopenbullet{}', # WHITE BULLET - 0x25ef: ur'\textbigcircle{}', # LARGE CIRCLE - 0x266a: ur'\textmusicalnote{}', # EIGHTH NOTE - 0x26ad: ur'\textmarried{}', # MARRIAGE SYMBOL - 0x26ae: ur'\textdivorced{}', # DIVORCE SYMBOL - 0x27e8: ur'\textlangle{}', # MATHEMATICAL LEFT ANGLE BRACKET - 0x27e9: ur'\textrangle{}', # MATHEMATICAL RIGHT ANGLE BRACKET + 0x0192: u'\\textflorin{}', # LATIN SMALL LETTER F WITH HOOK + 0x02b9: u'\\textasciiacute{}', # MODIFIER LETTER PRIME + 0x02ba: u'\\textacutedbl{}', # MODIFIER LETTER DOUBLE PRIME + 0x2016: u'\\textbardbl{}', # DOUBLE VERTICAL LINE + 0x2022: u'\\textbullet{}', # BULLET + 0x2032: u'\\textasciiacute{}', # PRIME + 0x2033: u'\\textacutedbl{}', # DOUBLE PRIME + 0x2035: u'\\textasciigrave{}', # REVERSED PRIME + 0x2036: u'\\textgravedbl{}', # REVERSED DOUBLE PRIME + 0x203b: u'\\textreferencemark{}', # REFERENCE MARK + 0x203d: u'\\textinterrobang{}', # INTERROBANG + 0x2044: u'\\textfractionsolidus{}', # FRACTION SLASH + 0x2045: u'\\textlquill{}', # LEFT SQUARE BRACKET WITH QUILL + 0x2046: u'\\textrquill{}', # RIGHT SQUARE BRACKET WITH QUILL + 0x2052: u'\\textdiscount{}', # COMMERCIAL MINUS SIGN + 0x20a1: u'\\textcolonmonetary{}', # COLON SIGN + 0x20a3: u'\\textfrenchfranc{}', # FRENCH FRANC SIGN + 0x20a4: u'\\textlira{}', # LIRA SIGN + 0x20a6: u'\\textnaira{}', # NAIRA SIGN + 0x20a9: u'\\textwon{}', # WON SIGN + 0x20ab: u'\\textdong{}', # DONG SIGN + 0x20ac: u'\\texteuro{}', # EURO SIGN + 0x20b1: u'\\textpeso{}', # PESO SIGN + 0x20b2: u'\\textguarani{}', # GUARANI SIGN + 0x2103: u'\\textcelsius{}', # DEGREE CELSIUS + 0x2116: u'\\textnumero{}', # NUMERO SIGN + 0x2117: u'\\textcircledP{}', # SOUND RECORDING COYRIGHT + 0x211e: u'\\textrecipe{}', # PRESCRIPTION TAKE + 0x2120: u'\\textservicemark{}', # SERVICE MARK + 0x2122: u'\\texttrademark{}', # TRADE MARK SIGN + 0x2126: u'\\textohm{}', # OHM SIGN + 0x2127: u'\\textmho{}', # INVERTED OHM SIGN + 0x212e: u'\\textestimated{}', # 
ESTIMATED SYMBOL + 0x2190: u'\\textleftarrow{}', # LEFTWARDS ARROW + 0x2191: u'\\textuparrow{}', # UPWARDS ARROW + 0x2192: u'\\textrightarrow{}', # RIGHTWARDS ARROW + 0x2193: u'\\textdownarrow{}', # DOWNWARDS ARROW + 0x2212: u'\\textminus{}', # MINUS SIGN + 0x2217: u'\\textasteriskcentered{}', # ASTERISK OPERATOR + 0x221a: u'\\textsurd{}', # SQUARE ROOT + 0x2422: u'\\textblank{}', # BLANK SYMBOL + 0x25e6: u'\\textopenbullet{}', # WHITE BULLET + 0x25ef: u'\\textbigcircle{}', # LARGE CIRCLE + 0x266a: u'\\textmusicalnote{}', # EIGHTH NOTE + 0x26ad: u'\\textmarried{}', # MARRIAGE SYMBOL + 0x26ae: u'\\textdivorced{}', # DIVORCE SYMBOL + 0x27e8: u'\\textlangle{}', # MATHEMATICAL LEFT ANGLE BRACKET + 0x27e9: u'\\textrangle{}', # MATHEMATICAL RIGHT ANGLE BRACKET } # Unicode chars that require a feature/package to render pifont = { - 0x2665: ur'\ding{170}', # black heartsuit - 0x2666: ur'\ding{169}', # black diamondsuit - 0x2713: ur'\ding{51}', # check mark - 0x2717: ur'\ding{55}', # check mark + 0x2665: u'\\ding{170}', # black heartsuit + 0x2666: u'\\ding{169}', # black diamondsuit + 0x2713: u'\\ding{51}', # check mark + 0x2717: u'\\ding{55}', # check mark } # TODO: greek alphabet ... ? # see also LaTeX codec @@ -1512,14 +1512,14 @@ class LaTeXTranslator(nodes.NodeVisitor): # the backslash doesn't work, so we use a mirrored slash. 
# \reflectbox is provided by graphicx: self.requirements['graphicx'] = self.graphicx_package - table[ord('\\')] = ur'\reflectbox{/}' + table[ord('\\')] = u'\\reflectbox{/}' # * ``< | >`` come out as different chars (except for cmtt): else: - table[ord('|')] = ur'\textbar{}' - table[ord('<')] = ur'\textless{}' - table[ord('>')] = ur'\textgreater{}' + table[ord('|')] = u'\\textbar{}' + table[ord('<')] = u'\\textless{}' + table[ord('>')] = u'\\textgreater{}' if self.insert_non_breaking_blanks: - table[ord(' ')] = ur'~' + table[ord(' ')] = u'~' # tab chars may occur in included files (literal or code) # quick-and-dirty replacement with spaces # (for better results use `--literal-block-env=lstlisting`) @@ -2619,7 +2619,7 @@ class LaTeXTranslator(nodes.NodeVisitor): math_code = '\n'.join([math_code] + self.ids_to_labels(node)) if math_env == '$': if self.alltt: - wrapper = ur'\(%s\)' + wrapper = u'\\(%s\\)' else: wrapper = u'$%s$' else: @@ -2770,9 +2770,9 @@ class LaTeXTranslator(nodes.NodeVisitor): def visit_reference(self, node): # We need to escape #, \, and % if we use the URL in a command. 
- special_chars = {ord('#'): ur'\#', - ord('%'): ur'\%', - ord('\\'): ur'\\', + special_chars = {ord('#'): u'\\#', + ord('%'): u'\\%', + ord('\\'): u'\\\\', } # external reference (URL) if 'refuri' in node: diff --git a/docutils/docutils/writers/manpage.py b/docutils/docutils/writers/manpage.py index 6fcaf1c6c..8531603a1 100644 --- a/docutils/docutils/writers/manpage.py +++ b/docutils/docutils/writers/manpage.py @@ -280,10 +280,10 @@ class Translator(nodes.NodeVisitor): text = node.astext() text = text.replace('\\','\\e') replace_pairs = [ - (u'-', ur'\-'), - (u'\'', ur'\(aq'), - (u'´', ur'\''), - (u'`', ur'\(ga'), + (u'-', u'\\-'), + (u'\'', u'\\(aq'), + (u'´', u"\\'"), + (u'`', u'\\(ga'), ] for (in_char, out_markup) in replace_pairs: text = text.replace(in_char, out_markup) diff --git a/docutils/test/test_parsers/test_rst/test_east_asian_text.py b/docutils/test/test_parsers/test_rst/test_east_asian_text.py index 94857227c..beb5c7d12 100755 --- a/docutils/test/test_parsers/test_rst/test_east_asian_text.py +++ b/docutils/test/test_parsers/test_rst/test_east_asian_text.py @@ -47,12 +47,12 @@ u"""\ タイトル2 ======== """], -[ur""" +[u""" +-----------------------+ | * ヒョウ:ダイ1ギョウ | | * ダイ2ギョウ | +-----------------------+ -| \* ダイ1ギョウ | +| \\* ダイ1ギョウ | | * ダイ2ギョウ | +-----------------------+ """, diff --git a/docutils/tools/dev/generate_punctuation_chars.py b/docutils/tools/dev/generate_punctuation_chars.py index 55349016d..7aeb65d94 100644 --- a/docutils/tools/dev/generate_punctuation_chars.py +++ b/docutils/tools/dev/generate_punctuation_chars.py @@ -376,10 +376,16 @@ if __name__ == '__main__': print_differences(openers, o, 'openers') if o_wide: - print('+ openers-wide = ur"""%s"""' % o_wide.encode('utf8')) + if sys.version_info < (3, 0): + print('+ openers-wide = ur"""%s"""' % o_wide.encode('utf8')) + else: + print('+ openers-wide = r"""%s"""' % o_wide.encode('utf8')) print_differences(closers, c, 'closers') if c_wide: - print('+ closers-wide = ur"""%s"""' % 
c_wide.encode('utf8')) + if sys.version_info < (3, 0): + print('+ closers-wide = ur"""%s"""' % c_wide.encode('utf8')) + else: + print('+ closers-wide = r"""%s"""' % c_wide.encode('utf8')) print_differences(delimiters, d + d_wide, 'delimiters') print_differences(closing_delimiters, cd, 'closing_delimiters') -- 2.21.0 |