From: <mi...@us...> - 2012-03-30 11:58:29
|
Revision: 7389 http://docutils.svn.sourceforge.net/docutils/?rev=7389&view=rev Author: milde Date: 2012-03-30 11:58:21 +0000 (Fri, 30 Mar 2012) Log Message: ----------- Avoid code duplication between xetex and latex2e writer (solves [ 3512728 ]). Modified Paths: -------------- trunk/docutils/HISTORY.txt trunk/docutils/docutils/writers/latex2e/__init__.py trunk/docutils/docutils/writers/xetex/__init__.py trunk/docutils/test/functional/expected/standalone_rst_xetex.tex Modified: trunk/docutils/HISTORY.txt =================================================================== --- trunk/docutils/HISTORY.txt 2012-03-30 07:11:39 UTC (rev 7388) +++ trunk/docutils/HISTORY.txt 2012-03-30 11:58:21 UTC (rev 7389) @@ -40,12 +40,12 @@ BinaryFileOutput. - New exceptions InputError and OutputError for IO errors in FileInput/FileOutput. - + * docutils/core.py: - No "hard" system exit on file IO errors: catch and report them in `Publisher.reportException` instead. Allows handling by a calling - application if the configuration setting `traceback` is True. + application if the configuration setting `traceback` is True. * docutils/utils.py -> docutils/utils/__init__.py @@ -84,6 +84,10 @@ - Use ``\DUtitle`` for unsupported section levels - Apply [ 3512791 ] do not compare string literals with "is" +* docutils/writers/xetex/__init__.py + + - Avoid code duplication with latex2e writer (solves [ 3512728 ]). + * docutils/writers/html4css1/__init__.py - Change default for `math-output` setting to MathJax. Modified: trunk/docutils/docutils/writers/latex2e/__init__.py =================================================================== --- trunk/docutils/docutils/writers/latex2e/__init__.py 2012-03-30 07:11:39 UTC (rev 7388) +++ trunk/docutils/docutils/writers/latex2e/__init__.py 2012-03-30 11:58:21 UTC (rev 7389) @@ -680,6 +680,161 @@ }""" +# LaTeX encoding maps +# ------------------- +# :: + +class CharMaps(object): + """LaTeX representations for active and Unicode characters.""" + + # characters that always need escaping: + special = { + ord('#'): ur'\#', + ord('$'): ur'\$', + ord('%'): ur'\%', + ord('&'): ur'\&', + ord('~'): ur'\textasciitilde{}', + ord('_'): ur'\_', + ord('^'): ur'\textasciicircum{}', + ord('\\'): ur'\textbackslash{}', + ord('{'): ur'\{', + ord('}'): ur'\}', + # Square brackets are ordinary chars and cannot be escaped with '\', + # so we put them in a group '{[}'. (Alternative: ensure that all + # macros with optional arguments are terminated with {} and text + # inside any optional argument is put in a group ``[{text}]``). + # Commands with optional args inside an optional arg must be put in a + # group, e.g. ``\item[{\hyperref[label]{text}}]``. + ord('['): ur'{[}', + ord(']'): ur'{]}', + # the soft hyphen is unknown in 8-bit text and not properly handled by XeTeX + 0x00AD: ur'\-', # SOFT HYPHEN + } + # Unicode chars that are not recognized by LaTeX's utf8 encoding + unsupported_unicode = { + 0x00A0: ur'~', # NO-BREAK SPACE + # TODO: ensure white space also at the beginning of a line? + # 0x00A0: ur'\leavevmode\nobreak\vadjust{}~' + 0x2008: ur'\,', # PUNCTUATION SPACE + 0x2011: ur'\hbox{-}', # NON-BREAKING HYPHEN + 0x202F: ur'\,', # NARROW NO-BREAK SPACE + 0x21d4: ur'$\Leftrightarrow$', + # Docutils footnote symbols: + 0x2660: ur'$\spadesuit$', + 0x2663: ur'$\clubsuit$', + } + # Unicode chars that are recognized by LaTeX's utf8 encoding + utf8_supported_unicode = { + 0x200C: ur'\textcompwordmark', # ZERO WIDTH NON-JOINER + 0x2013: ur'\textendash{}', + 0x2014: ur'\textemdash{}', + 0x2018: ur'\textquoteleft{}', + 0x2019: ur'\textquoteright{}', + 0x201A: ur'\quotesinglbase{}', # SINGLE LOW-9 QUOTATION MARK + 0x201C: ur'\textquotedblleft{}', + 0x201D: ur'\textquotedblright{}', + 0x201E: ur'\quotedblbase{}', # DOUBLE LOW-9 QUOTATION MARK + 0x2030: ur'\textperthousand{}', # PER MILLE SIGN + 0x2031: ur'\textpertenthousand{}', # PER TEN THOUSAND SIGN + 0x2039: ur'\guilsinglleft{}', + 0x203A: ur'\guilsinglright{}', + 0x2423: ur'\textvisiblespace{}', # OPEN BOX + 0x2020: ur'\dag{}', + 0x2021: ur'\ddag{}', + 0x2026: ur'\dots{}', + 0x2122: ur'\texttrademark{}', + } + # recognized with 'utf8', if textcomp is loaded + textcomp = { + # Latin-1 Supplement + 0x00a2: ur'\textcent{}', # ¢ CENT SIGN + 0x00a4: ur'\textcurrency{}', # ¤ CURRENCY SYMBOL + 0x00a5: ur'\textyen{}', # ¥ YEN SIGN + 0x00a6: ur'\textbrokenbar{}', # ¦ BROKEN BAR + 0x00a7: ur'\textsection{}', # § SECTION SIGN + 0x00a8: ur'\textasciidieresis{}', # ¨ DIAERESIS + 0x00a9: ur'\textcopyright{}', # © COPYRIGHT SIGN + 0x00aa: ur'\textordfeminine{}', # ª FEMININE ORDINAL INDICATOR + 0x00ac: ur'\textlnot{}', # ¬ NOT SIGN + 0x00ae: ur'\textregistered{}', # ® REGISTERED SIGN + 0x00af: ur'\textasciimacron{}', # ¯ MACRON + 0x00b0: ur'\textdegree{}', # ° DEGREE SIGN + 0x00b1: ur'\textpm{}', # ± PLUS-MINUS SIGN + 0x00b2: ur'\texttwosuperior{}', # ² SUPERSCRIPT TWO + 0x00b3: ur'\textthreesuperior{}', # ³ SUPERSCRIPT THREE + 0x00b4: ur'\textasciiacute{}', # ´ ACUTE ACCENT + 0x00b5: ur'\textmu{}', # µ MICRO SIGN + 0x00b6: ur'\textparagraph{}', # ¶ PILCROW SIGN # not equal to \textpilcrow + 0x00b9: ur'\textonesuperior{}', # ¹ SUPERSCRIPT ONE + 0x00ba: ur'\textordmasculine{}', # º MASCULINE ORDINAL INDICATOR + 0x00bc: ur'\textonequarter{}', # 1/4 FRACTION + 0x00bd: ur'\textonehalf{}', # 1/2 FRACTION + 0x00be: ur'\textthreequarters{}', # 3/4 FRACTION + 0x00d7: ur'\texttimes{}', # × MULTIPLICATION SIGN + 0x00f7: ur'\textdiv{}', # ÷ DIVISION SIGN + # + 0x0192: ur'\textflorin{}', # LATIN SMALL LETTER F WITH HOOK + 0x02b9: ur'\textasciiacute{}', # MODIFIER LETTER PRIME + 0x02ba: ur'\textacutedbl{}', # MODIFIER LETTER DOUBLE PRIME + 0x2016: ur'\textbardbl{}', # DOUBLE VERTICAL LINE + 0x2022: ur'\textbullet{}', # BULLET + 0x2032: ur'\textasciiacute{}', # PRIME + 0x2033: ur'\textacutedbl{}', # DOUBLE PRIME + 0x2035: ur'\textasciigrave{}', # REVERSED PRIME + 0x2036: ur'\textgravedbl{}', # REVERSED DOUBLE PRIME + 0x203b: ur'\textreferencemark{}', # REFERENCE MARK + 0x203d: ur'\textinterrobang{}', # INTERROBANG + 0x2044: ur'\textfractionsolidus{}', # FRACTION SLASH + 0x2045: ur'\textlquill{}', # LEFT SQUARE BRACKET WITH QUILL + 0x2046: ur'\textrquill{}', # RIGHT SQUARE BRACKET WITH QUILL + 0x2052: ur'\textdiscount{}', # COMMERCIAL MINUS SIGN + 0x20a1: ur'\textcolonmonetary{}', # COLON SIGN + 0x20a3: ur'\textfrenchfranc{}', # FRENCH FRANC SIGN + 0x20a4: ur'\textlira{}', # LIRA SIGN + 0x20a6: ur'\textnaira{}', # NAIRA SIGN + 0x20a9: ur'\textwon{}', # WON SIGN + 0x20ab: ur'\textdong{}', # DONG SIGN + 0x20ac: ur'\texteuro{}', # EURO SIGN + 0x20b1: ur'\textpeso{}', # PESO SIGN + 0x20b2: ur'\textguarani{}', # GUARANI SIGN + 0x2103: ur'\textcelsius{}', # DEGREE CELSIUS + 0x2116: ur'\textnumero{}', # NUMERO SIGN + 0x2117: ur'\textcircledP{}', # SOUND RECORDING COYRIGHT + 0x211e: ur'\textrecipe{}', # PRESCRIPTION TAKE + 0x2120: ur'\textservicemark{}', # SERVICE MARK + 0x2122: ur'\texttrademark{}', # TRADE MARK SIGN + 0x2126: ur'\textohm{}', # OHM SIGN + 0x2127: ur'\textmho{}', # INVERTED OHM SIGN + 0x212e: ur'\textestimated{}', # ESTIMATED SYMBOL + 0x2190: ur'\textleftarrow{}', # LEFTWARDS ARROW + 0x2191: ur'\textuparrow{}', # UPWARDS ARROW + 0x2192: ur'\textrightarrow{}', # RIGHTWARDS ARROW + 0x2193: ur'\textdownarrow{}', # DOWNWARDS ARROW + 0x2212: ur'\textminus{}', # MINUS SIGN + 0x2217: ur'\textasteriskcentered{}', # ASTERISK OPERATOR + 0x221a: ur'\textsurd{}', # SQUARE ROOT + 0x2422: ur'\textblank{}', # BLANK SYMBOL + 0x25e6: ur'\textopenbullet{}', # WHITE BULLET + 0x25ef: ur'\textbigcircle{}', # LARGE CIRCLE + 0x266a: ur'\textmusicalnote{}', # EIGHTH NOTE + 0x26ad: ur'\textmarried{}', # MARRIAGE SYMBOL + 0x26ae: ur'\textdivorced{}', # DIVORCE SYMBOL + 0x27e8: ur'\textlangle{}', # MATHEMATICAL LEFT ANGLE BRACKET + 0x27e9: ur'\textrangle{}', # MATHEMATICAL RIGHT ANGLE BRACKET + } + # Unicode chars that require a feature/package to render + pifont = { + 0x2665: ur'\ding{170}', # black heartsuit + 0x2666: ur'\ding{169}', # black diamondsuit + 0x2713: ur'\ding{51}', # check mark + 0x2717: ur'\ding{55}', # check mark + } + # TODO: greek alphabet ... ? + # see also LaTeX codec + # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252124 + # and unimap.py from TeXML + + class DocumentClass(object): """Details of a LaTeX document class.""" @@ -923,6 +1078,10 @@ # to other packages, as done with babel. # Dummy settings might be taken from document settings + # Write code for typesetting with 8-bit tex/pdftex (vs. xetex/luatex) engine + # overwritten by the XeTeX writer + is_xetex = False + # Config setting defaults # ----------------------- @@ -1097,7 +1256,7 @@ self.requirements['_inputenc'] = (r'\usepackage[%s]{inputenc}' % self.latex_encoding) # TeX font encoding - if self.font_encoding: + if self.font_encoding and not self.is_xetex: self.requirements['_fontenc'] = (r'\usepackage[%s]{fontenc}' % self.font_encoding) # page layout with typearea (if there are relevant document options) @@ -1239,7 +1398,7 @@ encoding = docutils_encoding.lower() if encoding in tr: return tr[encoding] - # convert: latin-1, latin_1, utf-8 and similar things + # drop hyphen or low-line from "latin-1", "latin_1", "utf-8" and similar encoding = encoding.replace('_', '').replace('-', '') # strip the error handler return encoding.split(':')[0] @@ -1247,188 +1406,24 @@ def language_label(self, docutil_label): return self.language_module.labels[docutil_label] - def ensure_math(self, text): - if not hasattr(self, 'ensure_math_re'): - chars = { # lnot,pm,twosuperior,threesuperior,mu,onesuperior,times,div - 'latin1' : '\xac\xb1\xb2\xb3\xb5\xb9\xd7\xf7' , # ¬±²³µ¹×÷ - # TODO?: use texcomp instead. - } - self.ensure_math_re = re.compile('([%s])' % chars['latin1']) - text = self.ensure_math_re.sub(r'\\ensuremath{\1}', text) - return text - def encode(self, text): """Return text with 'problematic' characters escaped. - Escape the ten special printing characters ``# $ % & ~ _ ^ \ { }``, - square brackets ``[ ]``, double quotes and (in OT1) ``< | >``. - - Separate ``-`` (and more in literal text) to prevent input ligatures. - - Translate non-supported Unicode characters. + * Escape the ten special printing characters ``# $ % & ~ _ ^ \ { }``, + square brackets ``[ ]``, double quotes and (in OT1) ``< | >``. + * Translate non-supported Unicode characters. + * Separate ``-`` (and more in literal text) to prevent input ligatures. """ if self.verbatim: return text - # Separate compound characters, e.g. '--' to '-{}-'. - separate_chars = '-' - # In monospace-font, we also separate ',,', '``' and "''" and some - # other characters which can't occur in non-literal text. - if self.literal: - separate_chars += ',`\'"<>' - # LaTeX encoding maps: - special_chars = { - ord('#'): ur'\#', - ord('$'): ur'\$', - ord('%'): ur'\%', - ord('&'): ur'\&', - ord('~'): ur'\textasciitilde{}', - ord('_'): ur'\_', - ord('^'): ur'\textasciicircum{}', - ord('\\'): ur'\textbackslash{}', - ord('{'): ur'\{', - ord('}'): ur'\}', - # Square brackets are ordinary chars and cannot be escaped with '\', - # so we put them in a group '{[}'. (Alternative: ensure that all - # macros with optional arguments are terminated with {} and text - # inside any optional argument is put in a group ``[{text}]``). - # Commands with optional args inside an optional arg must be put - # in a group, e.g. ``\item[{\hyperref[label]{text}}]``. - ord('['): ur'{[}', - ord(']'): ur'{]}' - } - # Unicode chars that are not recognized by LaTeX's utf8 encoding - unsupported_unicode_chars = { - 0x00A0: ur'~', # NO-BREAK SPACE - # TODO: ensure white space also at the beginning of a line? - # 0x00A0: ur'\leavevmode\nobreak\vadjust{}~' - 0x00AD: ur'\-', # SOFT HYPHEN - # - 0x2008: ur'\,', # PUNCTUATION SPACE - 0x2011: ur'\hbox{-}', # NON-BREAKING HYPHEN - 0x202F: ur'\,', # NARROW NO-BREAK SPACE - 0x21d4: ur'$\Leftrightarrow$', - # Docutils footnote symbols: - 0x2660: ur'$\spadesuit$', - 0x2663: ur'$\clubsuit$', - } - # Unicode chars that are recognized by LaTeX's utf8 encoding - unicode_chars = { - 0x200C: ur'\textcompwordmark', # ZERO WIDTH NON-JOINER - 0x2013: ur'\textendash{}', - 0x2014: ur'\textemdash{}', - 0x2018: ur'\textquoteleft{}', - 0x2019: ur'\textquoteright{}', - 0x201A: ur'\quotesinglbase{}', # SINGLE LOW-9 QUOTATION MARK - 0x201C: ur'\textquotedblleft{}', - 0x201D: ur'\textquotedblright{}', - 0x201E: ur'\quotedblbase{}', # DOUBLE LOW-9 QUOTATION MARK - 0x2030: ur'\textperthousand{}', # PER MILLE SIGN - 0x2031: ur'\textpertenthousand{}', # PER TEN THOUSAND SIGN - 0x2039: ur'\guilsinglleft{}', - 0x203A: ur'\guilsinglright{}', - 0x2423: ur'\textvisiblespace{}', # OPEN BOX - 0x2020: ur'\dag{}', - 0x2021: ur'\ddag{}', - 0x2026: ur'\dots{}', - 0x2122: ur'\texttrademark{}', - } - # Unicode chars that require a feature/package to render - pifont_chars = { - 0x2665: ur'\ding{170}', # black heartsuit - 0x2666: ur'\ding{169}', # black diamondsuit - 0x2713: ur'\ding{51}', # check mark - 0x2717: ur'\ding{55}', # check mark - } - # recognized with 'utf8', if textcomp is loaded - textcomp_chars = { - # Latin-1 Supplement - 0x00a2: ur'\textcent{}', # ¢ CENT SIGN - 0x00a4: ur'\textcurrency{}', # ¤ CURRENCY SYMBOL - 0x00a5: ur'\textyen{}', # ¥ YEN SIGN - 0x00a6: ur'\textbrokenbar{}', # ¦ BROKEN BAR - 0x00a7: ur'\textsection{}', # § SECTION SIGN - 0x00a8: ur'\textasciidieresis{}', # ¨ DIAERESIS - 0x00a9: ur'\textcopyright{}', # © COPYRIGHT SIGN - 0x00aa: ur'\textordfeminine{}', # ª FEMININE ORDINAL INDICATOR - 0x00ac: ur'\textlnot{}', # ¬ NOT SIGN - 0x00ae: ur'\textregistered{}', # ® REGISTERED SIGN - 0x00af: ur'\textasciimacron{}', # ¯ MACRON - 0x00b0: ur'\textdegree{}', # ° DEGREE SIGN - 0x00b1: ur'\textpm{}', # ± PLUS-MINUS SIGN - 0x00b2: ur'\texttwosuperior{}', # ² SUPERSCRIPT TWO - 0x00b3: ur'\textthreesuperior{}', # ³ SUPERSCRIPT THREE - 0x00b4: ur'\textasciiacute{}', # ´ ACUTE ACCENT - 0x00b5: ur'\textmu{}', # µ MICRO SIGN - 0x00b6: ur'\textparagraph{}', # ¶ PILCROW SIGN # not equal to \textpilcrow - 0x00b9: ur'\textonesuperior{}', # ¹ SUPERSCRIPT ONE - 0x00ba: ur'\textordmasculine{}', # º MASCULINE ORDINAL INDICATOR - 0x00bc: ur'\textonequarter{}', # 1/4 FRACTION - 0x00bd: ur'\textonehalf{}', # 1/2 FRACTION - 0x00be: ur'\textthreequarters{}', # 3/4 FRACTION - 0x00d7: ur'\texttimes{}', # × MULTIPLICATION SIGN - 0x00f7: ur'\textdiv{}', # ÷ DIVISION SIGN - # - 0x0192: ur'\textflorin{}', # LATIN SMALL LETTER F WITH HOOK - 0x02b9: ur'\textasciiacute{}', # MODIFIER LETTER PRIME - 0x02ba: ur'\textacutedbl{}', # MODIFIER LETTER DOUBLE PRIME - 0x2016: ur'\textbardbl{}', # DOUBLE VERTICAL LINE - 0x2022: ur'\textbullet{}', # BULLET - 0x2032: ur'\textasciiacute{}', # PRIME - 0x2033: ur'\textacutedbl{}', # DOUBLE PRIME - 0x2035: ur'\textasciigrave{}', # REVERSED PRIME - 0x2036: ur'\textgravedbl{}', # REVERSED DOUBLE PRIME - 0x203b: ur'\textreferencemark{}', # REFERENCE MARK - 0x203d: ur'\textinterrobang{}', # INTERROBANG - 0x2044: ur'\textfractionsolidus{}', # FRACTION SLASH - 0x2045: ur'\textlquill{}', # LEFT SQUARE BRACKET WITH QUILL - 0x2046: ur'\textrquill{}', # RIGHT SQUARE BRACKET WITH QUILL - 0x2052: ur'\textdiscount{}', # COMMERCIAL MINUS SIGN - 0x20a1: ur'\textcolonmonetary{}', # COLON SIGN - 0x20a3: ur'\textfrenchfranc{}', # FRENCH FRANC SIGN - 0x20a4: ur'\textlira{}', # LIRA SIGN - 0x20a6: ur'\textnaira{}', # NAIRA SIGN - 0x20a9: ur'\textwon{}', # WON SIGN - 0x20ab: ur'\textdong{}', # DONG SIGN - 0x20ac: ur'\texteuro{}', # EURO SIGN - 0x20b1: ur'\textpeso{}', # PESO SIGN - 0x20b2: ur'\textguarani{}', # GUARANI SIGN - 0x2103: ur'\textcelsius{}', # DEGREE CELSIUS - 0x2116: ur'\textnumero{}', # NUMERO SIGN - 0x2117: ur'\textcircledP{}', # SOUND RECORDING COYRIGHT - 0x211e: ur'\textrecipe{}', # PRESCRIPTION TAKE - 0x2120: ur'\textservicemark{}', # SERVICE MARK - 0x2122: ur'\texttrademark{}', # TRADE MARK SIGN - 0x2126: ur'\textohm{}', # OHM SIGN - 0x2127: ur'\textmho{}', # INVERTED OHM SIGN - 0x212e: ur'\textestimated{}', # ESTIMATED SYMBOL - 0x2190: ur'\textleftarrow{}', # LEFTWARDS ARROW - 0x2191: ur'\textuparrow{}', # UPWARDS ARROW - 0x2192: ur'\textrightarrow{}', # RIGHTWARDS ARROW - 0x2193: ur'\textdownarrow{}', # DOWNWARDS ARROW - 0x2212: ur'\textminus{}', # MINUS SIGN - 0x2217: ur'\textasteriskcentered{}', # ASTERISK OPERATOR - 0x221a: ur'\textsurd{}', # SQUARE ROOT - 0x2422: ur'\textblank{}', # BLANK SYMBOL - 0x25e6: ur'\textopenbullet{}', # WHITE BULLET - 0x25ef: ur'\textbigcircle{}', # LARGE CIRCLE - 0x266a: ur'\textmusicalnote{}', # EIGHTH NOTE - 0x26ad: ur'\textmarried{}', # MARRIAGE SYMBOL - 0x26ae: ur'\textdivorced{}', # DIVORCE SYMBOL - 0x27e8: ur'\textlangle{}', # MATHEMATICAL LEFT ANGLE BRACKET - 0x27e9: ur'\textrangle{}', # MATHEMATICAL RIGHT ANGLE BRACKET - } - # TODO: greek alphabet ... ? - # see also LaTeX codec - # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252124 - # and unimap.py from TeXML - # set up the translation table: - table = special_chars + # Set up the translation table: + table = CharMaps.special.copy() # keep the underscore in citation references if self.inside_citation_reference_label: del(table[ord('_')]) # Workarounds for OT1 font-encoding - if self.font_encoding in ['OT1', '']: + if self.font_encoding in ['OT1', ''] and not self.is_xetex: # * out-of-order characters in cmtt if self.literal: # replace underscore by underlined blank, @@ -1449,41 +1444,47 @@ # double quotes are 'active' in some languages # TODO: use \textquotedbl if font encoding starts with T? table[ord('"')] = self.babel.literal_double_quote - # Unicode chars: - table.update(unsupported_unicode_chars) - table.update(pifont_chars) - if not self.latex_encoding.startswith('utf8'): - table.update(unicode_chars) - table.update(textcomp_chars) - # Characters that require a feature/package to render - for ch in text: - if ord(ch) in pifont_chars: - self.requirements['pifont'] = '\\usepackage{pifont}' - if ord(ch) in textcomp_chars: + # Unicode replacements for 8-bit tex engines (not required with XeTeX/LuaTeX): + if not self.is_xetex: + table.update(CharMaps.unsupported_unicode) + if not self.latex_encoding.startswith('utf8'): + table.update(CharMaps.utf8_supported_unicode) + table.update(CharMaps.textcomp) + table.update(CharMaps.pifont) + # Characters that require a feature/package to render + if [True for ch in text if ord(ch) in CharMaps.textcomp]: self.requirements['textcomp'] = PreambleCmds.textcomp + if [True for ch in text if ord(ch) in CharMaps.pifont]: + self.requirements['pifont'] = '\\usepackage{pifont}' text = text.translate(table) - # Break up input ligatures - for char in separate_chars * 2: - # Do it twice ("* 2") because otherwise we would replace - # '---' by '-{}--'. - text = text.replace(char + char, char + '{}' + char) + # Break up input ligatures e.g. '--' to '-{}-'. + if not self.is_xetex: # Not required with xetex/luatex + separate_chars = '-' + # In monospace-font, we also separate ',,', '``' and "''" and some + # other characters which can't occur in non-literal text. + if self.literal: + separate_chars += ',`\'"<>' + for char in separate_chars * 2: + # Do it twice ("* 2") because otherwise we would replace + # '---' by '-{}--'. + text = text.replace(char + char, char + '{}' + char) + # Literal line breaks (in address or literal blocks): if self.insert_newline: lines = text.split('\n') - # for blank lines, insert a protected space, to avoid - # ! LaTeX Error: There's no line here to end. - lines = [line + '~'*(not line.lstrip()) - for line in lines[:-1]] + lines[-1:] - text = '\\\\\n'.join(lines) + # Add a protected space to blank lines (except the last) + # to avoid ``! LaTeX Error: There's no line here to end.`` + for i, line in enumerate(lines[:-1]): + if not line.lstrip(): + lines[i] += '~' + text = (r'\\' + '\n').join(lines) if not self.literal: text = self.babel.quote_quotes(text) if self.literal and not self.insert_non_breaking_blanks: # preserve runs of spaces but allow wrapping text = text.replace(' ', ' ~') - if not self.latex_encoding.startswith('utf8'): - text = self.ensure_math(text) return text def attval(self, text, @@ -2218,9 +2219,12 @@ self.requirements['~header'] = ''.join(self.out) self.pop_output_collector() - def to_latex_length(self, length_str, pxunit='px'): + def to_latex_length(self, length_str, pxunit=None): """Convert `length_str` with rst lenght to LaTeX length """ + if pxunit is not None: + sys.stderr.write('deprecation warning: LaTeXTranslator.to_latex_length()' + ' option `pxunit` will be removed.') match = re.match('(\d*\.?\d*)\s*(\S*)', length_str) if not match: return length_str @@ -2231,12 +2235,13 @@ # percentage: relate to current line width elif unit == '%': length_str = '%.3f\\linewidth' % (float(value)/100.0) - elif (unit == 'px') and (pxunit != 'px'): - # length unit px not defined in some tex variants (e.g. XeTeX) + elif self.is_xetex and unit == 'px': + # XeTeX does not know the length unit px. + # Use \pdfpxdimen, the macro to set the value of 1 px in pdftex. + # This way, configuring works the same for pdftex and xetex. self.fallbacks['_providelength'] = PreambleCmds.providelength - self.fallbacks['px'] = '\n\\DUprovidelength{%s}{1bp}\n' % pxunit - length_str = '%s%s' % (value, pxunit) - + self.fallbacks['px'] = '\n\\DUprovidelength{\\pdfpxdimen}{1bp}\n' + length_str = r'%s\pdfpxdimen' % value return length_str def visit_image(self, node): Modified: trunk/docutils/docutils/writers/xetex/__init__.py =================================================================== --- trunk/docutils/docutils/writers/xetex/__init__.py 2012-03-30 07:11:39 UTC (rev 7388) +++ trunk/docutils/docutils/writers/xetex/__init__.py 2012-03-30 11:58:21 UTC (rev 7389) @@ -61,7 +61,7 @@ def __init__(self): latex2e.Writer.__init__(self) - self.settings_defaults.update({'fontencoding': ''}) # use default (EU1) + self.settings_defaults.update({'fontencoding': ''}) # use default (EU1 or EU2) self.translator_class = XeLaTeXTranslator @@ -125,79 +125,10 @@ class XeLaTeXTranslator(latex2e.LaTeXTranslator): def __init__(self, document): + self.is_xetex = True # typeset with XeTeX or LuaTeX engine latex2e.LaTeXTranslator.__init__(self, document, Babel) if self.latex_encoding == 'utf8': self.requirements.pop('_inputenc', None) else: self.requirements['_inputenc'] = (r'\XeTeXinputencoding %s ' % self.latex_encoding) - - # XeTeX does not know the length unit px. - # Use \pdfpxdimen, the macro to set the value of 1 px in pdftex. - # This way, configuring works the same for pdftex and xetex. - def to_latex_length(self, length_str, px=r'\pdfpxdimen'): - """Convert string with rst lenght to LaTeX length""" - return latex2e.LaTeXTranslator.to_latex_length(self, length_str, px) - - # Simpler variant of encode, as XeTeX understands utf8 Unicode: - def encode(self, text): - """Return text with 'problematic' characters escaped. - - Escape the ten special printing characters ``# $ % & ~ _ ^ \ { }``, - square brackets ``[ ]``, double quotes and (in OT1) ``< | >``. - """ - if self.verbatim: - return text - # LaTeX encoding maps: - special_chars = { - ord('#'): ur'\#', - ord('$'): ur'\$', - ord('%'): ur'\%', - ord('&'): ur'\&', - ord('~'): ur'\textasciitilde{}', - ord('_'): ur'\_', - ord('^'): ur'\textasciicircum{}', - ord('\\'): ur'\textbackslash{}', - ord('{'): ur'\{', - ord('}'): ur'\}', - # Square brackets are ordinary chars and cannot be escaped with '\', - # so we put them in a group '{[}'. (Alternative: ensure that all - # macros with optional arguments are terminated with {} and text - # inside any optional argument is put in a group ``[{text}]``). - # Commands with optional args inside an optional arg must be put - # in a group, e.g. ``\item[{\hyperref[label]{text}}]``. - ord('['): ur'{[}', - ord(']'): ur'{]}' - } - # Unicode chars that are not properly handled by XeTeX - unsupported_unicode_chars = { - 0x00AD: ur'\-', # SOFT HYPHEN - } - # set up the translation table: - table = special_chars - # keep the underscore in citation references - if self.inside_citation_reference_label: - del(table[ord('_')]) - if self.insert_non_breaking_blanks: - table[ord(' ')] = ur'~' - if self.literal: - # double quotes are 'active' in some languages - table[ord('"')] = self.babel.literal_double_quote - else: - text = self.babel.quote_quotes(text) - # Unicode chars: - table.update(unsupported_unicode_chars) - - text = text.translate(table) - - # Literal line breaks (in address or literal blocks): - if self.insert_newline: - # for blank lines, insert a protected space, to avoid - # ! LaTeX Error: There's no line here to end. - textlines = [line + '~'*(not line.lstrip()) - for line in text.split('\n')] - text = '\\\\\n'.join(textlines) - if self.literal and not self.insert_non_breaking_blanks: - # preserve runs of spaces but allow wrapping - text = text.replace(' ', ' ~') - return text Modified: trunk/docutils/test/functional/expected/standalone_rst_xetex.tex =================================================================== --- trunk/docutils/test/functional/expected/standalone_rst_xetex.tex 2012-03-30 07:11:39 UTC (rev 7388) +++ trunk/docutils/test/functional/expected/standalone_rst_xetex.tex 2012-03-30 11:58:21 UTC (rev 7389) @@ -1269,8 +1269,8 @@ % \begin{quote}{\ttfamily \raggedright \noindent \DUrole{ln}{~8~}\#~print~integers~from~0~to~9:\\ -~\DUrole{ln}{~9~}for~i~in~range(10):\\ -~\DUrole{ln}{10~}~~~~print~i +\DUrole{ln}{~9~}for~i~in~range(10):\\ +\DUrole{ln}{10~}~~~~print~i } \end{quote} @@ -1292,7 +1292,7 @@ % \begin{quote}{\ttfamily \raggedright \noindent \DUrole{ln}{1~}..~header::~Document~header\\ -~\DUrole{ln}{2~}..~footer::~Document~footer +\DUrole{ln}{2~}..~footer::~Document~footer } \end{quote} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |