[Docutils-checkins] SF.net SVN: docutils:[7389] trunk/docutils

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 7389
          http://docutils.svn.sourceforge.net/docutils/?rev=7389&view=rev
Author:   milde
Date:     2012-03-30 11:58:21 +0000 (Fri, 30 Mar 2012)
Log Message:
-----------
Avoid code duplication between xetex and latex2e writer (solves [ 3512728 ]).

Modified Paths:
--------------
    trunk/docutils/HISTORY.txt
    trunk/docutils/docutils/writers/latex2e/__init__.py
    trunk/docutils/docutils/writers/xetex/__init__.py
    trunk/docutils/test/functional/expected/standalone_rst_xetex.tex

Modified: trunk/docutils/HISTORY.txt
===================================================================

--- trunk/docutils/HISTORY.txt	2012-03-30 07:11:39 UTC (rev 7388)
+++ trunk/docutils/HISTORY.txt	2012-03-30 11:58:21 UTC (rev 7389)
@@ -40,12 +40,12 @@
     BinaryFileOutput.
   - New exceptions InputError and OutputError for IO errors in
     FileInput/FileOutput.
-    
+
 * docutils/core.py:
 
   - No "hard" system exit on file IO errors: catch and report them in
     `Publisher.reportException` instead. Allows handling by a calling
-    application if the configuration setting `traceback` is True. 
+    application if the configuration setting `traceback` is True.
 
 * docutils/utils.py -> docutils/utils/__init__.py
 
@@ -84,6 +84,10 @@
   - Use ``\DUtitle`` for unsupported section levels
   - Apply [ 3512791 ] do not compare string literals with "is"
 
+* docutils/writers/xetex/__init__.py
+
+  - Avoid code duplication with latex2e writer (solves [ 3512728 ]).
+
 * docutils/writers/html4css1/__init__.py
 
   - Change default for `math-output` setting to MathJax.

Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py	2012-03-30 07:11:39 UTC (rev 7388)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py	2012-03-30 11:58:21 UTC (rev 7389)
@@ -680,6 +680,161 @@
 }"""
 
 
+# LaTeX encoding maps
+# -------------------
+# ::
+
+class CharMaps(object):
+    """LaTeX representations for active and Unicode characters."""
+
+    # characters that always need escaping:
+    special = {
+        ord('#'): ur'\#',
+        ord('$'): ur'\$',
+        ord('%'): ur'\%',
+        ord('&'): ur'\&',
+        ord('~'): ur'\textasciitilde{}',
+        ord('_'): ur'\_',
+        ord('^'): ur'\textasciicircum{}',
+        ord('\\'): ur'\textbackslash{}',
+        ord('{'): ur'\{',
+        ord('}'): ur'\}',
+        # Square brackets are ordinary chars and cannot be escaped with '\',
+        # so we put them in a group '{[}'. (Alternative: ensure that all
+        # macros with optional arguments are terminated with {} and text
+        # inside any optional argument is put in a group ``[{text}]``).
+        # Commands with optional args inside an optional arg must be put in a
+        # group, e.g. ``\item[{\hyperref[label]{text}}]``.
+        ord('['): ur'{[}',
+        ord(']'): ur'{]}',
+        # the soft hyphen is unknown in 8-bit text and not properly handled by XeTeX
+        0x00AD: ur'\-', # SOFT HYPHEN
+    }
+    # Unicode chars that are not recognized by LaTeX's utf8 encoding
+    unsupported_unicode = {
+        0x00A0: ur'~', # NO-BREAK SPACE
+        # TODO: ensure white space also at the beginning of a line?
+        # 0x00A0: ur'\leavevmode\nobreak\vadjust{}~'
+        0x2008: ur'\,', # PUNCTUATION SPACE   
+        0x2011: ur'\hbox{-}', # NON-BREAKING HYPHEN
+        0x202F: ur'\,', # NARROW NO-BREAK SPACE
+        0x21d4: ur'$\Leftrightarrow$',
+        # Docutils footnote symbols:
+        0x2660: ur'$\spadesuit$',
+        0x2663: ur'$\clubsuit$',
+    }
+    # Unicode chars that are recognized by LaTeX's utf8 encoding
+    utf8_supported_unicode = {
+        0x200C: ur'\textcompwordmark', # ZERO WIDTH NON-JOINER
+        0x2013: ur'\textendash{}',
+        0x2014: ur'\textemdash{}',
+        0x2018: ur'\textquoteleft{}',
+        0x2019: ur'\textquoteright{}',
+        0x201A: ur'\quotesinglbase{}', # SINGLE LOW-9 QUOTATION MARK
+        0x201C: ur'\textquotedblleft{}',
+        0x201D: ur'\textquotedblright{}',
+        0x201E: ur'\quotedblbase{}', # DOUBLE LOW-9 QUOTATION MARK
+        0x2030: ur'\textperthousand{}',   # PER MILLE SIGN
+        0x2031: ur'\textpertenthousand{}', # PER TEN THOUSAND SIGN
+        0x2039: ur'\guilsinglleft{}',
+        0x203A: ur'\guilsinglright{}',
+        0x2423: ur'\textvisiblespace{}',  # OPEN BOX
+        0x2020: ur'\dag{}',
+        0x2021: ur'\ddag{}',
+        0x2026: ur'\dots{}',
+        0x2122: ur'\texttrademark{}',
+    }
+    # recognized with 'utf8', if textcomp is loaded
+    textcomp = {
+        # Latin-1 Supplement
+        0x00a2: ur'\textcent{}',          # ¢ CENT SIGN
+        0x00a4: ur'\textcurrency{}',      # ¤ CURRENCY SYMBOL
+        0x00a5: ur'\textyen{}',           # ¥ YEN SIGN
+        0x00a6: ur'\textbrokenbar{}',     # ¦ BROKEN BAR
+        0x00a7: ur'\textsection{}',       # § SECTION SIGN
+        0x00a8: ur'\textasciidieresis{}', # ¨ DIAERESIS
+        0x00a9: ur'\textcopyright{}',     # © COPYRIGHT SIGN
+        0x00aa: ur'\textordfeminine{}',   # ª FEMININE ORDINAL INDICATOR
+        0x00ac: ur'\textlnot{}',          # ¬ NOT SIGN
+        0x00ae: ur'\textregistered{}',    # ® REGISTERED SIGN
+        0x00af: ur'\textasciimacron{}',   # ¯ MACRON
+        0x00b0: ur'\textdegree{}',        # ° DEGREE SIGN
+        0x00b1: ur'\textpm{}',            # ± PLUS-MINUS SIGN
+        0x00b2: ur'\texttwosuperior{}',   # ² SUPERSCRIPT TWO
+        0x00b3: ur'\textthreesuperior{}', # ³ SUPERSCRIPT THREE
+        0x00b4: ur'\textasciiacute{}',    # ´ ACUTE ACCENT
+        0x00b5: ur'\textmu{}',            # µ MICRO SIGN
+        0x00b6: ur'\textparagraph{}',     # ¶ PILCROW SIGN # not equal to \textpilcrow
+        0x00b9: ur'\textonesuperior{}',   # ¹ SUPERSCRIPT ONE
+        0x00ba: ur'\textordmasculine{}',  # º MASCULINE ORDINAL INDICATOR
+        0x00bc: ur'\textonequarter{}',    # 1/4 FRACTION
+        0x00bd: ur'\textonehalf{}',       # 1/2 FRACTION
+        0x00be: ur'\textthreequarters{}', # 3/4 FRACTION
+        0x00d7: ur'\texttimes{}',         # × MULTIPLICATION SIGN
+        0x00f7: ur'\textdiv{}',           # ÷ DIVISION SIGN
+        #
+        0x0192: ur'\textflorin{}',        # LATIN SMALL LETTER F WITH HOOK
+        0x02b9: ur'\textasciiacute{}',    # MODIFIER LETTER PRIME
+        0x02ba: ur'\textacutedbl{}',      # MODIFIER LETTER DOUBLE PRIME
+        0x2016: ur'\textbardbl{}',        # DOUBLE VERTICAL LINE
+        0x2022: ur'\textbullet{}',        # BULLET
+        0x2032: ur'\textasciiacute{}',    # PRIME
+        0x2033: ur'\textacutedbl{}',      # DOUBLE PRIME
+        0x2035: ur'\textasciigrave{}',    # REVERSED PRIME
+        0x2036: ur'\textgravedbl{}',      # REVERSED DOUBLE PRIME
+        0x203b: ur'\textreferencemark{}', # REFERENCE MARK
+        0x203d: ur'\textinterrobang{}',   # INTERROBANG
+        0x2044: ur'\textfractionsolidus{}', # FRACTION SLASH
+        0x2045: ur'\textlquill{}',        # LEFT SQUARE BRACKET WITH QUILL
+        0x2046: ur'\textrquill{}',        # RIGHT SQUARE BRACKET WITH QUILL
+        0x2052: ur'\textdiscount{}',      # COMMERCIAL MINUS SIGN
+        0x20a1: ur'\textcolonmonetary{}', # COLON SIGN
+        0x20a3: ur'\textfrenchfranc{}',   # FRENCH FRANC SIGN
+        0x20a4: ur'\textlira{}',          # LIRA SIGN
+        0x20a6: ur'\textnaira{}',         # NAIRA SIGN
+        0x20a9: ur'\textwon{}',           # WON SIGN
+        0x20ab: ur'\textdong{}',          # DONG SIGN
+        0x20ac: ur'\texteuro{}',          # EURO SIGN
+        0x20b1: ur'\textpeso{}',          # PESO SIGN
+        0x20b2: ur'\textguarani{}',       # GUARANI SIGN
+        0x2103: ur'\textcelsius{}',       # DEGREE CELSIUS
+        0x2116: ur'\textnumero{}',        # NUMERO SIGN
+        0x2117: ur'\textcircledP{}',      # SOUND RECORDING COPYRIGHT
+        0x211e: ur'\textrecipe{}',        # PRESCRIPTION TAKE
+        0x2120: ur'\textservicemark{}',   # SERVICE MARK
+        0x2122: ur'\texttrademark{}',     # TRADE MARK SIGN
+        0x2126: ur'\textohm{}',           # OHM SIGN
+        0x2127: ur'\textmho{}',           # INVERTED OHM SIGN
+        0x212e: ur'\textestimated{}',     # ESTIMATED SYMBOL
+        0x2190: ur'\textleftarrow{}',     # LEFTWARDS ARROW
+        0x2191: ur'\textuparrow{}',       # UPWARDS ARROW
+        0x2192: ur'\textrightarrow{}',    # RIGHTWARDS ARROW
+        0x2193: ur'\textdownarrow{}',     # DOWNWARDS ARROW
+        0x2212: ur'\textminus{}',         # MINUS SIGN
+        0x2217: ur'\textasteriskcentered{}', # ASTERISK OPERATOR
+        0x221a: ur'\textsurd{}',          # SQUARE ROOT
+        0x2422: ur'\textblank{}',         # BLANK SYMBOL
+        0x25e6: ur'\textopenbullet{}',    # WHITE BULLET
+        0x25ef: ur'\textbigcircle{}',     # LARGE CIRCLE
+        0x266a: ur'\textmusicalnote{}',   # EIGHTH NOTE
+        0x26ad: ur'\textmarried{}',       # MARRIAGE SYMBOL
+        0x26ae: ur'\textdivorced{}',      # DIVORCE SYMBOL
+        0x27e8: ur'\textlangle{}',        # MATHEMATICAL LEFT ANGLE BRACKET
+        0x27e9: ur'\textrangle{}',        # MATHEMATICAL RIGHT ANGLE BRACKET
+    }
+    # Unicode chars that require a feature/package to render
+    pifont = {
+        0x2665: ur'\ding{170}',     # black heartsuit
+        0x2666: ur'\ding{169}',     # black diamondsuit
+        0x2713: ur'\ding{51}',      # check mark
+        0x2717: ur'\ding{55}',      # ballot X
+    }
+    # TODO: greek alphabet ... ?
+    # see also LaTeX codec
+    # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252124
+    # and unimap.py from TeXML
+
+
 class DocumentClass(object):
     """Details of a LaTeX document class."""
 
@@ -923,6 +1078,10 @@
     # to other packages, as done with babel.
     # Dummy settings might be taken from document settings
 
+    # Write code for typesetting with an 8-bit tex/pdftex (vs. xetex/luatex) engine;
+    # overridden by the XeTeX writer.
+    is_xetex = False
+
     # Config setting defaults
     # -----------------------
 
@@ -1097,7 +1256,7 @@
             self.requirements['_inputenc'] = (r'\usepackage[%s]{inputenc}'
                                               % self.latex_encoding)
         # TeX font encoding
-        if self.font_encoding:
+        if self.font_encoding and not self.is_xetex:
             self.requirements['_fontenc'] = (r'\usepackage[%s]{fontenc}' %
                                              self.font_encoding)
         # page layout with typearea (if there are relevant document options)
@@ -1239,7 +1398,7 @@
         encoding = docutils_encoding.lower()
         if encoding in tr:
             return tr[encoding]
-        # convert: latin-1, latin_1, utf-8 and similar things
+        # drop hyphen or low-line from "latin-1", "latin_1", "utf-8" and similar
         encoding = encoding.replace('_', '').replace('-', '')
         # strip the error handler
         return encoding.split(':')[0]
@@ -1247,188 +1406,24 @@
     def language_label(self, docutil_label):
         return self.language_module.labels[docutil_label]
 
-    def ensure_math(self, text):
-        if not hasattr(self, 'ensure_math_re'):
-            chars = { # lnot,pm,twosuperior,threesuperior,mu,onesuperior,times,div
-                     'latin1' : '\xac\xb1\xb2\xb3\xb5\xb9\xd7\xf7' , # ¬±²³µ¹×÷
-                     # TODO?: use texcomp instead.
-                    }
-            self.ensure_math_re = re.compile('([%s])' % chars['latin1'])
-        text = self.ensure_math_re.sub(r'\\ensuremath{\1}', text)
-        return text
-
     def encode(self, text):
         """Return text with 'problematic' characters escaped.
 
-        Escape the ten special printing characters ``# $ % & ~ _ ^ \ { }``,
-        square brackets ``[ ]``, double quotes and (in OT1) ``< | >``.
-
-        Separate ``-`` (and more in literal text) to prevent input ligatures.
-
-        Translate non-supported Unicode characters.
+        * Escape the ten special printing characters ``# $ % & ~ _ ^ \ { }``,
+          square brackets ``[ ]``, double quotes and (in OT1) ``< | >``.
+        * Translate non-supported Unicode characters.
+        * Separate ``-`` (and more in literal text) to prevent input ligatures.
         """
         if self.verbatim:
             return text
-        # Separate compound characters, e.g. '--' to '-{}-'.
-        separate_chars = '-'
-        # In monospace-font, we also separate ',,', '``' and "''" and some
-        # other characters which can't occur in non-literal text.
-        if self.literal:
-            separate_chars += ',`\'"<>'
-        # LaTeX encoding maps:
-        special_chars = {
-            ord('#'): ur'\#',
-            ord('$'): ur'\$',
-            ord('%'): ur'\%',
-            ord('&'): ur'\&',
-            ord('~'): ur'\textasciitilde{}',
-            ord('_'): ur'\_',
-            ord('^'): ur'\textasciicircum{}',
-            ord('\\'): ur'\textbackslash{}',
-            ord('{'): ur'\{',
-            ord('}'): ur'\}',
-        # Square brackets are ordinary chars and cannot be escaped with '\',
-        # so we put them in a group '{[}'. (Alternative: ensure that all
-        # macros with optional arguments are terminated with {} and text
-        # inside any optional argument is put in a group ``[{text}]``).
-        # Commands with optional args inside an optional arg must be put
-        # in a group, e.g. ``\item[{\hyperref[label]{text}}]``.
-            ord('['): ur'{[}',
-            ord(']'): ur'{]}'
-        }
-        # Unicode chars that are not recognized by LaTeX's utf8 encoding
-        unsupported_unicode_chars = {
-            0x00A0: ur'~', # NO-BREAK SPACE
-            # TODO: ensure white space also at the beginning of a line?
-            # 0x00A0: ur'\leavevmode\nobreak\vadjust{}~'
-            0x00AD: ur'\-', # SOFT HYPHEN
-            #
-            0x2008: ur'\,', # PUNCTUATION SPACE   
-            0x2011: ur'\hbox{-}', # NON-BREAKING HYPHEN
-            0x202F: ur'\,', # NARROW NO-BREAK SPACE
-            0x21d4: ur'$\Leftrightarrow$',
-            # Docutils footnote symbols:
-            0x2660: ur'$\spadesuit$',
-            0x2663: ur'$\clubsuit$',
-        }
-        # Unicode chars that are recognized by LaTeX's utf8 encoding
-        unicode_chars = {
-            0x200C: ur'\textcompwordmark', # ZERO WIDTH NON-JOINER
-            0x2013: ur'\textendash{}',
-            0x2014: ur'\textemdash{}',
-            0x2018: ur'\textquoteleft{}',
-            0x2019: ur'\textquoteright{}',
-            0x201A: ur'\quotesinglbase{}', # SINGLE LOW-9 QUOTATION MARK
-            0x201C: ur'\textquotedblleft{}',
-            0x201D: ur'\textquotedblright{}',
-            0x201E: ur'\quotedblbase{}', # DOUBLE LOW-9 QUOTATION MARK
-            0x2030: ur'\textperthousand{}',   # PER MILLE SIGN
-            0x2031: ur'\textpertenthousand{}', # PER TEN THOUSAND SIGN
-            0x2039: ur'\guilsinglleft{}',
-            0x203A: ur'\guilsinglright{}',
-            0x2423: ur'\textvisiblespace{}',  # OPEN BOX
-            0x2020: ur'\dag{}',
-            0x2021: ur'\ddag{}',
-            0x2026: ur'\dots{}',
-            0x2122: ur'\texttrademark{}',
-        }
-        # Unicode chars that require a feature/package to render
-        pifont_chars = {
-            0x2665: ur'\ding{170}',     # black heartsuit
-            0x2666: ur'\ding{169}',     # black diamondsuit
-            0x2713: ur'\ding{51}',      # check mark
-            0x2717: ur'\ding{55}',      # check mark
-        }
-        # recognized with 'utf8', if textcomp is loaded
-        textcomp_chars = {
-            # Latin-1 Supplement
-            0x00a2: ur'\textcent{}',          # ¢ CENT SIGN
-            0x00a4: ur'\textcurrency{}',      # ¤ CURRENCY SYMBOL
-            0x00a5: ur'\textyen{}',           # ¥ YEN SIGN
-            0x00a6: ur'\textbrokenbar{}',     # ¦ BROKEN BAR
-            0x00a7: ur'\textsection{}',       # § SECTION SIGN
-            0x00a8: ur'\textasciidieresis{}', # ¨ DIAERESIS
-            0x00a9: ur'\textcopyright{}',     # © COPYRIGHT SIGN
-            0x00aa: ur'\textordfeminine{}',   # ª FEMININE ORDINAL INDICATOR
-            0x00ac: ur'\textlnot{}',          # ¬ NOT SIGN
-            0x00ae: ur'\textregistered{}',    # ® REGISTERED SIGN
-            0x00af: ur'\textasciimacron{}',   # ¯ MACRON
-            0x00b0: ur'\textdegree{}',        # ° DEGREE SIGN
-            0x00b1: ur'\textpm{}',            # ± PLUS-MINUS SIGN
-            0x00b2: ur'\texttwosuperior{}',   # ² SUPERSCRIPT TWO
-            0x00b3: ur'\textthreesuperior{}', # ³ SUPERSCRIPT THREE
-            0x00b4: ur'\textasciiacute{}',    # ´ ACUTE ACCENT
-            0x00b5: ur'\textmu{}',            # µ MICRO SIGN
-            0x00b6: ur'\textparagraph{}',     # ¶ PILCROW SIGN # not equal to \textpilcrow
-            0x00b9: ur'\textonesuperior{}',   # ¹ SUPERSCRIPT ONE
-            0x00ba: ur'\textordmasculine{}',  # º MASCULINE ORDINAL INDICATOR
-            0x00bc: ur'\textonequarter{}',    # 1/4 FRACTION
-            0x00bd: ur'\textonehalf{}',       # 1/2 FRACTION
-            0x00be: ur'\textthreequarters{}', # 3/4 FRACTION
-            0x00d7: ur'\texttimes{}',         # × MULTIPLICATION SIGN
-            0x00f7: ur'\textdiv{}',           # ÷ DIVISION SIGN
-            #
-            0x0192: ur'\textflorin{}',        # LATIN SMALL LETTER F WITH HOOK
-            0x02b9: ur'\textasciiacute{}',    # MODIFIER LETTER PRIME
-            0x02ba: ur'\textacutedbl{}',      # MODIFIER LETTER DOUBLE PRIME
-            0x2016: ur'\textbardbl{}',        # DOUBLE VERTICAL LINE
-            0x2022: ur'\textbullet{}',        # BULLET
-            0x2032: ur'\textasciiacute{}',    # PRIME
-            0x2033: ur'\textacutedbl{}',      # DOUBLE PRIME
-            0x2035: ur'\textasciigrave{}',    # REVERSED PRIME
-            0x2036: ur'\textgravedbl{}',      # REVERSED DOUBLE PRIME
-            0x203b: ur'\textreferencemark{}', # REFERENCE MARK
-            0x203d: ur'\textinterrobang{}',   # INTERROBANG
-            0x2044: ur'\textfractionsolidus{}', # FRACTION SLASH
-            0x2045: ur'\textlquill{}',        # LEFT SQUARE BRACKET WITH QUILL
-            0x2046: ur'\textrquill{}',        # RIGHT SQUARE BRACKET WITH QUILL
-            0x2052: ur'\textdiscount{}',      # COMMERCIAL MINUS SIGN
-            0x20a1: ur'\textcolonmonetary{}', # COLON SIGN
-            0x20a3: ur'\textfrenchfranc{}',   # FRENCH FRANC SIGN
-            0x20a4: ur'\textlira{}',          # LIRA SIGN
-            0x20a6: ur'\textnaira{}',         # NAIRA SIGN
-            0x20a9: ur'\textwon{}',           # WON SIGN
-            0x20ab: ur'\textdong{}',          # DONG SIGN
-            0x20ac: ur'\texteuro{}',          # EURO SIGN
-            0x20b1: ur'\textpeso{}',          # PESO SIGN
-            0x20b2: ur'\textguarani{}',       # GUARANI SIGN
-            0x2103: ur'\textcelsius{}',       # DEGREE CELSIUS
-            0x2116: ur'\textnumero{}',        # NUMERO SIGN
-            0x2117: ur'\textcircledP{}',      # SOUND RECORDING COYRIGHT
-            0x211e: ur'\textrecipe{}',        # PRESCRIPTION TAKE
-            0x2120: ur'\textservicemark{}',   # SERVICE MARK
-            0x2122: ur'\texttrademark{}',     # TRADE MARK SIGN
-            0x2126: ur'\textohm{}',           # OHM SIGN
-            0x2127: ur'\textmho{}',           # INVERTED OHM SIGN
-            0x212e: ur'\textestimated{}',     # ESTIMATED SYMBOL
-            0x2190: ur'\textleftarrow{}',     # LEFTWARDS ARROW
-            0x2191: ur'\textuparrow{}',       # UPWARDS ARROW
-            0x2192: ur'\textrightarrow{}',    # RIGHTWARDS ARROW
-            0x2193: ur'\textdownarrow{}',     # DOWNWARDS ARROW
-            0x2212: ur'\textminus{}',         # MINUS SIGN
-            0x2217: ur'\textasteriskcentered{}', # ASTERISK OPERATOR
-            0x221a: ur'\textsurd{}',          # SQUARE ROOT
-            0x2422: ur'\textblank{}',         # BLANK SYMBOL
-            0x25e6: ur'\textopenbullet{}',    # WHITE BULLET
-            0x25ef: ur'\textbigcircle{}',     # LARGE CIRCLE
-            0x266a: ur'\textmusicalnote{}',   # EIGHTH NOTE
-            0x26ad: ur'\textmarried{}',       # MARRIAGE SYMBOL
-            0x26ae: ur'\textdivorced{}',      # DIVORCE SYMBOL
-            0x27e8: ur'\textlangle{}',        # MATHEMATICAL LEFT ANGLE BRACKET
-            0x27e9: ur'\textrangle{}',        # MATHEMATICAL RIGHT ANGLE BRACKET
-        }
-        # TODO: greek alphabet ... ?
-        # see also LaTeX codec
-        # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252124
-        # and unimap.py from TeXML
 
-        # set up the translation table:
-        table = special_chars
+        # Set up the translation table:
+        table = CharMaps.special.copy()
         # keep the underscore in citation references
         if self.inside_citation_reference_label:
             del(table[ord('_')])
         # Workarounds for OT1 font-encoding
-        if self.font_encoding in ['OT1', '']:
+        if self.font_encoding in ['OT1', ''] and not self.is_xetex:
             # * out-of-order characters in cmtt
             if self.literal:
                 # replace underscore by underlined blank,
@@ -1449,41 +1444,47 @@
             # double quotes are 'active' in some languages
             # TODO: use \textquotedbl if font encoding starts with T?
             table[ord('"')] = self.babel.literal_double_quote
-        # Unicode chars:
-        table.update(unsupported_unicode_chars)
-        table.update(pifont_chars)
-        if not self.latex_encoding.startswith('utf8'):
-            table.update(unicode_chars)
-            table.update(textcomp_chars)
-        # Characters that require a feature/package to render
-        for ch in text:
-            if ord(ch) in pifont_chars:
-                self.requirements['pifont'] = '\\usepackage{pifont}'
-            if ord(ch) in textcomp_chars:
+        # Unicode replacements for 8-bit tex engines (not required with XeTeX/LuaTeX):
+        if not self.is_xetex:
+            table.update(CharMaps.unsupported_unicode)
+            if not self.latex_encoding.startswith('utf8'):
+                table.update(CharMaps.utf8_supported_unicode)
+                table.update(CharMaps.textcomp)
+            table.update(CharMaps.pifont)
+            # Characters that require a feature/package to render
+            if [True for ch in text if ord(ch) in CharMaps.textcomp]:
                 self.requirements['textcomp'] = PreambleCmds.textcomp
+            if [True for ch in text if ord(ch) in CharMaps.pifont]:
+                    self.requirements['pifont'] = '\\usepackage{pifont}'
 
         text = text.translate(table)
 
-        # Break up input ligatures
-        for char in separate_chars * 2:
-            # Do it twice ("* 2") because otherwise we would replace
-            # '---' by '-{}--'.
-            text = text.replace(char + char, char + '{}' + char)
+        # Break up input ligatures e.g. '--' to '-{}-'.
+        if not self.is_xetex: # Not required with xetex/luatex
+            separate_chars = '-'
+            # In monospace-font, we also separate ',,', '``' and "''" and some
+            # other characters which can't occur in non-literal text.
+            if self.literal:
+                separate_chars += ',`\'"<>'
+            for char in separate_chars * 2:
+                # Do it twice ("* 2") because otherwise we would replace
+                # '---' by '-{}--'.
+                text = text.replace(char + char, char + '{}' + char)
+
         # Literal line breaks (in address or literal blocks):
         if self.insert_newline:
             lines = text.split('\n')
-            # for blank lines, insert a protected space, to avoid
-            # ! LaTeX Error: There's no line here to end.
-            lines = [line + '~'*(not line.lstrip())
-                     for line in lines[:-1]] + lines[-1:]
-            text = '\\\\\n'.join(lines)
+            # Add a protected space to blank lines (except the last)
+            # to avoid ``! LaTeX Error: There's no line here to end.``
+            for i, line in enumerate(lines[:-1]):
+                if not line.lstrip():
+                    lines[i] += '~'
+            text = (r'\\' + '\n').join(lines)
         if not self.literal:
             text = self.babel.quote_quotes(text)
         if self.literal and not self.insert_non_breaking_blanks:
             # preserve runs of spaces but allow wrapping
             text = text.replace('  ', ' ~')
-        if not self.latex_encoding.startswith('utf8'):
-            text = self.ensure_math(text)
         return text
 
     def attval(self, text,
@@ -2218,9 +2219,12 @@
         self.requirements['~header'] = ''.join(self.out)
         self.pop_output_collector()
 
-    def to_latex_length(self, length_str, pxunit='px'):
+    def to_latex_length(self, length_str, pxunit=None):
         """Convert `length_str` with rst lenght to LaTeX length
         """
+        if pxunit is not None:
+            sys.stderr.write('deprecation warning: LaTeXTranslator.to_latex_length()'
+                             ' option `pxunit` will be removed.')
         match = re.match('(\d*\.?\d*)\s*(\S*)', length_str)
         if not match:
             return length_str
@@ -2231,12 +2235,13 @@
         # percentage: relate to current line width
         elif unit == '%':
             length_str = '%.3f\\linewidth' % (float(value)/100.0)
-        elif (unit == 'px') and (pxunit != 'px'):
-            # length unit px not defined in some tex variants (e.g. XeTeX)
+        elif self.is_xetex and unit == 'px':
+            # XeTeX does not know the length unit px.
+            # Use \pdfpxdimen, the macro to set the value of 1 px in pdftex.
+            # This way, configuring works the same for pdftex and xetex.
             self.fallbacks['_providelength'] = PreambleCmds.providelength
-            self.fallbacks['px'] = '\n\\DUprovidelength{%s}{1bp}\n' % pxunit
-            length_str = '%s%s' % (value, pxunit)
-
+            self.fallbacks['px'] = '\n\\DUprovidelength{\\pdfpxdimen}{1bp}\n'
+            length_str = r'%s\pdfpxdimen' % value
         return length_str
 
     def visit_image(self, node):

Modified: trunk/docutils/docutils/writers/xetex/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/xetex/__init__.py	2012-03-30 07:11:39 UTC (rev 7388)
+++ trunk/docutils/docutils/writers/xetex/__init__.py	2012-03-30 11:58:21 UTC (rev 7389)
@@ -61,7 +61,7 @@
 
     def __init__(self):
         latex2e.Writer.__init__(self)
-        self.settings_defaults.update({'fontencoding': ''}) # use default (EU1)
+        self.settings_defaults.update({'fontencoding': ''}) # use default (EU1 or EU2)
         self.translator_class = XeLaTeXTranslator
 
 
@@ -125,79 +125,10 @@
 class XeLaTeXTranslator(latex2e.LaTeXTranslator):
 
     def __init__(self, document):
+        self.is_xetex = True  # typeset with XeTeX or LuaTeX engine
         latex2e.LaTeXTranslator.__init__(self, document, Babel)
         if self.latex_encoding == 'utf8':
             self.requirements.pop('_inputenc', None)
         else:
             self.requirements['_inputenc'] = (r'\XeTeXinputencoding %s '
                                               % self.latex_encoding)
-
-    # XeTeX does not know the length unit px.
-    # Use \pdfpxdimen, the macro to set the value of 1 px in pdftex.
-    # This way, configuring works the same for pdftex and xetex.
-    def to_latex_length(self, length_str, px=r'\pdfpxdimen'):
-        """Convert string with rst lenght to LaTeX length"""
-        return latex2e.LaTeXTranslator.to_latex_length(self, length_str, px)
-
-    # Simpler variant of encode, as XeTeX understands utf8 Unicode:
-    def encode(self, text):
-        """Return text with 'problematic' characters escaped.
-
-        Escape the ten special printing characters ``# $ % & ~ _ ^ \ { }``,
-        square brackets ``[ ]``, double quotes and (in OT1) ``< | >``.
-        """
-        if self.verbatim:
-            return text
-        # LaTeX encoding maps:
-        special_chars = {
-            ord('#'): ur'\#',
-            ord('$'): ur'\$',
-            ord('%'): ur'\%',
-            ord('&'): ur'\&',
-            ord('~'): ur'\textasciitilde{}',
-            ord('_'): ur'\_',
-            ord('^'): ur'\textasciicircum{}',
-            ord('\\'): ur'\textbackslash{}',
-            ord('{'): ur'\{',
-            ord('}'): ur'\}',
-        # Square brackets are ordinary chars and cannot be escaped with '\',
-        # so we put them in a group '{[}'. (Alternative: ensure that all
-        # macros with optional arguments are terminated with {} and text
-        # inside any optional argument is put in a group ``[{text}]``).
-        # Commands with optional args inside an optional arg must be put
-        # in a group, e.g. ``\item[{\hyperref[label]{text}}]``.
-            ord('['): ur'{[}',
-            ord(']'): ur'{]}'
-        }
-        # Unicode chars that are not properly handled by XeTeX
-        unsupported_unicode_chars = {
-            0x00AD: ur'\-', # SOFT HYPHEN
-        }
-        # set up the translation table:
-        table = special_chars
-        # keep the underscore in citation references
-        if self.inside_citation_reference_label:
-            del(table[ord('_')])
-        if self.insert_non_breaking_blanks:
-            table[ord(' ')] = ur'~'
-        if self.literal:
-            # double quotes are 'active' in some languages
-            table[ord('"')] = self.babel.literal_double_quote
-        else:
-            text = self.babel.quote_quotes(text)
-        # Unicode chars:
-        table.update(unsupported_unicode_chars)
-
-        text = text.translate(table)
-
-        # Literal line breaks (in address or literal blocks):
-        if self.insert_newline:
-            # for blank lines, insert a protected space, to avoid
-            # ! LaTeX Error: There's no line here to end.
-            textlines = [line + '~'*(not line.lstrip())
-                         for line in text.split('\n')]
-            text = '\\\\\n'.join(textlines)
-        if self.literal and not self.insert_non_breaking_blanks:
-            # preserve runs of spaces but allow wrapping
-            text = text.replace('  ', ' ~')
-        return text

Modified: trunk/docutils/test/functional/expected/standalone_rst_xetex.tex
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_xetex.tex	2012-03-30 07:11:39 UTC (rev 7388)
+++ trunk/docutils/test/functional/expected/standalone_rst_xetex.tex	2012-03-30 11:58:21 UTC (rev 7389)
@@ -1269,8 +1269,8 @@
 %
 \begin{quote}{\ttfamily \raggedright \noindent
 \DUrole{ln}{~8~}\#~print~integers~from~0~to~9:\\
-~\DUrole{ln}{~9~}for~i~in~range(10):\\
-~\DUrole{ln}{10~}~~~~print~i
+\DUrole{ln}{~9~}for~i~in~range(10):\\
+\DUrole{ln}{10~}~~~~print~i
 }
 \end{quote}
 
@@ -1292,7 +1292,7 @@
 %
 \begin{quote}{\ttfamily \raggedright \noindent
 \DUrole{ln}{1~}..~header::~Document~header\\
-~\DUrole{ln}{2~}..~footer::~Document~footer
+\DUrole{ln}{2~}..~footer::~Document~footer
 }
 \end{quote}
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.