|
From: <mi...@us...> - 2012-03-30 11:58:29
|
Revision: 7389
http://docutils.svn.sourceforge.net/docutils/?rev=7389&view=rev
Author: milde
Date: 2012-03-30 11:58:21 +0000 (Fri, 30 Mar 2012)
Log Message:
-----------
Avoid code duplication between xetex and latex2e writer (solves [ 3512728 ]).
Modified Paths:
--------------
trunk/docutils/HISTORY.txt
trunk/docutils/docutils/writers/latex2e/__init__.py
trunk/docutils/docutils/writers/xetex/__init__.py
trunk/docutils/test/functional/expected/standalone_rst_xetex.tex
Modified: trunk/docutils/HISTORY.txt
===================================================================
--- trunk/docutils/HISTORY.txt 2012-03-30 07:11:39 UTC (rev 7388)
+++ trunk/docutils/HISTORY.txt 2012-03-30 11:58:21 UTC (rev 7389)
@@ -40,12 +40,12 @@
BinaryFileOutput.
- New exceptions InputError and OutputError for IO errors in
FileInput/FileOutput.
-
+
* docutils/core.py:
- No "hard" system exit on file IO errors: catch and report them in
`Publisher.reportException` instead. Allows handling by a calling
- application if the configuration setting `traceback` is True.
+ application if the configuration setting `traceback` is True.
* docutils/utils.py -> docutils/utils/__init__.py
@@ -84,6 +84,10 @@
- Use ``\DUtitle`` for unsupported section levels
- Apply [ 3512791 ] do not compare string literals with "is"
+* docutils/writers/xetex/__init__.py
+
+ - Avoid code duplication with latex2e writer (solves [ 3512728 ]).
+
* docutils/writers/html4css1/__init__.py
- Change default for `math-output` setting to MathJax.
Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py 2012-03-30 07:11:39 UTC (rev 7388)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py 2012-03-30 11:58:21 UTC (rev 7389)
@@ -680,6 +680,161 @@
}"""
+# LaTeX encoding maps
+# -------------------
+# ::
+
+class CharMaps(object):
+ """LaTeX representations for active and Unicode characters."""
+
+ # characters that always need escaping:
+ special = {
+ ord('#'): ur'\#',
+ ord('$'): ur'\$',
+ ord('%'): ur'\%',
+ ord('&'): ur'\&',
+ ord('~'): ur'\textasciitilde{}',
+ ord('_'): ur'\_',
+ ord('^'): ur'\textasciicircum{}',
+ ord('\\'): ur'\textbackslash{}',
+ ord('{'): ur'\{',
+ ord('}'): ur'\}',
+ # Square brackets are ordinary chars and cannot be escaped with '\',
+ # so we put them in a group '{[}'. (Alternative: ensure that all
+ # macros with optional arguments are terminated with {} and text
+ # inside any optional argument is put in a group ``[{text}]``).
+ # Commands with optional args inside an optional arg must be put in a
+ # group, e.g. ``\item[{\hyperref[label]{text}}]``.
+ ord('['): ur'{[}',
+ ord(']'): ur'{]}',
+ # the soft hyphen is unknown in 8-bit text and not properly handled by XeTeX
+ 0x00AD: ur'\-', # SOFT HYPHEN
+ }
+ # Unicode chars that are not recognized by LaTeX's utf8 encoding
+ unsupported_unicode = {
+ 0x00A0: ur'~', # NO-BREAK SPACE
+ # TODO: ensure white space also at the beginning of a line?
+ # 0x00A0: ur'\leavevmode\nobreak\vadjust{}~'
+ 0x2008: ur'\,', # PUNCTUATION SPACE
+ 0x2011: ur'\hbox{-}', # NON-BREAKING HYPHEN
+ 0x202F: ur'\,', # NARROW NO-BREAK SPACE
+ 0x21d4: ur'$\Leftrightarrow$',
+ # Docutils footnote symbols:
+ 0x2660: ur'$\spadesuit$',
+ 0x2663: ur'$\clubsuit$',
+ }
+ # Unicode chars that are recognized by LaTeX's utf8 encoding
+ utf8_supported_unicode = {
+ 0x200C: ur'\textcompwordmark', # ZERO WIDTH NON-JOINER
+ 0x2013: ur'\textendash{}',
+ 0x2014: ur'\textemdash{}',
+ 0x2018: ur'\textquoteleft{}',
+ 0x2019: ur'\textquoteright{}',
+ 0x201A: ur'\quotesinglbase{}', # SINGLE LOW-9 QUOTATION MARK
+ 0x201C: ur'\textquotedblleft{}',
+ 0x201D: ur'\textquotedblright{}',
+ 0x201E: ur'\quotedblbase{}', # DOUBLE LOW-9 QUOTATION MARK
+ 0x2030: ur'\textperthousand{}', # PER MILLE SIGN
+ 0x2031: ur'\textpertenthousand{}', # PER TEN THOUSAND SIGN
+ 0x2039: ur'\guilsinglleft{}',
+ 0x203A: ur'\guilsinglright{}',
+ 0x2423: ur'\textvisiblespace{}', # OPEN BOX
+ 0x2020: ur'\dag{}',
+ 0x2021: ur'\ddag{}',
+ 0x2026: ur'\dots{}',
+ 0x2122: ur'\texttrademark{}',
+ }
+ # recognized with 'utf8', if textcomp is loaded
+ textcomp = {
+ # Latin-1 Supplement
+ 0x00a2: ur'\textcent{}', # ¢ CENT SIGN
+ 0x00a4: ur'\textcurrency{}', # ¤ CURRENCY SYMBOL
+ 0x00a5: ur'\textyen{}', # ¥ YEN SIGN
+ 0x00a6: ur'\textbrokenbar{}', # ¦ BROKEN BAR
+ 0x00a7: ur'\textsection{}', # § SECTION SIGN
+ 0x00a8: ur'\textasciidieresis{}', # ¨ DIAERESIS
+ 0x00a9: ur'\textcopyright{}', # © COPYRIGHT SIGN
+ 0x00aa: ur'\textordfeminine{}', # ª FEMININE ORDINAL INDICATOR
+ 0x00ac: ur'\textlnot{}', # ¬ NOT SIGN
+ 0x00ae: ur'\textregistered{}', # ® REGISTERED SIGN
+ 0x00af: ur'\textasciimacron{}', # ¯ MACRON
+ 0x00b0: ur'\textdegree{}', # ° DEGREE SIGN
+ 0x00b1: ur'\textpm{}', # ± PLUS-MINUS SIGN
+ 0x00b2: ur'\texttwosuperior{}', # ² SUPERSCRIPT TWO
+ 0x00b3: ur'\textthreesuperior{}', # ³ SUPERSCRIPT THREE
+ 0x00b4: ur'\textasciiacute{}', # ´ ACUTE ACCENT
+ 0x00b5: ur'\textmu{}', # µ MICRO SIGN
+ 0x00b6: ur'\textparagraph{}', # ¶ PILCROW SIGN # not equal to \textpilcrow
+ 0x00b9: ur'\textonesuperior{}', # ¹ SUPERSCRIPT ONE
+ 0x00ba: ur'\textordmasculine{}', # º MASCULINE ORDINAL INDICATOR
+ 0x00bc: ur'\textonequarter{}', # 1/4 FRACTION
+ 0x00bd: ur'\textonehalf{}', # 1/2 FRACTION
+ 0x00be: ur'\textthreequarters{}', # 3/4 FRACTION
+ 0x00d7: ur'\texttimes{}', # × MULTIPLICATION SIGN
+ 0x00f7: ur'\textdiv{}', # ÷ DIVISION SIGN
+ #
+ 0x0192: ur'\textflorin{}', # LATIN SMALL LETTER F WITH HOOK
+ 0x02b9: ur'\textasciiacute{}', # MODIFIER LETTER PRIME
+ 0x02ba: ur'\textacutedbl{}', # MODIFIER LETTER DOUBLE PRIME
+ 0x2016: ur'\textbardbl{}', # DOUBLE VERTICAL LINE
+ 0x2022: ur'\textbullet{}', # BULLET
+ 0x2032: ur'\textasciiacute{}', # PRIME
+ 0x2033: ur'\textacutedbl{}', # DOUBLE PRIME
+ 0x2035: ur'\textasciigrave{}', # REVERSED PRIME
+ 0x2036: ur'\textgravedbl{}', # REVERSED DOUBLE PRIME
+ 0x203b: ur'\textreferencemark{}', # REFERENCE MARK
+ 0x203d: ur'\textinterrobang{}', # INTERROBANG
+ 0x2044: ur'\textfractionsolidus{}', # FRACTION SLASH
+ 0x2045: ur'\textlquill{}', # LEFT SQUARE BRACKET WITH QUILL
+ 0x2046: ur'\textrquill{}', # RIGHT SQUARE BRACKET WITH QUILL
+ 0x2052: ur'\textdiscount{}', # COMMERCIAL MINUS SIGN
+ 0x20a1: ur'\textcolonmonetary{}', # COLON SIGN
+ 0x20a3: ur'\textfrenchfranc{}', # FRENCH FRANC SIGN
+ 0x20a4: ur'\textlira{}', # LIRA SIGN
+ 0x20a6: ur'\textnaira{}', # NAIRA SIGN
+ 0x20a9: ur'\textwon{}', # WON SIGN
+ 0x20ab: ur'\textdong{}', # DONG SIGN
+ 0x20ac: ur'\texteuro{}', # EURO SIGN
+ 0x20b1: ur'\textpeso{}', # PESO SIGN
+ 0x20b2: ur'\textguarani{}', # GUARANI SIGN
+ 0x2103: ur'\textcelsius{}', # DEGREE CELSIUS
+ 0x2116: ur'\textnumero{}', # NUMERO SIGN
+ 0x2117: ur'\textcircledP{}', # SOUND RECORDING COPYRIGHT
+ 0x211e: ur'\textrecipe{}', # PRESCRIPTION TAKE
+ 0x2120: ur'\textservicemark{}', # SERVICE MARK
+ 0x2122: ur'\texttrademark{}', # TRADE MARK SIGN
+ 0x2126: ur'\textohm{}', # OHM SIGN
+ 0x2127: ur'\textmho{}', # INVERTED OHM SIGN
+ 0x212e: ur'\textestimated{}', # ESTIMATED SYMBOL
+ 0x2190: ur'\textleftarrow{}', # LEFTWARDS ARROW
+ 0x2191: ur'\textuparrow{}', # UPWARDS ARROW
+ 0x2192: ur'\textrightarrow{}', # RIGHTWARDS ARROW
+ 0x2193: ur'\textdownarrow{}', # DOWNWARDS ARROW
+ 0x2212: ur'\textminus{}', # MINUS SIGN
+ 0x2217: ur'\textasteriskcentered{}', # ASTERISK OPERATOR
+ 0x221a: ur'\textsurd{}', # SQUARE ROOT
+ 0x2422: ur'\textblank{}', # BLANK SYMBOL
+ 0x25e6: ur'\textopenbullet{}', # WHITE BULLET
+ 0x25ef: ur'\textbigcircle{}', # LARGE CIRCLE
+ 0x266a: ur'\textmusicalnote{}', # EIGHTH NOTE
+ 0x26ad: ur'\textmarried{}', # MARRIAGE SYMBOL
+ 0x26ae: ur'\textdivorced{}', # DIVORCE SYMBOL
+ 0x27e8: ur'\textlangle{}', # MATHEMATICAL LEFT ANGLE BRACKET
+ 0x27e9: ur'\textrangle{}', # MATHEMATICAL RIGHT ANGLE BRACKET
+ }
+ # Unicode chars that require a feature/package to render
+ pifont = {
+ 0x2665: ur'\ding{170}', # black heartsuit
+ 0x2666: ur'\ding{169}', # black diamondsuit
+ 0x2713: ur'\ding{51}', # check mark
+ 0x2717: ur'\ding{55}', # ballot X
+ }
+ # TODO: greek alphabet ... ?
+ # see also LaTeX codec
+ # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252124
+ # and unimap.py from TeXML
+
+
class DocumentClass(object):
"""Details of a LaTeX document class."""
@@ -923,6 +1078,10 @@
# to other packages, as done with babel.
# Dummy settings might be taken from document settings
+ # Generate code for typesetting with an 8-bit TeX/pdfTeX engine (vs. XeTeX/LuaTeX);
+ # overridden by the XeTeX writer.
+ is_xetex = False
+
# Config setting defaults
# -----------------------
@@ -1097,7 +1256,7 @@
self.requirements['_inputenc'] = (r'\usepackage[%s]{inputenc}'
% self.latex_encoding)
# TeX font encoding
- if self.font_encoding:
+ if self.font_encoding and not self.is_xetex:
self.requirements['_fontenc'] = (r'\usepackage[%s]{fontenc}' %
self.font_encoding)
# page layout with typearea (if there are relevant document options)
@@ -1239,7 +1398,7 @@
encoding = docutils_encoding.lower()
if encoding in tr:
return tr[encoding]
- # convert: latin-1, latin_1, utf-8 and similar things
+ # drop hyphen or low-line from "latin-1", "latin_1", "utf-8" and similar
encoding = encoding.replace('_', '').replace('-', '')
# strip the error handler
return encoding.split(':')[0]
@@ -1247,188 +1406,24 @@
def language_label(self, docutil_label):
return self.language_module.labels[docutil_label]
- def ensure_math(self, text):
- if not hasattr(self, 'ensure_math_re'):
- chars = { # lnot,pm,twosuperior,threesuperior,mu,onesuperior,times,div
- 'latin1' : '\xac\xb1\xb2\xb3\xb5\xb9\xd7\xf7' , # ¬±²³µ¹×÷
- # TODO?: use texcomp instead.
- }
- self.ensure_math_re = re.compile('([%s])' % chars['latin1'])
- text = self.ensure_math_re.sub(r'\\ensuremath{\1}', text)
- return text
-
def encode(self, text):
"""Return text with 'problematic' characters escaped.
- Escape the ten special printing characters ``# $ % & ~ _ ^ \ { }``,
- square brackets ``[ ]``, double quotes and (in OT1) ``< | >``.
-
- Separate ``-`` (and more in literal text) to prevent input ligatures.
-
- Translate non-supported Unicode characters.
+ * Escape the ten special printing characters ``# $ % & ~ _ ^ \ { }``,
+ square brackets ``[ ]``, double quotes and (in OT1) ``< | >``.
+ * Translate non-supported Unicode characters.
+ * Separate ``-`` (and more in literal text) to prevent input ligatures.
"""
if self.verbatim:
return text
- # Separate compound characters, e.g. '--' to '-{}-'.
- separate_chars = '-'
- # In monospace-font, we also separate ',,', '``' and "''" and some
- # other characters which can't occur in non-literal text.
- if self.literal:
- separate_chars += ',`\'"<>'
- # LaTeX encoding maps:
- special_chars = {
- ord('#'): ur'\#',
- ord('$'): ur'\$',
- ord('%'): ur'\%',
- ord('&'): ur'\&',
- ord('~'): ur'\textasciitilde{}',
- ord('_'): ur'\_',
- ord('^'): ur'\textasciicircum{}',
- ord('\\'): ur'\textbackslash{}',
- ord('{'): ur'\{',
- ord('}'): ur'\}',
- # Square brackets are ordinary chars and cannot be escaped with '\',
- # so we put them in a group '{[}'. (Alternative: ensure that all
- # macros with optional arguments are terminated with {} and text
- # inside any optional argument is put in a group ``[{text}]``).
- # Commands with optional args inside an optional arg must be put
- # in a group, e.g. ``\item[{\hyperref[label]{text}}]``.
- ord('['): ur'{[}',
- ord(']'): ur'{]}'
- }
- # Unicode chars that are not recognized by LaTeX's utf8 encoding
- unsupported_unicode_chars = {
- 0x00A0: ur'~', # NO-BREAK SPACE
- # TODO: ensure white space also at the beginning of a line?
- # 0x00A0: ur'\leavevmode\nobreak\vadjust{}~'
- 0x00AD: ur'\-', # SOFT HYPHEN
- #
- 0x2008: ur'\,', # PUNCTUATION SPACE
- 0x2011: ur'\hbox{-}', # NON-BREAKING HYPHEN
- 0x202F: ur'\,', # NARROW NO-BREAK SPACE
- 0x21d4: ur'$\Leftrightarrow$',
- # Docutils footnote symbols:
- 0x2660: ur'$\spadesuit$',
- 0x2663: ur'$\clubsuit$',
- }
- # Unicode chars that are recognized by LaTeX's utf8 encoding
- unicode_chars = {
- 0x200C: ur'\textcompwordmark', # ZERO WIDTH NON-JOINER
- 0x2013: ur'\textendash{}',
- 0x2014: ur'\textemdash{}',
- 0x2018: ur'\textquoteleft{}',
- 0x2019: ur'\textquoteright{}',
- 0x201A: ur'\quotesinglbase{}', # SINGLE LOW-9 QUOTATION MARK
- 0x201C: ur'\textquotedblleft{}',
- 0x201D: ur'\textquotedblright{}',
- 0x201E: ur'\quotedblbase{}', # DOUBLE LOW-9 QUOTATION MARK
- 0x2030: ur'\textperthousand{}', # PER MILLE SIGN
- 0x2031: ur'\textpertenthousand{}', # PER TEN THOUSAND SIGN
- 0x2039: ur'\guilsinglleft{}',
- 0x203A: ur'\guilsinglright{}',
- 0x2423: ur'\textvisiblespace{}', # OPEN BOX
- 0x2020: ur'\dag{}',
- 0x2021: ur'\ddag{}',
- 0x2026: ur'\dots{}',
- 0x2122: ur'\texttrademark{}',
- }
- # Unicode chars that require a feature/package to render
- pifont_chars = {
- 0x2665: ur'\ding{170}', # black heartsuit
- 0x2666: ur'\ding{169}', # black diamondsuit
- 0x2713: ur'\ding{51}', # check mark
- 0x2717: ur'\ding{55}', # check mark
- }
- # recognized with 'utf8', if textcomp is loaded
- textcomp_chars = {
- # Latin-1 Supplement
- 0x00a2: ur'\textcent{}', # ¢ CENT SIGN
- 0x00a4: ur'\textcurrency{}', # ¤ CURRENCY SYMBOL
- 0x00a5: ur'\textyen{}', # ¥ YEN SIGN
- 0x00a6: ur'\textbrokenbar{}', # ¦ BROKEN BAR
- 0x00a7: ur'\textsection{}', # § SECTION SIGN
- 0x00a8: ur'\textasciidieresis{}', # ¨ DIAERESIS
- 0x00a9: ur'\textcopyright{}', # © COPYRIGHT SIGN
- 0x00aa: ur'\textordfeminine{}', # ª FEMININE ORDINAL INDICATOR
- 0x00ac: ur'\textlnot{}', # ¬ NOT SIGN
- 0x00ae: ur'\textregistered{}', # ® REGISTERED SIGN
- 0x00af: ur'\textasciimacron{}', # ¯ MACRON
- 0x00b0: ur'\textdegree{}', # ° DEGREE SIGN
- 0x00b1: ur'\textpm{}', # ± PLUS-MINUS SIGN
- 0x00b2: ur'\texttwosuperior{}', # ² SUPERSCRIPT TWO
- 0x00b3: ur'\textthreesuperior{}', # ³ SUPERSCRIPT THREE
- 0x00b4: ur'\textasciiacute{}', # ´ ACUTE ACCENT
- 0x00b5: ur'\textmu{}', # µ MICRO SIGN
- 0x00b6: ur'\textparagraph{}', # ¶ PILCROW SIGN # not equal to \textpilcrow
- 0x00b9: ur'\textonesuperior{}', # ¹ SUPERSCRIPT ONE
- 0x00ba: ur'\textordmasculine{}', # º MASCULINE ORDINAL INDICATOR
- 0x00bc: ur'\textonequarter{}', # 1/4 FRACTION
- 0x00bd: ur'\textonehalf{}', # 1/2 FRACTION
- 0x00be: ur'\textthreequarters{}', # 3/4 FRACTION
- 0x00d7: ur'\texttimes{}', # × MULTIPLICATION SIGN
- 0x00f7: ur'\textdiv{}', # ÷ DIVISION SIGN
- #
- 0x0192: ur'\textflorin{}', # LATIN SMALL LETTER F WITH HOOK
- 0x02b9: ur'\textasciiacute{}', # MODIFIER LETTER PRIME
- 0x02ba: ur'\textacutedbl{}', # MODIFIER LETTER DOUBLE PRIME
- 0x2016: ur'\textbardbl{}', # DOUBLE VERTICAL LINE
- 0x2022: ur'\textbullet{}', # BULLET
- 0x2032: ur'\textasciiacute{}', # PRIME
- 0x2033: ur'\textacutedbl{}', # DOUBLE PRIME
- 0x2035: ur'\textasciigrave{}', # REVERSED PRIME
- 0x2036: ur'\textgravedbl{}', # REVERSED DOUBLE PRIME
- 0x203b: ur'\textreferencemark{}', # REFERENCE MARK
- 0x203d: ur'\textinterrobang{}', # INTERROBANG
- 0x2044: ur'\textfractionsolidus{}', # FRACTION SLASH
- 0x2045: ur'\textlquill{}', # LEFT SQUARE BRACKET WITH QUILL
- 0x2046: ur'\textrquill{}', # RIGHT SQUARE BRACKET WITH QUILL
- 0x2052: ur'\textdiscount{}', # COMMERCIAL MINUS SIGN
- 0x20a1: ur'\textcolonmonetary{}', # COLON SIGN
- 0x20a3: ur'\textfrenchfranc{}', # FRENCH FRANC SIGN
- 0x20a4: ur'\textlira{}', # LIRA SIGN
- 0x20a6: ur'\textnaira{}', # NAIRA SIGN
- 0x20a9: ur'\textwon{}', # WON SIGN
- 0x20ab: ur'\textdong{}', # DONG SIGN
- 0x20ac: ur'\texteuro{}', # EURO SIGN
- 0x20b1: ur'\textpeso{}', # PESO SIGN
- 0x20b2: ur'\textguarani{}', # GUARANI SIGN
- 0x2103: ur'\textcelsius{}', # DEGREE CELSIUS
- 0x2116: ur'\textnumero{}', # NUMERO SIGN
- 0x2117: ur'\textcircledP{}', # SOUND RECORDING COYRIGHT
- 0x211e: ur'\textrecipe{}', # PRESCRIPTION TAKE
- 0x2120: ur'\textservicemark{}', # SERVICE MARK
- 0x2122: ur'\texttrademark{}', # TRADE MARK SIGN
- 0x2126: ur'\textohm{}', # OHM SIGN
- 0x2127: ur'\textmho{}', # INVERTED OHM SIGN
- 0x212e: ur'\textestimated{}', # ESTIMATED SYMBOL
- 0x2190: ur'\textleftarrow{}', # LEFTWARDS ARROW
- 0x2191: ur'\textuparrow{}', # UPWARDS ARROW
- 0x2192: ur'\textrightarrow{}', # RIGHTWARDS ARROW
- 0x2193: ur'\textdownarrow{}', # DOWNWARDS ARROW
- 0x2212: ur'\textminus{}', # MINUS SIGN
- 0x2217: ur'\textasteriskcentered{}', # ASTERISK OPERATOR
- 0x221a: ur'\textsurd{}', # SQUARE ROOT
- 0x2422: ur'\textblank{}', # BLANK SYMBOL
- 0x25e6: ur'\textopenbullet{}', # WHITE BULLET
- 0x25ef: ur'\textbigcircle{}', # LARGE CIRCLE
- 0x266a: ur'\textmusicalnote{}', # EIGHTH NOTE
- 0x26ad: ur'\textmarried{}', # MARRIAGE SYMBOL
- 0x26ae: ur'\textdivorced{}', # DIVORCE SYMBOL
- 0x27e8: ur'\textlangle{}', # MATHEMATICAL LEFT ANGLE BRACKET
- 0x27e9: ur'\textrangle{}', # MATHEMATICAL RIGHT ANGLE BRACKET
- }
- # TODO: greek alphabet ... ?
- # see also LaTeX codec
- # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252124
- # and unimap.py from TeXML
- # set up the translation table:
- table = special_chars
+ # Set up the translation table:
+ table = CharMaps.special.copy()
# keep the underscore in citation references
if self.inside_citation_reference_label:
del(table[ord('_')])
# Workarounds for OT1 font-encoding
- if self.font_encoding in ['OT1', '']:
+ if self.font_encoding in ['OT1', ''] and not self.is_xetex:
# * out-of-order characters in cmtt
if self.literal:
# replace underscore by underlined blank,
@@ -1449,41 +1444,47 @@
# double quotes are 'active' in some languages
# TODO: use \textquotedbl if font encoding starts with T?
table[ord('"')] = self.babel.literal_double_quote
- # Unicode chars:
- table.update(unsupported_unicode_chars)
- table.update(pifont_chars)
- if not self.latex_encoding.startswith('utf8'):
- table.update(unicode_chars)
- table.update(textcomp_chars)
- # Characters that require a feature/package to render
- for ch in text:
- if ord(ch) in pifont_chars:
- self.requirements['pifont'] = '\\usepackage{pifont}'
- if ord(ch) in textcomp_chars:
+ # Unicode replacements for 8-bit tex engines (not required with XeTeX/LuaTeX):
+ if not self.is_xetex:
+ table.update(CharMaps.unsupported_unicode)
+ if not self.latex_encoding.startswith('utf8'):
+ table.update(CharMaps.utf8_supported_unicode)
+ table.update(CharMaps.textcomp)
+ table.update(CharMaps.pifont)
+ # Characters that require a feature/package to render
+ if [True for ch in text if ord(ch) in CharMaps.textcomp]:
self.requirements['textcomp'] = PreambleCmds.textcomp
+ if [True for ch in text if ord(ch) in CharMaps.pifont]:
+ self.requirements['pifont'] = '\\usepackage{pifont}'
text = text.translate(table)
- # Break up input ligatures
- for char in separate_chars * 2:
- # Do it twice ("* 2") because otherwise we would replace
- # '---' by '-{}--'.
- text = text.replace(char + char, char + '{}' + char)
+ # Break up input ligatures e.g. '--' to '-{}-'.
+ if not self.is_xetex: # Not required with xetex/luatex
+ separate_chars = '-'
+ # In monospace-font, we also separate ',,', '``' and "''" and some
+ # other characters which can't occur in non-literal text.
+ if self.literal:
+ separate_chars += ',`\'"<>'
+ for char in separate_chars * 2:
+ # Do it twice ("* 2") because otherwise we would replace
+ # '---' by '-{}--'.
+ text = text.replace(char + char, char + '{}' + char)
+
# Literal line breaks (in address or literal blocks):
if self.insert_newline:
lines = text.split('\n')
- # for blank lines, insert a protected space, to avoid
- # ! LaTeX Error: There's no line here to end.
- lines = [line + '~'*(not line.lstrip())
- for line in lines[:-1]] + lines[-1:]
- text = '\\\\\n'.join(lines)
+ # Add a protected space to blank lines (except the last)
+ # to avoid ``! LaTeX Error: There's no line here to end.``
+ for i, line in enumerate(lines[:-1]):
+ if not line.lstrip():
+ lines[i] += '~'
+ text = (r'\\' + '\n').join(lines)
if not self.literal:
text = self.babel.quote_quotes(text)
if self.literal and not self.insert_non_breaking_blanks:
# preserve runs of spaces but allow wrapping
text = text.replace(' ', ' ~')
- if not self.latex_encoding.startswith('utf8'):
- text = self.ensure_math(text)
return text
def attval(self, text,
@@ -2218,9 +2219,12 @@
self.requirements['~header'] = ''.join(self.out)
self.pop_output_collector()
- def to_latex_length(self, length_str, pxunit='px'):
+ def to_latex_length(self, length_str, pxunit=None):
"""Convert `length_str` with rst length to LaTeX length
"""
+ if pxunit is not None:
+ sys.stderr.write('deprecation warning: LaTeXTranslator.to_latex_length()'
+ ' option `pxunit` will be removed.')
match = re.match('(\d*\.?\d*)\s*(\S*)', length_str)
if not match:
return length_str
@@ -2231,12 +2235,13 @@
# percentage: relate to current line width
elif unit == '%':
length_str = '%.3f\\linewidth' % (float(value)/100.0)
- elif (unit == 'px') and (pxunit != 'px'):
- # length unit px not defined in some tex variants (e.g. XeTeX)
+ elif self.is_xetex and unit == 'px':
+ # XeTeX does not know the length unit px.
+ # Use \pdfpxdimen, the macro to set the value of 1 px in pdftex.
+ # This way, configuring works the same for pdftex and xetex.
self.fallbacks['_providelength'] = PreambleCmds.providelength
- self.fallbacks['px'] = '\n\\DUprovidelength{%s}{1bp}\n' % pxunit
- length_str = '%s%s' % (value, pxunit)
-
+ self.fallbacks['px'] = '\n\\DUprovidelength{\\pdfpxdimen}{1bp}\n'
+ length_str = r'%s\pdfpxdimen' % value
return length_str
def visit_image(self, node):
Modified: trunk/docutils/docutils/writers/xetex/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/xetex/__init__.py 2012-03-30 07:11:39 UTC (rev 7388)
+++ trunk/docutils/docutils/writers/xetex/__init__.py 2012-03-30 11:58:21 UTC (rev 7389)
@@ -61,7 +61,7 @@
def __init__(self):
latex2e.Writer.__init__(self)
- self.settings_defaults.update({'fontencoding': ''}) # use default (EU1)
+ self.settings_defaults.update({'fontencoding': ''}) # use default (EU1 or EU2)
self.translator_class = XeLaTeXTranslator
@@ -125,79 +125,10 @@
class XeLaTeXTranslator(latex2e.LaTeXTranslator):
def __init__(self, document):
+ self.is_xetex = True # typeset with XeTeX or LuaTeX engine
latex2e.LaTeXTranslator.__init__(self, document, Babel)
if self.latex_encoding == 'utf8':
self.requirements.pop('_inputenc', None)
else:
self.requirements['_inputenc'] = (r'\XeTeXinputencoding %s '
% self.latex_encoding)
-
- # XeTeX does not know the length unit px.
- # Use \pdfpxdimen, the macro to set the value of 1 px in pdftex.
- # This way, configuring works the same for pdftex and xetex.
- def to_latex_length(self, length_str, px=r'\pdfpxdimen'):
- """Convert string with rst lenght to LaTeX length"""
- return latex2e.LaTeXTranslator.to_latex_length(self, length_str, px)
-
- # Simpler variant of encode, as XeTeX understands utf8 Unicode:
- def encode(self, text):
- """Return text with 'problematic' characters escaped.
-
- Escape the ten special printing characters ``# $ % & ~ _ ^ \ { }``,
- square brackets ``[ ]``, double quotes and (in OT1) ``< | >``.
- """
- if self.verbatim:
- return text
- # LaTeX encoding maps:
- special_chars = {
- ord('#'): ur'\#',
- ord('$'): ur'\$',
- ord('%'): ur'\%',
- ord('&'): ur'\&',
- ord('~'): ur'\textasciitilde{}',
- ord('_'): ur'\_',
- ord('^'): ur'\textasciicircum{}',
- ord('\\'): ur'\textbackslash{}',
- ord('{'): ur'\{',
- ord('}'): ur'\}',
- # Square brackets are ordinary chars and cannot be escaped with '\',
- # so we put them in a group '{[}'. (Alternative: ensure that all
- # macros with optional arguments are terminated with {} and text
- # inside any optional argument is put in a group ``[{text}]``).
- # Commands with optional args inside an optional arg must be put
- # in a group, e.g. ``\item[{\hyperref[label]{text}}]``.
- ord('['): ur'{[}',
- ord(']'): ur'{]}'
- }
- # Unicode chars that are not properly handled by XeTeX
- unsupported_unicode_chars = {
- 0x00AD: ur'\-', # SOFT HYPHEN
- }
- # set up the translation table:
- table = special_chars
- # keep the underscore in citation references
- if self.inside_citation_reference_label:
- del(table[ord('_')])
- if self.insert_non_breaking_blanks:
- table[ord(' ')] = ur'~'
- if self.literal:
- # double quotes are 'active' in some languages
- table[ord('"')] = self.babel.literal_double_quote
- else:
- text = self.babel.quote_quotes(text)
- # Unicode chars:
- table.update(unsupported_unicode_chars)
-
- text = text.translate(table)
-
- # Literal line breaks (in address or literal blocks):
- if self.insert_newline:
- # for blank lines, insert a protected space, to avoid
- # ! LaTeX Error: There's no line here to end.
- textlines = [line + '~'*(not line.lstrip())
- for line in text.split('\n')]
- text = '\\\\\n'.join(textlines)
- if self.literal and not self.insert_non_breaking_blanks:
- # preserve runs of spaces but allow wrapping
- text = text.replace(' ', ' ~')
- return text
Modified: trunk/docutils/test/functional/expected/standalone_rst_xetex.tex
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_xetex.tex 2012-03-30 07:11:39 UTC (rev 7388)
+++ trunk/docutils/test/functional/expected/standalone_rst_xetex.tex 2012-03-30 11:58:21 UTC (rev 7389)
@@ -1269,8 +1269,8 @@
%
\begin{quote}{\ttfamily \raggedright \noindent
\DUrole{ln}{~8~}\#~print~integers~from~0~to~9:\\
-~\DUrole{ln}{~9~}for~i~in~range(10):\\
-~\DUrole{ln}{10~}~~~~print~i
+\DUrole{ln}{~9~}for~i~in~range(10):\\
+\DUrole{ln}{10~}~~~~print~i
}
\end{quote}
@@ -1292,7 +1292,7 @@
%
\begin{quote}{\ttfamily \raggedright \noindent
\DUrole{ln}{1~}..~header::~Document~header\\
-~\DUrole{ln}{2~}..~footer::~Document~footer
+\DUrole{ln}{2~}..~footer::~Document~footer
}
\end{quote}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|