[Docutils-checkins] r6124 - in trunk/docutils: HISTORY.txt docutils/writers/html4css1/init.py

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Author: milde
Date: 2009-09-11 12:35:56 +0200 (Fri, 11 Sep 2009)
New Revision: 6124

Modified:
   trunk/docutils/HISTORY.txt
   trunk/docutils/docutils/writers/html4css1/__init__.py
   trunk/docutils/docutils/writers/latex2e/__init__.py
   trunk/docutils/test/functional/expected/standalone_rst_latex.tex
Log:
Use `translate` method instead of repeated `replace` calls.

Modified: trunk/docutils/HISTORY.txt
===================================================================

--- trunk/docutils/HISTORY.txt	2009-09-11 10:18:31 UTC (rev 6123)
+++ trunk/docutils/HISTORY.txt	2009-09-11 10:35:56 UTC (rev 6124)
@@ -43,6 +43,7 @@
   - Fix [ 1692788 ] allow UTF-8 in style sheets.
   - Fix [ 2781629 ] support non-ASCII chars in file names.
   - Apply [ 2845002 ] let ``--no-raw`` disable raw *roles* too.
+  - Fix [ 2831643 ] by renaming DirectiveError.message to DirectiveError.msg
 
 * reStructuredText:
 
@@ -86,6 +87,7 @@
     separated list of stylesheets.
   - Address [ 1938891 ] Inline literal text creates "pre" span only when
     needed to prevent inter-word line wraps.
+  - Use `translate` method instead of repeated `replace` calls.
 
 * docutils/writers/newlatex2e/__init__.py:
 
@@ -132,6 +134,7 @@
   - Typeset generic topic as "quote block with title".
   - Use template (file and configuration option).
   - Render doctest blocks as literal blocks (fixes [ 1586058 ]).
+  - Use .translate() instead of repeated .replace() calls.
 
 * docutils/writers/manpage.py
 

Modified: trunk/docutils/docutils/writers/html4css1/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/html4css1/__init__.py	2009-09-11 10:18:31 UTC (rev 6123)
+++ trunk/docutils/docutils/writers/html4css1/__init__.py	2009-09-11 10:35:56 UTC (rev 6124)
@@ -308,14 +308,15 @@
     def encode(self, text):
         """Encode special characters in `text` & return."""
         # @@@ A codec to do these and all other HTML entities would be nice.
-        text = text.replace("&", "&amp;")
-        text = text.replace("<", "&lt;")
-        text = text.replace('"', "&quot;")
-        text = text.replace(">", "&gt;")
-        text = text.replace("@", "&#64;") # may thwart some address harvesters
-        # Replace the non-breaking space character with the HTML entity:
-        text = text.replace(u'\u00a0', "&nbsp;")
-        return text
+        text = unicode(text)
+        return text.translate({
+            ord('&'): u'&amp;',
+            ord('<'): u'&lt;',
+            ord('"'): u'&quot;',
+            ord('>'): u'&gt;',
+            ord('@'): u'&#64;', # may thwart some address harvesters
+            # TODO: convert non-breaking space only if needed?
+            0xa0: u'&nbsp;'}) # non-breaking space
 
     def cloak_mailto(self, uri):
         """Try to hide a mailto: URL from harvesters."""

Modified: trunk/docutils/docutils/writers/latex2e/__init__.py
===================================================================
--- trunk/docutils/docutils/writers/latex2e/__init__.py	2009-09-11 10:18:31 UTC (rev 6123)
+++ trunk/docutils/docutils/writers/latex2e/__init__.py	2009-09-11 10:35:56 UTC (rev 6124)
@@ -1,3 +1,4 @@
+# -*- coding: utf8 -*-
 # $Id$
 # Author: Engelbert Gruber <gr...@us...>
 # Copyright: This module has been placed in the public domain.
@@ -290,12 +291,12 @@
         self.setup = '' # language dependent configuration code
         # double quotes are "active" in some languages (e.g. German).
         # TODO: use \textquotedbl in OT1 font encoding?
-        self.literal_double_quote = '"'
+        self.literal_double_quote = u'"'
         if self.language.startswith('de'):
-            self.quotes = ('{\\glqq}', '\\grqq{}')
-            self.literal_double_quote = '\\dq{}'
+            self.quotes = (r'\glqq{}', r'\grqq{}')
+            self.literal_double_quote = ur'\dq{}'
         if self.language.startswith('it'):
-            self.literal_double_quote = r'{\char`\"}'
+            self.literal_double_quote = ur'{\char`\"}'
         if self.language.startswith('es'):
             # reset tilde ~ to the original binding (nobreakspace):
             self.setup = ('\n'
@@ -1075,139 +1076,125 @@
     def language_label(self, docutil_label):
         return self.language.labels[docutil_label]
 
-    latex_equivalents = {
-        u'\u00A0' : '~',
-        u'\u2013' : '{--}',
-        u'\u2014' : '{---}',
-        u'\u2018' : '`',
-        u'\u2019' : "'",
-        u'\u201A' : ',',
-        u'\u201C' : '``',
-        u'\u201D' : "''",
-        u'\u201E' : ',,',
-        u'\u2020' : '\\dag{}',
-        u'\u2021' : '\\ddag{}',
-        u'\u2026' : '\\dots{}',
-        u'\u2122' : '\\texttrademark{}',
-        u'\u21d4' : '$\\Leftrightarrow$',
-        # greek alphabet ?
-    }
-
-    def unicode_to_latex(self,text):
-        # see LaTeX codec
-        # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252124
-        # Only some special chracters are translated, for documents with many
-        # utf-8 chars one should use the LaTeX unicode package.
-        for uchar in self.latex_equivalents.keys():
-            text = text.replace(uchar,self.latex_equivalents[uchar])
-        return text
-
     def ensure_math(self, text):
         if not hasattr(self, 'ensure_math_re'):
             chars = { # lnot,pm,twosuperior,threesuperior,mu,onesuperior,times,div
-                     'latin1' : '\xac\xb1\xb2\xb3\xb5\xb9\xd7\xf7' ,
-                     # TODO?: also latin5 and latin9
+                     'latin1' : '\xac\xb1\xb2\xb3\xb5\xb9\xd7\xf7' , # ¬±²³µ¹×÷
+                     # TODO?: use texcomp instead.
                     }
             self.ensure_math_re = re.compile('([%s])' % chars['latin1'])
         text = self.ensure_math_re.sub(r'\\ensuremath{\1}', text)
         return text
 
     def encode(self, text):
-        """Return text with special characters escaped.
+        """Return text with 'problematic' characters escaped.
 
         Escape the ten special printing characters ``# $ % & ~ _ ^ \ { }``,
         square brackets ``[ ]``, double quotes and (in OT1) ``< | >``.
 
         Separate ``-`` (and more in literal text) to prevent input ligatures.
+
+        Translate non-supported Unicode characters.
         """
         if self.verbatim:
             return text
-        # Bootstrapping: backslash and braces are also part of replacements
-        # 1. backslash (terminated below)
-        text = text.replace('\\', r'\textbackslash')
-        # 2. braces
-        text = text.replace('{', r'\{')
-        text = text.replace('}', r'\}')
-        # 3. terminate \textbackslash
-        text = text.replace('\\textbackslash', '\\textbackslash{}')
-        # 4. the remaining special printing characters
-        text = text.replace('#', r'\#')
-        text = text.replace('$', r'\$')
-        text = text.replace('%', r'\%')
-        text = text.replace('&', r'\&')
-        text = text.replace('~', r'\textasciitilde{}')
-        if not self.inside_citation_reference_label:
-            text = text.replace('_', r'\_')
-        text = text.replace('^', r'\textasciicircum{}')
-        # Square brackets
-        #
-        #   \item and all the other commands with optional arguments check
-        #   if the token right after the macro name is an opening bracket.
-        #   In that case the contents between that bracket and the following
-        #   closing bracket on the same grouping level are taken as the
-        #   optional argument. What makes this unintuitive is the fact that
-        #   the square brackets aren't grouping characters themselves, so in
-        #   your last example \item[[...]] the optional argument consists of
-        #   [... (without the closing bracket).
-        #
-        # How to escape?
-        #
+        # Separate compound characters, e.g. '--' to '-{}-'.
+        separate_chars = '-'
+        # In monospace-font, we also separate ',,', '``' and "''" and some
+        # other characters which can't occur in non-literal text.
+        if self.literal_block or self.literal:
+            separate_chars += ',`\'"<>'
+        # LaTeX encoding maps:
+        special_chars2latex = {
+            ord('#'): ur'\#',
+            ord('$'): ur'\$',
+            ord('%'): ur'\%',
+            ord('&'): ur'\&',
+            ord('~'): ur'\textasciitilde{}',
+            ord('_'): ur'\_',
+            ord('^'): ur'\textasciicircum{}',
+            ord('\\'): ur'\textbackslash{}',
+            ord('{'): ur'\{',
+            ord('}'): ur'\}',
         # Square brackets are ordinary chars and cannot be escaped with '\',
         # so we put them in a group '{[}'. (Alternative: ensure that all
         # macros with optional arguments are terminated with {} and text
         # inside any optional argument is put in a group ``[{text}]``).
         # Commands with optional args inside an optional arg must be put
         # in a group, e.g. ``\item[{\hyperref[label]{text}}]``.
-        text = text.replace('[', '{[}')
-        text = text.replace(']', '{]}')
+            ord('['): ur'{[}',
+            ord(']'): ur'{]}',
+        # Unicode chars that are not recognized by LaTeX's utf8 encoding
+            0x21d4: ur'$\Leftrightarrow$',
+            0x00A0: ur'~', # NO-BREAK SPACE
+        }
+        # Unicode chars that are recognized by LaTeX's utf8 encoding
+        unicode2latex = {
+            0x2013: ur'\textendash{}',
+            0x2014: ur'\textemdash{}',
+            0x2018: ur'`',
+            0x2019: ur"'",
+            0x201A: ur'\quotesinglbase{}', # SINGLE LOW-9 QUOTATION MARK
+            0x201C: ur'\textquotedblleft{}',
+            0x201D: ur'\textquotedblright{}',
+            0x201E: ur'\quotedblbase', # DOUBLE LOW-9 QUOTATION MARK
+            0x2020: ur'\dag{}',
+            0x2021: ur'\ddag{}',
+            0x2026: ur'\dots{}',
+            0x2122: ur'\texttrademark{}',
+            # TODO: greek alphabet ... ?
+            # see also LaTeX codec
+            # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252124
+            # and unimap.py from TeXML
+        }
+        # set up the translation table:
+        table = special_chars2latex
+        # keep the underscore in citation references
+        if self.inside_citation_reference_label:
+            del(table[ord('_')])
         # Workarounds for OT1 font-encoding
         if self.font_encoding in ['OT1', '']:
             # * out-of-order characters in cmtt
             if self.literal_block or self.literal:
-                # replace underscore by underlined blank, because this has
-                # correct width.
-                text = text.replace(r'\_', r'\underline{ }')
+                # replace underscore by underlined blank,
+                # because this has correct width.
+                table[ord('_')] = u'\\underline{~}'
                 # the backslash doesn't work, so we use a mirrored slash.
                 # \reflectbox is provided by graphicx:
                 self.requirements['graphicx'] = self.graphicx_package
-                text = text.replace(r'\textbackslash', r'\reflectbox{/}')
+                table[ord('\\')] = ur'\reflectbox{/}'
             # * ``< | >`` come out as different chars (except for cmtt):
             else:
-                text = text.replace('|', '\\textbar{}')
-                text = text.replace('<', '\\textless{}')
-                text = text.replace('>', '\\textgreater{}')
-        #
-        # Separate compound characters, e.g. '--' to '-{}-'.  (The
-        # actual separation is done later; see below.)
-        separate_chars = '-'
+                table[ord('|')] = ur'\textbar{}'
+                table[ord('<')] = ur'\textless{}'
+                table[ord('>')] = ur'\textgreater{}'
+        if self.insert_non_breaking_blanks:
+            table[ord(' ')] = ur'~'
         if self.literal_block or self.literal:
-            # In monospace-font, we also separate ',,', '``' and "''"
-            # and some other characters which can't occur in
-            # non-literal text.
-            separate_chars += ',`\'"<>'
             # double quotes are 'active' in some languages
-            text = text.replace('"', self.babel.literal_double_quote)
+            table[ord('"')] = self.babel.literal_double_quote
         else:
             text = self.babel.quote_quotes(text)
+        # Unicode chars:
+        if not self.latex_encoding.startswith('utf8'):
+            table.update(unicode2latex)
+
+        text = text.translate(table)
+
+        # Break up input ligatures
         for char in separate_chars * 2:
             # Do it twice ("* 2") because otherwise we would replace
             # '---' by '-{}--'.
             text = text.replace(char + char, char + '{}' + char)
+        # Literal line breaks (in address or literal blocks):
         if self.insert_newline or self.literal_block:
-            # Literal line breaks (in address or literal blocks):
             # for blank lines, insert a protected space, to avoid
             # ! LaTeX Error: There's no line here to end.
             textlines = [line + '~'*(not line.lstrip())
                          for line in text.split('\n')]
             text = '\\\\\n'.join(textlines)
-        if self.insert_non_breaking_blanks:
-            text = text.replace(' ', '~')
         if not self.latex_encoding.startswith('utf8'):
-            text = self.unicode_to_latex(text)
             text = self.ensure_math(text)
-        if self.latex_encoding == 'utf8':
-            # no-break space is not supported by (plain) utf8 input encoding
-            text = text.replace(u'\u00A0', '~')
         return text
 
     def attval(self, text,
@@ -1827,11 +1814,12 @@
         self.out.append(self.context.pop())
 
     def visit_footer(self, node):
-        self.out = self.requirements
+        self.out = []
         self.out.append(r'\newcommand{\DUfooter}{')
 
     def depart_footer(self, node):
         self.out.append('}')
+        self.requirements['~footer'] = ''.join(self.out)
         self.out = self.body
 
     def visit_footnote(self, node):
@@ -1918,11 +1906,12 @@
         pass
 
     def visit_header(self, node):
-        self.out = self.requirements
+        self.out = []
         self.out.append(r'\newcommand{\DUheader}{')
 
     def depart_header(self, node):
         self.out.append('}')
+        self.requirements['~header'] = ''.join(self.out)
         self.out = self.body
 
     def visit_hint(self, node):

Modified: trunk/docutils/test/functional/expected/standalone_rst_latex.tex
===================================================================
--- trunk/docutils/test/functional/expected/standalone_rst_latex.tex	2009-09-11 10:18:31 UTC (rev 6123)
+++ trunk/docutils/test/functional/expected/standalone_rst_latex.tex	2009-09-11 10:35:56 UTC (rev 6124)
@@ -1777,7 +1777,7 @@
 
 \texttt{!!!"{}"{}"\#\#\#\$\$\$\%\%\%\&\&\&'{}'{}'((()))***+++,{},{},-{}-{}-...///:::}
 
-\texttt{;;;<{}<{}<===>{}>{}>???@@@{[}{[}{[}\reflectbox{/}{}\reflectbox{/}{}\reflectbox{/}{}{]}{]}{]}\textasciicircum{}\textasciicircum{}\textasciicircum{}\underline{ }\underline{ }\underline{ }`{}`{}`\{\{\{|||\}\}\}\textasciitilde{}\textasciitilde{}\textasciitilde{}}
+\texttt{;;;<{}<{}<===>{}>{}>???@@@{[}{[}{[}\reflectbox{/}\reflectbox{/}\reflectbox{/}{]}{]}{]}\textasciicircum{}\textasciicircum{}\textasciicircum{}\underline{~}\underline{~}\underline{~}`{}`{}`\{\{\{|||\}\}\}\textasciitilde{}\textasciitilde{}\textasciitilde{}}
 
 \texttt{xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx}
 
@@ -1822,13 +1822,13 @@
  \\
 \hline
 
-{--}
+\textendash{}
  & 
 en-dash
  \\
 \hline
 
-{---}
+\textemdash{}
  & 
 em-dash
  \\
@@ -1846,25 +1846,25 @@
  \\
 \hline
 
-,
+\quotesinglbase{}
  & 
 low single comma quotation mark
  \\
 \hline
 
-``
+\textquotedblleft{}
  & 
 double turned comma quotation mark
  \\
 \hline
 
-''
+\textquotedblright{}
  & 
 double comma quotation mark
  \\
 \hline
 
-,,
+\quotedblbase
  & 
 low double comma quotation mark
  \\
@@ -1933,7 +1933,7 @@
 The special chars verbatim:
 %
 \begin{quote}{\ttfamily \raggedright \noindent
-\#~\$~\%~\&~\textasciitilde{}~\underline{~}~\textasciicircum{}~\reflectbox{/}{}~\{~\}
+\#~\$~\%~\&~\textasciitilde{}~\underline{~}~\textasciicircum{}~\reflectbox{/}~\{~\}
 }
 \end{quote}
 





[Docutils-checkins] r6124 - in trunk/docutils: HISTORY.txt docutils/writers/html4css1/__init__.py

[Docutils-checkins] r6124 - in trunk/docutils: HISTORY.txt docutils/writers/html4css1/__init__.py docutils/writers/latex2e/__init__.py test/functional/expected/standalone_rst_latex.tex

[Docutils-checkins] r6124 - in trunk/docutils: HISTORY.txt docutils/writers/html4css1/init.py

[Docutils-checkins] r6124 - in trunk/docutils: HISTORY.txt docutils/writers/html4css1/init.py docutils/writers/latex2e/init.py test/functional/expected/standalone_rst_latex.tex