[Epydoc-commits] SF.net SVN: epydoc: [1321] trunk/epydoc/src/epydoc/markup/doctest.py

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 1321
Author:   edloper
Date:     2006-08-24 11:45:12 -0700 (Thu, 24 Aug 2006)
ViewCVS:  http://svn.sourceforge.net/epydoc/?rev=1321&view=rev

Log Message:
-----------
- Improvements to doctest block colorization.

Modified Paths:
--------------
    trunk/epydoc/src/epydoc/markup/doctest.py
Modified: trunk/epydoc/src/epydoc/markup/doctest.py
===================================================================

--- trunk/epydoc/src/epydoc/markup/doctest.py	2006-08-24 18:09:56 UTC (rev 1320)
+++ trunk/epydoc/src/epydoc/markup/doctest.py	2006-08-24 18:45:12 UTC (rev 1321)
@@ -49,48 +49,50 @@
     return '\\pysrc%s{%s}' % (tag, plaintext_to_latex(s))
 
 # Regular expressions for colorize_doctestblock
-_KEYWORDS = ["del", "from", "lambda", "return", "and", "or", "is", 
-             "global", "not", "try", "break", "else", "if", "elif", 
-             "while", "class", "except", "import", "pass", "raise",
-             "continue", "finally", "in", "print", "def", "for"]
-_KEYWORD = '|'.join([r'(\b%s\b)' % _KW for _KW in _KEYWORDS])
-_STRING = (r"""
-[uU]?[rR]?
-  (?:              # Single-quote (') strings
-  '''(?:                 # Tripple-quoted can contain...
-      [^']               | # a non-quote
-      \\'                | # a backslashed quote
-      '{1,2}(?!')          # one or two quotes
-    )*''' |
-  '(?:                   # Non-tripple quoted can contain...
-     [^']                | # a non-quote
-     \\'                   # a backslashded quote
-   )*'(?!') | """+
-r'''               # Double-quote (") strings
-  """(?:                 # Tripple-quoted can contain...
-      [^"]               | # a non-quote
-      \\"                | # a backslashed single
-      "{1,2}(?!")          # one or two quotes
-    )*""" |
-  "(?:                   # Non-tripple quoted can contain...
-     [^"]                | # a non-quote
-     \\"                   # a backslashded quote
-   )*"(?!")
-)''')
+# set of keywords as listed in the Python Language Reference 2.4.1
+# added 'as' as well since IDLE already colorizes it as a keyword.
+# The documentation states that 'None' will become a keyword
+# eventually, but IDLE currently handles that as a builtin.
+_KEYWORDS = """
+and       del       for       is        raise    
+assert    elif      from      lambda    return   
+break     else      global    not       try      
+class     except    if        or        while    
+continue  exec      import    pass      yield    
+def       finally   in        print
+as
+""".split()
+_KEYWORD = '|'.join([r'\b%s\b' % _KW for _KW in _KEYWORDS])
+
+_BUILTINS = [_BI for _BI in dir(__builtins__) if not _BI.startswith('__')]
+_BUILTIN = '|'.join([r'\b%s\b' % _BI for _BI in _BUILTINS])
+
+_STRING = '|'.join([r'("""("""|.*?((?!").)"""))', r'("("|.*?((?!").)"))',
+                    r"('''('''|.*?[^\\']'''))", r"('('|.*?[^\\']'))"])
 _COMMENT = '(#.*?$)'
-_PROMPT = r'^\s*(?:>>>|\.\.\.)(?:\s|$)'
+_PROMPT1 = r'^\s*>>>(?:\s|$)'
+_PROMPT2 = r'^\s*\.\.\.(?:\s|$)'
 
-PROMPT_RE = re.compile('(%s)' % _PROMPT, re.MULTILINE | re.DOTALL)
+PROMPT_RE = re.compile('(%s|%s)' % (_PROMPT1, _PROMPT2),
+		       re.MULTILINE | re.DOTALL)
+PROMPT2_RE = re.compile('(%s)' % _PROMPT2, re.MULTILINE | re.DOTALL)
 '''The regular expression used to find Python prompts (">>>" and
 "...") in doctest blocks.'''
 
-DOCTEST_RE = re.compile(
-    '(?P<STRING>%s)|(?P<COMMENT>%s)|(?P<KEYWORD>%s)|(?P<PROMPT>%s)|.+?' %
-    (_STRING, _COMMENT, _KEYWORD, _PROMPT), re.MULTILINE | re.DOTALL)
+EXCEPT_RE = re.compile(r'(.*)(^Traceback \(most recent call last\):.*)',
+                       re.DOTALL | re.MULTILINE)
+
+DOCTEST_DIRECTIVE_RE = re.compile(r'#\s*doctest:.*')
+
+DOCTEST_RE = re.compile(r"""(?P<STRING>%s)|(?P<COMMENT>%s)|"""
+                        r"""(?P<KEYWORD>(%s))|(?P<BUILTIN>(%s))|"""
+                        r"""(?P<PROMPT1>%s)|(?P<PROMPT2>%s)|.+?""" %
+  (_STRING, _COMMENT, _KEYWORD, _BUILTIN, _PROMPT1, _PROMPT2),
+  re.MULTILINE | re.DOTALL)
 '''The regular expression used by L{_doctest_sub} to colorize doctest
 blocks.'''
 
-def colorize_doctest(s, markup_func):
+def colorize_doctest(s, markup_func, inline=False, strip_directives=False):
     """
     Colorize the given doctest string C{s} using C{markup_func()}.
     C{markup_func()} should be a function that takes a substring and a
@@ -100,10 +102,13 @@
         ...     return '<span class="%s">%s</span>' % (tag, s)
 
     The tags that will be passed to the markup function are: 
-        - C{prompt} -- a Python prompt (>>> or ...)
+        - C{prompt} -- the Python PS1 prompt (>>>)
+	- C{more} -- the Python PS2 prompt (...)
         - C{keyword} -- a Python keyword (for, if, etc.)
+        - C{builtin} -- a Python builtin name (abs, dir, etc.)
         - C{string} -- a string literal
         - C{comment} -- a comment
+	- C{except} -- an exception traceback (up to the next >>>)
         - C{output} -- the output from a doctest block.
         - C{other} -- anything else (does *not* include output.)
     """
@@ -112,11 +117,18 @@
     result = []
     out = result.append
 
+    if strip_directives:
+        s = DOCTEST_DIRECTIVE_RE.sub('', s)
+
     def subfunc(match):
-        if match.group('PROMPT'):
+        if match.group('PROMPT1'):
             return markup_func(match.group(), 'prompt')
+	if match.group('PROMPT2'):
+	    return markup_func(match.group(), 'more')
         if match.group('KEYWORD'):
             return markup_func(match.group(), 'keyword')
+        if match.group('BUILTIN'):
+            return markup_func(match.group(), 'builtin')
         if match.group('COMMENT'):
             return markup_func(match.group(), 'comment')
         if match.group('STRING') and '\n' not in match.group():
@@ -124,17 +136,33 @@
         elif match.group('STRING'):
             # It's a multiline string; colorize the string & prompt
             # portion of each line.
-            pieces = [markup_func(s, ['string','prompt'][i%2])
-                      for i, s in enumerate(PROMPT_RE.split(match.group()))]
+            pieces = [markup_func(s, ['string','more'][i%2])
+                      for i, s in enumerate(PROMPT2_RE.split(match.group()))]
             return ''.join(pieces)
         else:
             return markup_func(match.group(), 'other')
-    
+
+    if inline:
+	pysrc = DOCTEST_RE.sub(subfunc, s)
+	return pysrc.strip()
+
+    # need to add a third state here for correctly formatting exceptions
+
     for line in s.split('\n')+['\n']:
         if PROMPT_RE.match(line):
             pysrc.append(line)
             if pyout:
-                result.append(markup_func('\n'.join(pyout).strip(), 'output'))
+                pyout = '\n'.join(pyout).strip()
+                m = EXCEPT_RE.match(pyout)
+                if m:
+                    pyout, pyexc = m.group(1).strip(), m.group(2).strip()
+                    if pyout:
+                        print ('Warning: doctest does not allow for mixed '
+                               'output and exceptions!')
+                        result.append(markup_func(pyout, 'output'))
+                    result.append(markup_func(pyexc, 'except'))
+                else:
+                    result.append(markup_func(pyout, 'output'))
                 pyout = []
         else:
             pyout.append(line)
@@ -147,5 +175,20 @@
     remainder = '\n'.join(pyout).strip()
     if remainder:
         result.append(markup_func(remainder, 'output'))
+    result = '\n'.join(result)
+
+    # Merge adjacent spans w/ the same class.  I.e, convert:
+    #   <span class="x">foo</span><span class="x">foo</span>
+    # to:
+    #   <span class="x">foofoo</span>
+    prev_span_class = [None]
+    def subfunc(match):
+        if match.group(2) == prev_span_class[0]:
+            prev_span_class[0] = match.group(2)
+            return match.group(1) or ''
+        else:
+            prev_span_class[0] = match.group(2)
+            return match.group()
+    result = re.sub(r'</span>(\n?)<span class="([^"]+)">', subfunc, result)
         
-    return '\n'.join(result)
+    return result


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.