[Epydoc-commits] SF.net SVN: epydoc: [1440] trunk/epydoc/src/epydoc/markup/doctest.py
Brought to you by:
edloper
|
From: <ed...@us...> - 2007-02-07 06:37:26
|
Revision: 1440
http://svn.sourceforge.net/epydoc/?rev=1440&view=rev
Author: edloper
Date: 2007-02-06 22:37:21 -0800 (Tue, 06 Feb 2007)
Log Message:
-----------
- Improved/refactored version of doctest colorizer.
Modified Paths:
--------------
trunk/epydoc/src/epydoc/markup/doctest.py
Modified: trunk/epydoc/src/epydoc/markup/doctest.py
===================================================================
--- trunk/epydoc/src/epydoc/markup/doctest.py 2007-02-07 06:21:26 UTC (rev 1439)
+++ trunk/epydoc/src/epydoc/markup/doctest.py 2007-02-07 06:37:21 UTC (rev 1440)
@@ -19,176 +19,292 @@
import re
from epydoc.util import plaintext_to_html, plaintext_to_latex
+__all__ = ['doctest_to_html', 'doctest_to_latex',
+ 'DoctestColorizer', 'XMLDoctestColorizer',
+ 'HTMLDoctestColorizer', 'LaTeXDoctestColorizer']
+
def doctest_to_html(s):
"""
Perform syntax highlighting on the given doctest string, and
return the resulting HTML code. This code consists of a C{<pre>}
block with class=py-doctest. Syntax highlighting is performed
- using the following css classes: 'py-prompt', 'py-keyword',
- 'py-string', 'py-comment', and 'py-output'.
+ using the following css classes:
+
+ - C{py-prompt} -- the Python PS1 prompt (>>>)
+ - C{py-more} -- the Python PS2 prompt (...)
+ - C{py-keyword} -- a Python keyword (for, if, etc.)
+ - C{py-builtin} -- a Python builtin name (abs, dir, etc.)
+ - C{py-string} -- a string literal
+ - C{py-comment} -- a comment
+ - C{py-except} -- an exception traceback (up to the next >>>)
+ - C{py-output} -- the output from a doctest block.
+ - C{py-defname} -- the name of a function or class defined by
+ a C{def} or C{class} statement.
"""
- return ('<pre class="py-doctest">\n%s\n</pre>\n' %
- colorize_doctest(s, _tag_span_html).strip())
+ return HTMLDoctestColorizer().colorize_doctest(s)
def doctest_to_latex(s):
"""
Perform syntax highlighting on the given doctest string, and
return the resulting LaTeX code. This code consists of an
- C{alltt} environment. Syntax highlighting is performed using five
- new latex commands, which must be defined externally:
- '\pysrcprompt', '\pysrckeyword', '\pysrcstring', '\pysrccomment',
- and '\pysrcoutput'.
+ C{alltt} environment. Syntax highlighting is performed using
+ the following new latex commands, which must be defined externally:
+ - C{\pysrcprompt} -- the Python PS1 prompt (>>>)
+ - C{\pysrcmore} -- the Python PS2 prompt (...)
+ - C{\pysrckeyword} -- a Python keyword (for, if, etc.)
+ - C{\pysrcbuiltin} -- a Python builtin name (abs, dir, etc.)
+ - C{\pysrcstring} -- a string literal
+ - C{\pysrccomment} -- a comment
+ - C{\pysrcexcept} -- an exception traceback (up to the next >>>)
+ - C{\pysrcoutput} -- the output from a doctest block.
+ - C{\pysrcdefname} -- the name of a function or class defined by
+ a C{def} or C{class} statement.
"""
- return ('\\begin{alltt}\n%s\n\\end{alltt}\n' %
- colorize_doctest(s, _tag_span_latex).strip())
+ return LaTeXDoctestColorizer().colorize_doctest(s)
-def _tag_span_html(s, tag):
- return '<span class="py-%s">%s</span>' % (tag, plaintext_to_html(s))
+class DoctestColorizer:
+ """
+ An abstract base class for performing syntax highlighting on
+ doctest blocks and other bits of Python code. Subclasses should
+ provide definitions for:
-def _tag_span_latex(s, tag):
- return '\\pysrc%s{%s}' % (tag, plaintext_to_latex(s))
+ - The L{markup()} method, which takes a substring and a tag, and
+ returns a colorized version of the substring.
+ - The L{PREFIX} and L{SUFFIX} variables, which will be added
+ to the beginning and end of the strings returned by
+ L{colorize_codeblock} and L{colorize_doctest}.
+ """
-# Regular expressions for colorize_doctestblock
-# set of keywords as listed in the Python Language Reference 2.4.1
-# added 'as' as well since IDLE already colorizes it as a keyword.
-# The documentation states that 'None' will become a keyword
-# eventually, but IDLE currently handles that as a builtin.
-_KEYWORDS = """
-and del for is raise
-assert elif from lambda return
-break else global not try
-class except if or while
-continue exec import pass yield
-def finally in print
-as
-""".split()
-_KEYWORD = '|'.join([r'\b%s\b' % _KW for _KW in _KEYWORDS])
+ #: A string that is added to the beginning of the strings
+ #: returned by L{colorize_codeblock} and L{colorize_doctest}.
+ #: Typically, this string begins a preformatted area.
+ PREFIX = None
-_BUILTINS = [_BI for _BI in dir(__builtins__) if not _BI.startswith('__')]
-_BUILTIN = '|'.join([r'\b%s\b' % _BI for _BI in _BUILTINS])
+ #: A string that is added to the end of the strings
+ #: returned by L{colorize_codeblock} and L{colorize_doctest}.
+ #: Typically, this string ends a preformatted area.
+ SUFFIX = None
-_STRING = '|'.join([r'("""("""|.*?((?!").)"""))', r'("("|.*?((?!").)"))',
- r"('''('''|.*?[^\\']'''))", r"('('|.*?[^\\']'))"])
-_COMMENT = '(#.*?$)'
-_PROMPT1 = r'^\s*>>>(?:\s|$)'
-_PROMPT2 = r'^\s*\.\.\.(?:\s|$)'
+ #: A list of the names of all Python keywords. ('as' is included
+ #: even though it is technically not a keyword.)
+ _KEYWORDS = ("and del for is raise "
+ "assert elif from lambda return "
+ "break else global not try "
+ "class except if or while "
+ "continue exec import pass yield "
+ "def finally in print as").split()
-PROMPT_RE = re.compile('(%s|%s)' % (_PROMPT1, _PROMPT2),
- re.MULTILINE | re.DOTALL)
-PROMPT2_RE = re.compile('(%s)' % _PROMPT2, re.MULTILINE | re.DOTALL)
-'''The regular expression used to find Python prompts (">>>" and
-"...") in doctest blocks.'''
+ #: A list of all Python builtins.
+ _BUILTINS = [_BI for _BI in dir(__builtins__)
+ if not _BI.startswith('__')]
-EXCEPT_RE = re.compile(r'(.*)(^Traceback \(most recent call last\):.*)',
- re.DOTALL | re.MULTILINE)
+ #: A regexp group that matches keywords.
+ _KEYWORD_GRP = '|'.join([r'\b%s\b' % _KW for _KW in _KEYWORDS])
-DOCTEST_DIRECTIVE_RE = re.compile(r'#\s*doctest:.*')
+ #: A regexp group that matches Python builtins.
+ _BUILTIN_GRP = (r'(?<!\.)(?:%s)' % '|'.join([r'\b%s\b' % _BI
+ for _BI in _BUILTINS]))
-DOCTEST_RE = re.compile(r"""(?P<STRING>%s)|(?P<COMMENT>%s)|"""
- r"""(?P<KEYWORD>(%s))|(?P<BUILTIN>(%s))|"""
- r"""(?P<PROMPT1>%s)|(?P<PROMPT2>%s)|.+?""" %
- (_STRING, _COMMENT, _KEYWORD, _BUILTIN, _PROMPT1, _PROMPT2),
- re.MULTILINE | re.DOTALL)
-'''The regular expression used by L{_doctest_sub} to colorize doctest
-blocks.'''
+ #: A regexp group that matches Python strings.
+ _STRING_GRP = '|'.join(
+ [r'("""("""|.*?((?!").)"""))', r'("("|.*?((?!").)"))',
+ r"('''('''|.*?[^\\']'''))", r"('('|.*?[^\\']'))"])
-def colorize_doctest(s, markup_func, inline=False, strip_directives=False):
- """
- Colorize the given doctest string C{s} using C{markup_func()}.
- C{markup_func()} should be a function that takes a substring and a
- tag, and returns a colorized version of the substring. E.g.:
+ #: A regexp group that matches Python comments.
+ _COMMENT_GRP = '(#.*?$)'
- >>> def html_markup_func(s, tag):
- ... return '<span class="%s">%s</span>' % (tag, s)
+ #: A regexp group that matches Python ">>>" prompts.
+ _PROMPT1_GRP = r'^[ \t]*>>>(?:[ \t]|$)'
+
+ #: A regexp group that matches Python "..." prompts.
+ _PROMPT2_GRP = r'^[ \t]*\.\.\.(?:[ \t]|$)'
- The tags that will be passed to the markup function are:
- - C{prompt} -- the Python PS1 prompt (>>>)
- - C{more} -- the Python PS2 prompt (...)
- - C{keyword} -- a Python keyword (for, if, etc.)
- - C{builtin} -- a Python builtin name (abs, dir, etc.)
- - C{string} -- a string literal
- - C{comment} -- a comment
- - C{except} -- an exception traceback (up to the next >>>)
- - C{output} -- the output from a doctest block.
- - C{other} -- anything else (does *not* include output.)
- """
- pysrc = [] # the source code part of a docstest block (lines)
- pyout = [] # the output part of a doctest block (lines)
- result = []
- out = result.append
+ #: A regexp group that matches function and class definitions.
+ _DEFINE_GRP = r'\b(?:def|class)[ \t]+\w+'
- if strip_directives:
- s = DOCTEST_DIRECTIVE_RE.sub('', s)
+ #: A regexp that matches Python prompts
+ PROMPT_RE = re.compile('(%s|%s)' % (_PROMPT1_GRP, _PROMPT2_GRP),
+ re.MULTILINE | re.DOTALL)
- def subfunc(match):
+ #: A regexp that matches Python "..." prompts.
+ PROMPT2_RE = re.compile('(%s)' % _PROMPT2_GRP,
+ re.MULTILINE | re.DOTALL)
+
+ #: A regexp that matches doctest exception blocks.
+ EXCEPT_RE = re.compile(r'^[ \t]*Traceback \(most recent call last\):.*',
+ re.DOTALL | re.MULTILINE)
+
+ #: A regexp that matches doctest directives.
+ DOCTEST_DIRECTIVE_RE = re.compile(r'#[ \t]*doctest:.*')
+
+ #: A regexp that matches all of the regions of a doctest block
+ #: that should be colored.
+ DOCTEST_RE = re.compile(
+ r'(.*?)((?P<STRING>%s)|(?P<COMMENT>%s)|(?P<DEFINE>%s)|'
+ r'(?P<KEYWORD>%s)|(?P<BUILTIN>%s)|'
+ r'(?P<PROMPT1>%s)|(?P<PROMPT2>%s)|(?P<EOS>\Z))' % (
+ _STRING_GRP, _COMMENT_GRP, _DEFINE_GRP, _KEYWORD_GRP, _BUILTIN_GRP,
+ _PROMPT1_GRP, _PROMPT2_GRP), re.MULTILINE | re.DOTALL)
+
+ #: This regular expression is used to find doctest examples in a
+ #: string. This is copied from the standard Python doctest.py
+ #: module (after the refactoring in Python 2.4+).
+ DOCTEST_EXAMPLE_RE = re.compile(r'''
+ # Source consists of a PS1 line followed by zero or more PS2 lines.
+ (?P<source>
+ (?:^(?P<indent> [ ]*) >>> .*) # PS1 line
+ (?:\n [ ]* \.\.\. .*)* # PS2 lines
+ \n?)
+ # Want consists of any non-blank lines that do not start with PS1.
+ (?P<want> (?:(?![ ]*$) # Not a blank line
+ (?![ ]*>>>) # Not a line starting with PS1
+ .*$\n? # But any other line
+ )*)
+ ''', re.MULTILINE | re.VERBOSE)
+
+ def colorize_inline(self, s):
+ """
+ Colorize a string containing Python code. Do not add the
+ L{PREFIX} and L{SUFFIX} strings to the returned value. This
+ method is intended for generating syntax-highlighted strings
+ that are appropriate for inclusion as inline expressions.
+ """
+ return self.DOCTEST_RE.sub(self.subfunc, s)
+
+ def colorize_codeblock(self, s):
+ """
+ Colorize a string containing only Python code. This method
+ differs from L{colorize_doctest} in that it will not search
+ for doctest prompts when deciding how to colorize the string.
+ """
+ body = self.DOCTEST_RE.sub(self.subfunc, s)
+ return self.PREFIX + body + self.SUFFIX
+
+ def colorize_doctest(self, s, strip_directives=False):
+ """
+ Colorize a string containing one or more doctest examples.
+ """
+ output = []
+ charno = 0
+ for m in self.DOCTEST_EXAMPLE_RE.finditer(s):
+ # Parse the doctest example:
+ pysrc, want = m.group('source', 'want')
+ # Pre-example text:
+ output.append(s[charno:m.start()])
+ # Example source code:
+ output.append(self.DOCTEST_RE.sub(self.subfunc, pysrc))
+ # Example output:
+ if want:
+ if self.EXCEPT_RE.match(want):
+ output += [self.markup(line, 'except')+'\n'
+ for line in want.split('\n')]
+ else:
+ output += [self.markup(line, 'output')+'\n'
+ for line in want.split('\n')]
+ # Update charno
+ charno = m.end()
+ # Add any remaining post-example text.
+ output.append(s[charno:])
+
+ return self.PREFIX + ''.join(output) + self.SUFFIX
+
+ def subfunc(self, match):
+ other, text = match.group(1, 2)
+ #print 'M %20r %20r' % (other, text) # <- for debugging
+ if other:
+ other = '\n'.join([self.markup(line, 'other')
+ for line in other.split('\n')])
+
if match.group('PROMPT1'):
- return markup_func(match.group(), 'prompt')
- if match.group('PROMPT2'):
- return markup_func(match.group(), 'more')
- if match.group('KEYWORD'):
- return markup_func(match.group(), 'keyword')
- if match.group('BUILTIN'):
- return markup_func(match.group(), 'builtin')
- if match.group('COMMENT'):
- return markup_func(match.group(), 'comment')
- if match.group('STRING') and '\n' not in match.group():
- return markup_func(match.group(), 'string')
+ return other + self.markup(text, 'prompt')
+ elif match.group('PROMPT2'):
+ return other + self.markup(text, 'more')
+ elif match.group('KEYWORD'):
+ return other + self.markup(text, 'keyword')
+ elif match.group('BUILTIN'):
+ return other + self.markup(text, 'builtin')
+ elif match.group('COMMENT'):
+ return other + self.markup(text, 'comment')
+ elif match.group('STRING') and '\n' not in text:
+ return other + self.markup(text, 'string')
elif match.group('STRING'):
# It's a multiline string; colorize the string & prompt
# portion of each line.
- pieces = [markup_func(s, ['string','more'][i%2])
- for i, s in enumerate(PROMPT2_RE.split(match.group()))]
- return ''.join(pieces)
+ pieces = []
+ for line in text.split('\n'):
+ if self.PROMPT2_RE.match(line):
+ if len(line) > 4:
+ pieces.append(self.markup(line[:4], 'more') +
+ self.markup(line[4:], 'string'))
+ else:
+ pieces.append(self.markup(line[:4], 'more'))
+ elif line:
+ pieces.append(self.markup(line, 'string'))
+ else:
+ pieces.append('')
+ return other + '\n'.join(pieces)
+ elif match.group('DEFINE'):
+ m = re.match('(?P<def>\w+)(?P<space>\s+)(?P<name>\w+)', text)
+ return other + (self.markup(m.group('def'), 'keyword') +
+ self.markup(m.group('space'), 'other') +
+ self.markup(m.group('name'), 'defname'))
+ elif match.group('EOS') is not None:
+ return other
else:
- return markup_func(match.group(), 'other')
+ assert 0, 'Unexpected match!'
- if inline:
- pysrc = DOCTEST_RE.sub(subfunc, s)
- return pysrc.strip()
+ def markup(self, s, tag):
+ """
+ Apply syntax highlighting to a single substring from a doctest
+ block. C{s} is the substring, and C{tag} is the tag that
+ should be applied to the substring. C{tag} will be one of the
+ following strings:
+
+ - C{prompt} -- the Python PS1 prompt (>>>)
+ - C{more} -- the Python PS2 prompt (...)
+ - C{keyword} -- a Python keyword (for, if, etc.)
+ - C{builtin} -- a Python builtin name (abs, dir, etc.)
+ - C{string} -- a string literal
+ - C{comment} -- a comment
+ - C{except} -- an exception traceback (up to the next >>>)
+ - C{output} -- the output from a doctest block.
+ - C{defname} -- the name of a function or class defined by
+ a C{def} or C{class} statement.
+ - C{other} -- anything else (does *not* include output.)
+ """
+ raise AssertionError("Abstract method")
- # need to add a third state here for correctly formatting exceptions
+class XMLDoctestColorizer(DoctestColorizer):
+ """
+ A subclass of DoctestColorizer that generates XML-like output.
+ This class is mainly intended to be used for testing purposes.
+ """
+ PREFIX = '<colorized>\n'
+ SUFFIX = '</colorized>\n'
+ def markup(self, s, tag):
+ s = s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+ if tag == 'other': return s
+ else: return '<%s>%s</%s>' % (tag, s, tag)
- for line in s.split('\n')+['\n']:
- if PROMPT_RE.match(line):
- pysrc.append(line)
- if pyout:
- pyout = '\n'.join(pyout).strip()
- m = EXCEPT_RE.match(pyout)
- if m:
- pyout, pyexc = m.group(1).strip(), m.group(2).strip()
- if pyout:
- print ('Warning: doctest does not allow for mixed '
- 'output and exceptions!')
- result.append(markup_func(pyout, 'output'))
- result.append(markup_func(pyexc, 'except'))
- else:
- result.append(markup_func(pyout, 'output'))
- pyout = []
+class HTMLDoctestColorizer(DoctestColorizer):
+ """A subclass of DoctestColorizer that generates HTML output."""
+ PREFIX = '<pre class="py-doctest">\n'
+ SUFFIX = '</pre>\n'
+ def markup(self, s, tag):
+ if tag == 'other':
+ return plaintext_to_html(s)
else:
- pyout.append(line)
- if pysrc:
- pysrc = DOCTEST_RE.sub(subfunc, '\n'.join(pysrc))
- result.append(pysrc.strip())
- #result.append(markup_func(pysrc.strip(), 'python'))
- pysrc = []
+ return ('<span class="py-%s">%s</span>' %
+ (tag, plaintext_to_html(s)))
- remainder = '\n'.join(pyout).strip()
- if remainder:
- result.append(markup_func(remainder, 'output'))
- result = '\n'.join(result)
+class LaTeXDoctestColorizer(DoctestColorizer):
+ """A subclass of DoctestColorizer that generates LaTeX output."""
+ PREFIX = '\\begin{alltt}\n'
+ SUFFIX = '\\end{alltt}\n'
+ def markup(self, s, tag):
+ if tag == 'other':
+ return plaintext_to_latex(s)
+ else:
+ return '\\pysrc%s{%s}' % (tag, plaintext_to_latex(s))
- # Merge adjacent spans w/ the same class. I.e, convert:
- # <span class="x">foo</span><span class="x">foo</span>
- # to:
- # <span class="x">foofoo</span>
- prev_span_class = [None]
- def subfunc(match):
- if match.group(2) == prev_span_class[0]:
- prev_span_class[0] = match.group(2)
- return match.group(1) or ''
- else:
- prev_span_class[0] = match.group(2)
- return match.group()
- result = re.sub(r'</span>(\n?)<span class="([^"]+)">', subfunc, result)
- return result
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|