[Epydoc-commits] SF.net SVN: epydoc: [1440] trunk/epydoc/src/epydoc/markup/doctest.py
Brought to you by:
edloper
From: <ed...@us...> - 2007-02-07 06:37:26
|
Revision: 1440 http://svn.sourceforge.net/epydoc/?rev=1440&view=rev Author: edloper Date: 2007-02-06 22:37:21 -0800 (Tue, 06 Feb 2007) Log Message: ----------- - Improved/refactored version of doctest colorizer. Modified Paths: -------------- trunk/epydoc/src/epydoc/markup/doctest.py Modified: trunk/epydoc/src/epydoc/markup/doctest.py =================================================================== --- trunk/epydoc/src/epydoc/markup/doctest.py 2007-02-07 06:21:26 UTC (rev 1439) +++ trunk/epydoc/src/epydoc/markup/doctest.py 2007-02-07 06:37:21 UTC (rev 1440) @@ -19,176 +19,292 @@ import re from epydoc.util import plaintext_to_html, plaintext_to_latex +__all__ = ['doctest_to_html', 'doctest_to_latex', + 'DoctestColorizer', 'XMLDoctestColorizer', + 'HTMLDoctestColorizer', 'LaTeXDoctestColorizer'] + def doctest_to_html(s): """ Perform syntax highlighting on the given doctest string, and return the resulting HTML code. This code consists of a C{<pre>} block with class=py-doctest. Syntax highlighting is performed - using the following css classes: 'py-prompt', 'py-keyword', - 'py-string', 'py-comment', and 'py-output'. + using the following css classes: + + - C{py-prompt} -- the Python PS1 prompt (>>>) + - C{py-more} -- the Python PS2 prompt (...) + - C{py-keyword} -- a Python keyword (for, if, etc.) + - C{py-builtin} -- a Python builtin name (abs, dir, etc.) + - C{py-string} -- a string literal + - C{py-comment} -- a comment + - C{py-except} -- an exception traceback (up to the next >>>) + - C{py-output} -- the output from a doctest block. + - C{py-defname} -- the name of a function or class defined by + a C{def} or C{class} statement. """ - return ('<pre class="py-doctest">\n%s\n</pre>\n' % - colorize_doctest(s, _tag_span_html).strip()) + return HTMLDoctestColorizer().colorize_doctest(s) def doctest_to_latex(s): """ Perform syntax highlighting on the given doctest string, and return the resulting LaTeX code. This code consists of an - C{alltt} environment. 
Syntax highlighting is performed using five - new latex commands, which must be defined externally: - '\pysrcprompt', '\pysrckeyword', '\pysrcstring', '\pysrccomment', - and '\pysrcoutput'. + C{alltt} environment. Syntax highlighting is performed using + the following new latex commands, which must be defined externally: + - C{\pysrcprompt} -- the Python PS1 prompt (>>>) + - C{\pysrcmore} -- the Python PS2 prompt (...) + - C{\pysrckeyword} -- a Python keyword (for, if, etc.) + - C{\pysrcbuiltin} -- a Python builtin name (abs, dir, etc.) + - C{\pysrcstring} -- a string literal + - C{\pysrccomment} -- a comment + - C{\pysrcexcept} -- an exception traceback (up to the next >>>) + - C{\pysrcoutput} -- the output from a doctest block. + - C{\pysrcdefname} -- the name of a function or class defined by + a C{def} or C{class} statement. """ - return ('\\begin{alltt}\n%s\n\\end{alltt}\n' % - colorize_doctest(s, _tag_span_latex).strip()) + return LaTeXDoctestColorizer().colorize_doctest(s) -def _tag_span_html(s, tag): - return '<span class="py-%s">%s</span>' % (tag, plaintext_to_html(s)) +class DoctestColorizer: + """ + An abstract base class for performing syntax highlighting on + doctest blocks and other bits of Python code. Subclasses should + provide definitions for: -def _tag_span_latex(s, tag): - return '\\pysrc%s{%s}' % (tag, plaintext_to_latex(s)) + - The L{markup()} method, which takes a substring and a tag, and + returns a colorized version of the substring. + - The L{PREFIX} and L{SUFFIX} variables, which will be added + to the beginning and end of the strings returned by + L{colorize_codeblock} and L{colorize_doctest}. + """ -# Regular expressions for colorize_doctestblock -# set of keywords as listed in the Python Language Reference 2.4.1 -# added 'as' as well since IDLE already colorizes it as a keyword. -# The documentation states that 'None' will become a keyword -# eventually, but IDLE currently handles that as a builtin. 
-_KEYWORDS = """ -and del for is raise -assert elif from lambda return -break else global not try -class except if or while -continue exec import pass yield -def finally in print -as -""".split() -_KEYWORD = '|'.join([r'\b%s\b' % _KW for _KW in _KEYWORDS]) + #: A string that is added to the beginning of the strings + #: returned by L{colorize_codeblock} and L{colorize_doctest}. + #: Typically, this string begins a preformatted area. + PREFIX = None -_BUILTINS = [_BI for _BI in dir(__builtins__) if not _BI.startswith('__')] -_BUILTIN = '|'.join([r'\b%s\b' % _BI for _BI in _BUILTINS]) + #: A string that is added to the end of the strings + #: returned by L{colorize_codeblock} and L{colorize_doctest}. + #: Typically, this string ends a preformatted area. + SUFFIX = None -_STRING = '|'.join([r'("""("""|.*?((?!").)"""))', r'("("|.*?((?!").)"))', - r"('''('''|.*?[^\\']'''))", r"('('|.*?[^\\']'))"]) -_COMMENT = '(#.*?$)' -_PROMPT1 = r'^\s*>>>(?:\s|$)' -_PROMPT2 = r'^\s*\.\.\.(?:\s|$)' + #: A list of the names of all Python keywords. ('as' is included + #: even though it is technically not a keyword.) + _KEYWORDS = ("and del for is raise" + "assert elif from lambda return" + "break else global not try" + "class except if or while" + "continue exec import pass yield" + "def finally in print as").split() -PROMPT_RE = re.compile('(%s|%s)' % (_PROMPT1, _PROMPT2), - re.MULTILINE | re.DOTALL) -PROMPT2_RE = re.compile('(%s)' % _PROMPT2, re.MULTILINE | re.DOTALL) -'''The regular expression used to find Python prompts (">>>" and -"...") in doctest blocks.''' + #: A list of all Python builtins. + _BUILTINS = [_BI for _BI in dir(__builtins__) + if not _BI.startswith('__')] -EXCEPT_RE = re.compile(r'(.*)(^Traceback \(most recent call last\):.*)', - re.DOTALL | re.MULTILINE) + #: A regexp group that matches keywords. + _KEYWORD_GRP = '|'.join([r'\b%s\b' % _KW for _KW in _KEYWORDS]) -DOCTEST_DIRECTIVE_RE = re.compile(r'#\s*doctest:.*') + #: A regexp group that matches Python builtins. 
+ _BUILTIN_GRP = (r'(?<!\.)(?:%s)' % '|'.join([r'\b%s\b' % _BI + for _BI in _BUILTINS])) -DOCTEST_RE = re.compile(r"""(?P<STRING>%s)|(?P<COMMENT>%s)|""" - r"""(?P<KEYWORD>(%s))|(?P<BUILTIN>(%s))|""" - r"""(?P<PROMPT1>%s)|(?P<PROMPT2>%s)|.+?""" % - (_STRING, _COMMENT, _KEYWORD, _BUILTIN, _PROMPT1, _PROMPT2), - re.MULTILINE | re.DOTALL) -'''The regular expression used by L{_doctest_sub} to colorize doctest -blocks.''' + #: A regexp group that matches Python strings. + _STRING_GRP = '|'.join( + [r'("""("""|.*?((?!").)"""))', r'("("|.*?((?!").)"))', + r"('''('''|.*?[^\\']'''))", r"('('|.*?[^\\']'))"]) -def colorize_doctest(s, markup_func, inline=False, strip_directives=False): - """ - Colorize the given doctest string C{s} using C{markup_func()}. - C{markup_func()} should be a function that takes a substring and a - tag, and returns a colorized version of the substring. E.g.: + #: A regexp group that matches Python comments. + _COMMENT_GRP = '(#.*?$)' - >>> def html_markup_func(s, tag): - ... return '<span class="%s">%s</span>' % (tag, s) + #: A regexp group that matches Python ">>>" prompts. + _PROMPT1_GRP = r'^[ \t]*>>>(?:[ \t]|$)' + + #: A regexp group that matches Python "..." prompts. + _PROMPT2_GRP = r'^[ \t]*\.\.\.(?:[ \t]|$)' - The tags that will be passed to the markup function are: - - C{prompt} -- the Python PS1 prompt (>>>) - - C{more} -- the Python PS2 prompt (...) - - C{keyword} -- a Python keyword (for, if, etc.) - - C{builtin} -- a Python builtin name (abs, dir, etc.) - - C{string} -- a string literal - - C{comment} -- a comment - - C{except} -- an exception traceback (up to the next >>>) - - C{output} -- the output from a doctest block. - - C{other} -- anything else (does *not* include output.) - """ - pysrc = [] # the source code part of a docstest block (lines) - pyout = [] # the output part of a doctest block (lines) - result = [] - out = result.append + #: A regexp group that matches function and class definitions. 
+ _DEFINE_GRP = r'\b(?:def|class)[ \t]+\w+' - if strip_directives: - s = DOCTEST_DIRECTIVE_RE.sub('', s) + #: A regexp that matches Python prompts + PROMPT_RE = re.compile('(%s|%s)' % (_PROMPT1_GRP, _PROMPT2_GRP), + re.MULTILINE | re.DOTALL) - def subfunc(match): + #: A regexp that matches Python "..." prompts. + PROMPT2_RE = re.compile('(%s)' % _PROMPT2_GRP, + re.MULTILINE | re.DOTALL) + + #: A regexp that matches doctest exception blocks. + EXCEPT_RE = re.compile(r'^[ \t]*Traceback \(most recent call last\):.*', + re.DOTALL | re.MULTILINE) + + #: A regexp that matches doctest directives. + DOCTEST_DIRECTIVE_RE = re.compile(r'#[ \t]*doctest:.*') + + #: A regexp that matches all of the regions of a doctest block + #: that should be colored. + DOCTEST_RE = re.compile( + r'(.*?)((?P<STRING>%s)|(?P<COMMENT>%s)|(?P<DEFINE>%s)|' + r'(?P<KEYWORD>%s)|(?P<BUILTIN>%s)|' + r'(?P<PROMPT1>%s)|(?P<PROMPT2>%s)|(?P<EOS>\Z))' % ( + _STRING_GRP, _COMMENT_GRP, _DEFINE_GRP, _KEYWORD_GRP, _BUILTIN_GRP, + _PROMPT1_GRP, _PROMPT2_GRP), re.MULTILINE | re.DOTALL) + + #: This regular expression is used to find doctest examples in a + #: string. This is copied from the standard Python doctest.py + #: module (after the refactoring in Python 2.4+). + DOCTEST_EXAMPLE_RE = re.compile(r''' + # Source consists of a PS1 line followed by zero or more PS2 lines. + (?P<source> + (?:^(?P<indent> [ ]*) >>> .*) # PS1 line + (?:\n [ ]* \.\.\. .*)* # PS2 lines + \n?) + # Want consists of any non-blank lines that do not start with PS1. + (?P<want> (?:(?![ ]*$) # Not a blank line + (?![ ]*>>>) # Not a line starting with PS1 + .*$\n? # But any other line + )*) + ''', re.MULTILINE | re.VERBOSE) + + def colorize_inline(self, s): + """ + Colorize a string containing Python code. Do not add the + L{PREFIX} and L{SUFFIX} strings to the returned value. This + method is intended for generating syntax-highlighted strings + that are appropriate for inclusion as inline expressions. 
+ """ + return self.DOCTEST_RE.sub(self.subfunc, s) + + def colorize_codeblock(self, s): + """ + Colorize a string containing only Python code. This method + differs from L{colorize_doctest} in that it will not search + for doctest prompts when deciding how to colorize the string. + """ + body = self.DOCTEST_RE.sub(self.subfunc, s) + return self.PREFIX + body + self.SUFFIX + + def colorize_doctest(self, s, strip_directives=False): + """ + Colorize a string containing one or more doctest examples. + """ + output = [] + charno = 0 + for m in self.DOCTEST_EXAMPLE_RE.finditer(s): + # Parse the doctest example: + pysrc, want = m.group('source', 'want') + # Pre-example text: + output.append(s[charno:m.start()]) + # Example source code: + output.append(self.DOCTEST_RE.sub(self.subfunc, pysrc)) + # Example output: + if want: + if self.EXCEPT_RE.match(want): + output += [self.markup(line, 'except')+'\n' + for line in want.split('\n')] + else: + output += [self.markup(line, 'output')+'\n' + for line in want.split('\n')] + # Update charno + charno = m.end() + # Add any remaining post-example text. 
+ output.append(s[charno:]) + + return self.PREFIX + ''.join(output) + self.SUFFIX + + def subfunc(self, match): + other, text = match.group(1, 2) + #print 'M %20r %20r' % (other, text) # <- for debugging + if other: + other = '\n'.join([self.markup(line, 'other') + for line in other.split('\n')]) + if match.group('PROMPT1'): - return markup_func(match.group(), 'prompt') - if match.group('PROMPT2'): - return markup_func(match.group(), 'more') - if match.group('KEYWORD'): - return markup_func(match.group(), 'keyword') - if match.group('BUILTIN'): - return markup_func(match.group(), 'builtin') - if match.group('COMMENT'): - return markup_func(match.group(), 'comment') - if match.group('STRING') and '\n' not in match.group(): - return markup_func(match.group(), 'string') + return other + self.markup(text, 'prompt') + elif match.group('PROMPT2'): + return other + self.markup(text, 'more') + elif match.group('KEYWORD'): + return other + self.markup(text, 'keyword') + elif match.group('BUILTIN'): + return other + self.markup(text, 'builtin') + elif match.group('COMMENT'): + return other + self.markup(text, 'comment') + elif match.group('STRING') and '\n' not in text: + return other + self.markup(text, 'string') elif match.group('STRING'): # It's a multiline string; colorize the string & prompt # portion of each line. 
- pieces = [markup_func(s, ['string','more'][i%2]) - for i, s in enumerate(PROMPT2_RE.split(match.group()))] - return ''.join(pieces) + pieces = [] + for line in text.split('\n'): + if self.PROMPT2_RE.match(line): + if len(line) > 4: + pieces.append(self.markup(line[:4], 'more') + + self.markup(line[4:], 'string')) + else: + pieces.append(self.markup(line[:4], 'more')) + elif line: + pieces.append(self.markup(line, 'string')) + else: + pieces.append('') + return other + '\n'.join(pieces) + elif match.group('DEFINE'): + m = re.match('(?P<def>\w+)(?P<space>\s+)(?P<name>\w+)', text) + return other + (self.markup(m.group('def'), 'keyword') + + self.markup(m.group('space'), 'other') + + self.markup(m.group('name'), 'defname')) + elif match.group('EOS') is not None: + return other else: - return markup_func(match.group(), 'other') + assert 0, 'Unexpected match!' - if inline: - pysrc = DOCTEST_RE.sub(subfunc, s) - return pysrc.strip() + def markup(self, s, tag): + """ + Apply syntax highlighting to a single substring from a doctest + block. C{s} is the substring, and C{tag} is the tag that + should be applied to the substring. C{tag} will be one of the + following strings: + + - C{prompt} -- the Python PS1 prompt (>>>) + - C{more} -- the Python PS2 prompt (...) + - C{keyword} -- a Python keyword (for, if, etc.) + - C{builtin} -- a Python builtin name (abs, dir, etc.) + - C{string} -- a string literal + - C{comment} -- a comment + - C{except} -- an exception traceback (up to the next >>>) + - C{output} -- the output from a doctest block. + - C{defname} -- the name of a function or class defined by + a C{def} or C{class} statement. + - C{other} -- anything else (does *not* include output.) + """ + raise AssertionError("Abstract method") - # need to add a third state here for correctly formatting exceptions +class XMLDoctestColorizer(DoctestColorizer): + """ + A subclass of DoctestColorizer that generates XML-like output. 
+ This class is mainly intended to be used for testing purposes. + """ + PREFIX = '<colorized>\n' + SUFFIX = '</colorized>\n' + def markup(self, s, tag): + s = s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') + if tag == 'other': return s + else: return '<%s>%s</%s>' % (tag, s, tag) - for line in s.split('\n')+['\n']: - if PROMPT_RE.match(line): - pysrc.append(line) - if pyout: - pyout = '\n'.join(pyout).strip() - m = EXCEPT_RE.match(pyout) - if m: - pyout, pyexc = m.group(1).strip(), m.group(2).strip() - if pyout: - print ('Warning: doctest does not allow for mixed ' - 'output and exceptions!') - result.append(markup_func(pyout, 'output')) - result.append(markup_func(pyexc, 'except')) - else: - result.append(markup_func(pyout, 'output')) - pyout = [] +class HTMLDoctestColorizer(DoctestColorizer): + """A subclass of DoctestColorizer that generates HTML output.""" + PREFIX = '<pre class="py-doctest">\n' + SUFFIX = '</pre>\n' + def markup(self, s, tag): + if tag == 'other': + return plaintext_to_html(s) else: - pyout.append(line) - if pysrc: - pysrc = DOCTEST_RE.sub(subfunc, '\n'.join(pysrc)) - result.append(pysrc.strip()) - #result.append(markup_func(pysrc.strip(), 'python')) - pysrc = [] + return ('<span class="py-%s">%s</span>' % + (tag, plaintext_to_html(s))) - remainder = '\n'.join(pyout).strip() - if remainder: - result.append(markup_func(remainder, 'output')) - result = '\n'.join(result) +class LaTeXDoctestColorizer(DoctestColorizer): + """A subclass of DoctestColorizer that generates LaTeX output.""" + PREFIX = '\\begin{alltt}\n' + SUFFIX = '\\end{alltt}\n' + def markup(self, s, tag): + if tag == 'other': + return plaintext_to_latex(s) + else: + return '\\pysrc%s{%s}' % (tag, plaintext_to_latex(s)) - # Merge adjacent spans w/ the same class. 
I.e, convert: - # <span class="x">foo</span><span class="x">foo</span> - # to: - # <span class="x">foofoo</span> - prev_span_class = [None] - def subfunc(match): - if match.group(2) == prev_span_class[0]: - prev_span_class[0] = match.group(2) - return match.group(1) or '' - else: - prev_span_class[0] = match.group(2) - return match.group() - result = re.sub(r'</span>(\n?)<span class="([^"]+)">', subfunc, result) - return result This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |