[Epydoc-commits] SF.net SVN: epydoc: [1344] trunk/epydoc/src/epydoc
Brought to you by:
edloper
From: <ed...@us...> - 2006-09-02 01:40:41
|
Revision: 1344 http://svn.sourceforge.net/epydoc/?rev=1344&view=rev Author: edloper Date: 2006-09-01 18:40:35 -0700 (Fri, 01 Sep 2006) Log Message: ----------- - Replaced xml.dom.minidom with a *very* simple tree representation for parsed epytext. (Using the new Element class). This should significantly speed up some of the epytext processing steps. Modified Paths: -------------- trunk/epydoc/src/epydoc/markup/epytext.py trunk/epydoc/src/epydoc/test/epytext.doctest Modified: trunk/epydoc/src/epydoc/markup/epytext.py =================================================================== --- trunk/epydoc/src/epydoc/markup/epytext.py 2006-09-02 01:23:23 UTC (rev 1343) +++ trunk/epydoc/src/epydoc/markup/epytext.py 2006-09-02 01:40:35 UTC (rev 1344) @@ -9,7 +9,8 @@ """ Parser for epytext strings. Epytext is a lightweight markup whose primary intended application is Python documentation strings. This -parser converts Epytext strings to a XML/DOM representation. Epytext +parser converts Epytext strings to a simple DOM-like representation +(encoded as a tree of L{Element} objects and strings). Epytext strings can contain the following X{structural blocks}: - X{epytext}: The top-level element of the DOM tree. @@ -105,13 +106,48 @@ # 5. testing import re, string, types, sys, os.path -from xml.dom.minidom import Document, Text -import xml.dom.minidom from epydoc.markup import * from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex from epydoc.markup.doctest import doctest_to_html, doctest_to_latex ################################################## +## DOM-Like Encoding +################################################## + +class Element: + """ + A very simple DOM-like representation for parsed epytext + documents. Each epytext document is encoded as a tree whose nodes + are L{Element} objects, and whose leaves are C{string}s. Each + node is marked by a I{tag} and zero or more I{attributes}. Each + attribute is a mapping from a string key to a string value. + """ + def __init__(self, tag, *children, **attribs): + self.tag = tag + """A string tag indicating the type of this element. + @type: C{string}""" + + self.children = list(children) + """A list of the children of this element. + @type: C{list} of (C{string} or C{Element})""" + + self.attribs = attribs + """A dictionary mapping attribute names to attribute values + for this element. + @type: C{dict} from C{string} to C{string}""" + + def __str__(self): + """ + Return a string representation of this element, using XML + notation. + @bug: Doesn't escape '<' or '&' or '>'. + """ + attribs = ''.join([' %s=%r' % t for t in self.attribs.items()]) + return ('<%s%s>' % (self.tag, attribs) + + ''.join([str(child) for child in self.children]) + + '</%s>' % self.tag) + +################################################## ## Constants ################################################## @@ -196,7 +232,7 @@ ignored. @type errors: C{list} of L{ParseError} @return: a DOM tree encoding the contents of an epytext string. - @rtype: L{xml.dom.minidom.Document} + @rtype: C{Element} @raise ParseError: If C{errors} is C{None} and an error is encountered while parsing. """ @@ -217,8 +253,8 @@ # Have we encountered a field yet? encountered_field = 0 - # Create an XML document to hold the epytext. - doc = Document() + # Create an document to hold the epytext. + doc = Element('epytext') # Maintain two parallel stacks: one contains DOM elements, and # gives the ancestors of the current block. The other contains @@ -230,13 +266,13 @@ # corresponds to). No 2 consecutive indent_stack values will be # ever be "None." Use initial dummy elements in the stack, so we # don't have to worry about bounds checking. - stack = [None, doc.createElement('epytext')] + stack = [None, doc] indent_stack = [-1, None] for token in tokens: # Uncomment this for debugging: #print ('%s: %s\n%s: %s\n' % - # (''.join(['%-11s' % (t and t.tagName) for t in stack]), + # (''.join(['%-11s' % (t and t.tag) for t in stack]), # token.tag, ''.join(['%-11s' % i for i in indent_stack]), # token.indent)) @@ -253,11 +289,11 @@ # If Token has type LBLOCK, add the new literal block elif token.tag == Token.LBLOCK: - stack[-1].appendChild(token.to_dom(doc)) + stack[-1].children.append(token.to_dom(doc)) # If Token has type DTBLOCK, add the new doctest block elif token.tag == Token.DTBLOCK: - stack[-1].appendChild(token.to_dom(doc)) + stack[-1].children.append(token.to_dom(doc)) # If Token has type BULLET, add the new list/list item/field elif token.tag == Token.BULLET: @@ -266,7 +302,7 @@ assert 0, 'Unknown token type: '+token.tag # Check if the DOM element we just added was a field.. - if stack[-1].tagName == 'field': + if stack[-1].tag == 'field': encountered_field = 1 elif encountered_field == 1: if len(stack) <= 3: @@ -282,7 +318,6 @@ return None # Return the top-level epytext DOM element. - doc.appendChild(stack[1]) return doc def _pop_completed_blocks(token, stack, indent_stack): @@ -305,10 +340,10 @@ # Dedent to a list item, if it is follwed by another list # item with the same indentation. elif (token.tag == 'bullet' and indent==indent_stack[-2] and - stack[-1].tagName in ('li', 'field')): pop=1 + stack[-1].tag in ('li', 'field')): pop=1 # End of a list (no more list items available) - elif (stack[-1].tagName in ('ulist', 'olist') and + elif (stack[-1].tag in ('ulist', 'olist') and (token.tag != 'bullet' or token.contents[-1] == ':')): pop=1 @@ -326,7 +361,7 @@ if para_token.indent == indent_stack[-1]: # Colorize the paragraph and add it. para = _colorize(doc, para_token, errors) - stack[-1].appendChild(para) + stack[-1].children.append(para) else: estr = "Improper paragraph indentation." errors.append(StructuringError(estr, para_token.startline)) @@ -341,7 +376,7 @@ # Check for errors. for tok in stack[2:]: - if tok.tagName != "section": + if tok.tag != "section": estr = "Headings must occur at the top level." errors.append(StructuringError(estr, heading_token.startline)) break @@ -358,10 +393,10 @@ head = _colorize(doc, heading_token, errors, 'heading') # Add the section's and heading's DOM elements. - sec = doc.createElement("section") - stack[-1].appendChild(sec) + sec = Element("section") + stack[-1].children.append(sec) stack.append(sec) - sec.appendChild(head) + sec.children.append(head) indent_stack.append(None) def _add_list(doc, bullet_token, stack, indent_stack, errors): @@ -382,11 +417,11 @@ # Is this a new list? newlist = 0 - if stack[-1].tagName != list_type: + if stack[-1].tag != list_type: newlist = 1 - elif list_type == 'olist' and stack[-1].tagName == 'olist': - old_listitem = stack[-1].childNodes[-1] - old_bullet = old_listitem.getAttribute("bullet").split('.')[:-1] + elif list_type == 'olist' and stack[-1].tag == 'olist': + old_listitem = stack[-1].children[-1] + old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1] new_bullet = bullet_token.contents.split('.')[:-1] if (new_bullet[:-1] != old_bullet[:-1] or int(new_bullet[-1]) != int(old_bullet[-1])+1): @@ -394,7 +429,7 @@ # Create the new list. if newlist: - if stack[-1].tagName is 'fieldlist': + if stack[-1].tag is 'fieldlist': # The new list item is not a field list item (since this # is a new list); but it's indented the same as the field # list. This either means that they forgot to indent the @@ -403,7 +438,7 @@ # just warn about that (to avoid confusion). estr = "Lists must be indented." errors.append(StructuringError(estr, bullet_token.startline)) - if stack[-1].tagName in ('ulist', 'olist', 'fieldlist'): + if stack[-1].tag in ('ulist', 'olist', 'fieldlist'): stack.pop() indent_stack.pop() @@ -419,7 +454,7 @@ if list_type == 'fieldlist': # Fieldlist should be at the top-level. for tok in stack[2:]: - if tok.tagName != "section": + if tok.tag != "section": estr = "Fields must be at the top level." errors.append( StructuringError(estr, bullet_token.startline)) @@ -428,41 +463,40 @@ indent_stack[2:] = [] # Add the new list. - lst = doc.createElement(list_type) - stack[-1].appendChild(lst) + lst = Element(list_type) + stack[-1].children.append(lst) stack.append(lst) indent_stack.append(bullet_token.indent) if list_type == 'olist': start = bullet_token.contents.split('.')[:-1] if start != '1': - lst.setAttribute("start", start[-1]) + lst.attribs["start"] = start[-1] # Fields are treated somewhat specially: A "fieldlist" # node is created to make the parsing simpler, but fields # are adjoined directly into the "epytext" node, not into # the "fieldlist" node. if list_type == 'fieldlist': - li = doc.createElement("field") + li = Element("field") token_words = bullet_token.contents[1:-1].split(None, 1) - tag_elt = doc.createElement("tag") - tag_elt.appendChild(doc.createTextNode(token_words[0])) - li.appendChild(tag_elt) + tag_elt = Element("tag") + tag_elt.children.append(token_words[0]) + li.children.append(tag_elt) if len(token_words) > 1: - arg_elt = doc.createElement("arg") - arg_elt.appendChild(doc.createTextNode(token_words[1])) - li.appendChild(arg_elt) + arg_elt = Element("arg") + arg_elt.children.append(token_words[1]) + li.children.append(arg_elt) else: - li = doc.createElement("li") + li = Element("li") if list_type == 'olist': - li.setAttribute("bullet", bullet_token.contents) + li.attribs["bullet"] = bullet_token.contents # Add the bullet. - stack[-1].appendChild(li) + stack[-1].children.append(li) stack.append(li) indent_stack.append(None) - ################################################## ## Tokenization ################################################## @@ -570,10 +604,10 @@ def to_dom(self, doc): """ @return: a DOM representation of this C{Token}. - @rtype: L{xml.dom.minidom.Element} + @rtype: L{Element} """ - e = doc.createElement(self.tag) - e.appendChild(doc.createTextNode(self.contents)) + e = Element(self.tag) + e.children.append(self.contents) return e # Construct regular expressions for recognizing bullets. These are @@ -941,7 +975,7 @@ # the text currently being analyzed. New elements are pushed when # "{" is encountered, and old elements are popped when "}" is # encountered. - stack = [doc.createElement(tagName)] + stack = [Element(tagName)] # This is just used to make error-reporting friendlier. It's a # stack parallel to "stack" containing the index of each element's @@ -967,20 +1001,20 @@ if match.group() == '{': if (end>0) and 'A' <= str[end-1] <= 'Z': if (end-1) > start: - stack[-1].appendChild(doc.createTextNode(str[start:end-1])) + stack[-1].children.append(str[start:end-1]) if not _COLORIZING_TAGS.has_key(str[end-1]): estr = "Unknown inline markup tag." errors.append(ColorizingError(estr, token, end-1)) - stack.append(doc.createElement('unknown')) + stack.append(Element('unknown')) else: tag = _COLORIZING_TAGS[str[end-1]] - stack.append(doc.createElement(tag)) + stack.append(Element(tag)) else: if end > start: - stack[-1].appendChild(doc.createTextNode(str[start:end])) - stack.append(doc.createElement('litbrace')) + stack[-1].children.append(str[start:end]) + stack.append(Element('litbrace')) openbrace_stack.append(end) - stack[-2].appendChild(stack[-1]) + stack[-2].children.append(stack[-1]) # Close braces end colorizing elements. elif match.group() == '}': @@ -993,62 +1027,51 @@ # Add any remaining text. if end > start: - stack[-1].appendChild(doc.createTextNode(str[start:end])) + stack[-1].children.append(str[start:end]) # Special handling for symbols: - if stack[-1].tagName == 'symbol': - if (len(stack[-1].childNodes) != 1 or - not isinstance(stack[-1].childNodes[0], Text)): + if stack[-1].tag == 'symbol': + if (len(stack[-1].children) != 1 or + not isinstance(stack[-1].children[0], basestring)): estr = "Invalid symbol code." errors.append(ColorizingError(estr, token, end)) else: - symb = stack[-1].childNodes[0].data + symb = stack[-1].children[0] if _SYMBOLS.has_key(symb): # It's a symbol - symbol = doc.createElement('symbol') - stack[-2].removeChild(stack[-1]) - stack[-2].appendChild(symbol) - symbol.appendChild(doc.createTextNode(symb)) + stack[-2].children[-1] = Element('symbol', symb) else: estr = "Invalid symbol code." errors.append(ColorizingError(estr, token, end)) # Special handling for escape elements: - if stack[-1].tagName == 'escape': - if (len(stack[-1].childNodes) != 1 or - not isinstance(stack[-1].childNodes[0], Text)): + if stack[-1].tag == 'escape': + if (len(stack[-1].children) != 1 or + not isinstance(stack[-1].children[0], basestring)): estr = "Invalid escape code." errors.append(ColorizingError(estr, token, end)) else: - escp = stack[-1].childNodes[0].data + escp = stack[-1].children[0] if _ESCAPES.has_key(escp): # It's an escape from _ESCPAES - stack[-2].removeChild(stack[-1]) - escp = _ESCAPES[escp] - stack[-2].appendChild(doc.createTextNode(escp)) + stack[-2].children[-1] = _ESCAPES[escp] elif len(escp) == 1: # It's a single-character escape (eg E{.}) - stack[-2].removeChild(stack[-1]) - stack[-2].appendChild(doc.createTextNode(escp)) + stack[-2].children[-1] = escp else: estr = "Invalid escape code." errors.append(ColorizingError(estr, token, end)) # Special handling for literal braces elements: - if stack[-1].tagName == 'litbrace': - variables = stack[-1].childNodes - stack[-2].removeChild(stack[-1]) - stack[-2].appendChild(doc.createTextNode('{')) - for child in variables: - stack[-2].appendChild(child) - stack[-2].appendChild(doc.createTextNode('}')) + if stack[-1].tag == 'litbrace': + stack[-2].children = ['{'] + stack[-1].children + ['}'] # Special handling for graphs: - if stack[-1].tagName == 'graph': + if stack[-1].tag == 'graph': _colorize_graph(doc, stack[-1], token, end, errors) # Special handling for link-type elements: - if stack[-1].tagName in _LINK_COLORIZING_TAGS: + if stack[-1].tag in _LINK_COLORIZING_TAGS: _colorize_link(doc, stack[-1], token, end, errors) # Pop the completed element. @@ -1059,7 +1082,7 @@ # Add any final text. if start < len(str): - stack[-1].appendChild(doc.createTextNode(str[start:])) + stack[-1].children.append(str[start:]) if len(stack) != 1: estr = "Unbalanced '{'." @@ -1078,13 +1101,13 @@ """ bad_graph_spec = False - children = graph.childNodes[:] - for child in children: graph.removeChild(child) + children = graph.children[:] + graph.children = [] - if len(children) != 1 or not isinstance(children[0], Text): + if len(children) != 1 or not isinstance(children[0], basestring): bad_graph_spec = "Bad graph specification" else: - pieces = children[0].data.split(None, 1) + pieces = children[0].split(None, 1) graphtype = pieces[0].replace(':','').strip().lower() if graphtype in GRAPH_TYPES: if len(pieces) == 2: @@ -1100,51 +1123,49 @@ if bad_graph_spec: errors.append(ColorizingError(bad_graph_spec, token, end)) - graph.appendChild(doc.createTextNode('none')) - graph.appendChild(doc.createTextNode('')) + graph.children.append('none') + graph.children.append('') return - graph.appendChild(doc.createTextNode(graphtype)) + graph.children.append(graphtype) for arg in args: - graph.appendChild(doc.createTextNode(arg)) + graph.children.append(arg) def _colorize_link(doc, link, token, end, errors): - variables = link.childNodes[:] + variables = link.children[:] # If the last child isn't text, we know it's bad. - if len(variables)==0 or not isinstance(variables[-1], Text): - estr = "Bad %s target." % link.tagName + if len(variables)==0 or not isinstance(variables[-1], basestring): + estr = "Bad %s target." % link.tag errors.append(ColorizingError(estr, token, end)) return # Did they provide an explicit target? - match2 = _TARGET_RE.match(variables[-1].data) + match2 = _TARGET_RE.match(variables[-1]) if match2: (text, target) = match2.groups() - variables[-1].data = text + variables[-1] = text # Can we extract an implicit target? elif len(variables) == 1: - target = variables[0].data + target = variables[0] else: - estr = "Bad %s target." % link.tagName + estr = "Bad %s target." % link.tag errors.append(ColorizingError(estr, token, end)) return # Construct the name element. - name_elt = doc.createElement('name') - for child in variables: - name_elt.appendChild(link.removeChild(child)) + name_elt = Element('name', *variables) # Clean up the target. For URIs, assume http or mailto if they # don't specify (no relative urls) target = re.sub(r'\s', '', target) - if link.tagName=='uri': + if link.tag=='uri': if not re.match(r'\w+:', target): if re.match(r'\w+@(\w+)(\.\w+)*', target): target = 'mailto:' + target else: target = 'http://'+target - elif link.tagName=='link': + elif link.tag=='link': # Remove arg lists for functions (e.g., L{_colorize_link()}) target = re.sub(r'\(.*\)$', '', target) if not re.match(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$', target): @@ -1153,12 +1174,10 @@ return # Construct the target element. - target_elt = doc.createElement('target') - target_elt.appendChild(doc.createTextNode(target)) + target_elt = Element('target', target) # Add them to the link element. - link.appendChild(name_elt) - link.appendChild(target_elt) + link.children = [name_elt, target_elt] ################################################## ## Formatters @@ -1176,7 +1195,7 @@ - C{to_epytext(parse(str)) == str} (approximately) @param tree: A DOM document encoding of an epytext string. - @type tree: L{xml.dom.minidom.Document} + @type tree: C{Element} @param indent: The indentation for the string representation of C{tree}. Each line of the returned string will begin with C{indent} space characters. @@ -1187,22 +1206,20 @@ @return: The epytext string corresponding to C{tree}. @rtype: C{string} """ - if isinstance(tree, Document): - return to_epytext(tree.childNodes[0], indent, seclevel) - if isinstance(tree, Text): - str = re.sub(r'\{', '\0', tree.data) + if isinstance(tree, basestring): + str = re.sub(r'\{', '\0', tree) str = re.sub(r'\}', '\1', str) return str - if tree.tagName == 'epytext': indent -= 2 - if tree.tagName == 'section': seclevel += 1 - variables = [to_epytext(c, indent+2, seclevel) for c in tree.childNodes] + if tree.tag == 'epytext': indent -= 2 + if tree.tag == 'section': seclevel += 1 + variables = [to_epytext(c, indent+2, seclevel) for c in tree.children] childstr = ''.join(variables) # Clean up for literal blocks (add the double "::" back) childstr = re.sub(':(\s*)\2', '::\\1', childstr) - if tree.tagName == 'para': + if tree.tag == 'para': str = wordwrap(childstr, indent)+'\n' str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str) str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str) @@ -1211,49 +1228,47 @@ str = re.sub('\0', 'E{lb}', str) str = re.sub('\1', 'E{rb}', str) return str - elif tree.tagName == 'li': - bulletAttr = tree.getAttributeNode('bullet') - if bulletAttr: bullet = bulletAttr.value - else: bullet = '-' + elif tree.tag == 'li': + bullet = tree.attribs.get('bullet') or '-' return indent*' '+ bullet + ' ' + childstr.lstrip() - elif tree.tagName == 'heading': + elif tree.tag == 'heading': str = re.sub('\0', 'E{lb}',childstr) str = re.sub('\1', 'E{rb}', str) uline = len(childstr)*_HEADING_CHARS[seclevel-1] return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n' - elif tree.tagName == 'doctestblock': + elif tree.tag == 'doctestblock': str = re.sub('\0', '{', childstr) str = re.sub('\1', '}', str) lines = [' '+indent*' '+line for line in str.split('\n')] return '\n'.join(lines) + '\n\n' - elif tree.tagName == 'literalblock': + elif tree.tag == 'literalblock': str = re.sub('\0', '{', childstr) str = re.sub('\1', '}', str) lines = [(indent+1)*' '+line for line in str.split('\n')] return '\2' + '\n'.join(lines) + '\n\n' - elif tree.tagName == 'field': + elif tree.tag == 'field': numargs = 0 - while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1 + while tree.children[numargs+1].tag == 'arg': numargs += 1 tag = variables[0] args = variables[1:1+numargs] body = variables[1+numargs:] str = (indent)*' '+'@'+variables[0] if args: str += '(' + ', '.join(args) + ')' return str + ':\n' + ''.join(body) - elif tree.tagName == 'target': + elif tree.tag == 'target': return '<%s>' % childstr - elif tree.tagName in ('fieldlist', 'tag', 'arg', 'epytext', + elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext', 'section', 'olist', 'ulist', 'name'): return childstr - elif tree.tagName == 'symbol': + elif tree.tag == 'symbol': return 'E{%s}' % childstr - elif tree.tagName == 'graph': + elif tree.tag == 'graph': return 'G{%s}' % ' '.join(variables) else: for (tag, name) in _COLORIZING_TAGS.items(): - if name == tree.tagName: + if name == tree.tag: return '%s{%s}' % (tag, childstr) - raise ValueError('Unknown DOM element %r' % tree.tagName) + raise ValueError('Unknown DOM element %r' % tree.tag) def to_plaintext(tree, indent=0, seclevel=0): """ @@ -1263,7 +1278,7 @@ escaped characters in unescaped form, etc. @param tree: A DOM document encoding of an epytext string. - @type tree: L{xml.dom.minidom.Document} + @type tree: C{Element} @param indent: The indentation for the string representation of C{tree}. Each line of the returned string will begin with C{indent} space characters. @@ -1274,67 +1289,63 @@ @return: The epytext string corresponding to C{tree}. @rtype: C{string} """ - if isinstance(tree, Document): - return to_plaintext(tree.childNodes[0], indent, seclevel) - if isinstance(tree, Text): return tree.data + if isinstance(tree, basestring): return tree - if tree.tagName == 'section': seclevel += 1 + if tree.tag == 'section': seclevel += 1 # Figure out the child indent level. - if tree.tagName == 'epytext': cindent = indent - elif tree.tagName == 'li' and tree.getAttributeNode('bullet'): - cindent = indent + 1 + len(tree.getAttributeNode('bullet').value) + if tree.tag == 'epytext': cindent = indent + elif tree.tag == 'li' and tree.attribs.get('bullet'): + cindent = indent + 1 + len(tree.attribs.get('bullet')) else: cindent = indent + 2 - variables = [to_plaintext(c, cindent, seclevel) for c in tree.childNodes] + variables = [to_plaintext(c, cindent, seclevel) for c in tree.children] childstr = ''.join(variables) - if tree.tagName == 'para': + if tree.tag == 'para': return wordwrap(childstr, indent)+'\n' - elif tree.tagName == 'li': + elif tree.tag == 'li': # We should be able to use getAttribute here; but there's no # convenient way to test if an element has an attribute.. - bulletAttr = tree.getAttributeNode('bullet') - if bulletAttr: bullet = bulletAttr.value - else: bullet = '-' + bullet = tree.attribs.get('bullet') or '-' return indent*' ' + bullet + ' ' + childstr.lstrip() - elif tree.tagName == 'heading': + elif tree.tag == 'heading': uline = len(childstr)*_HEADING_CHARS[seclevel-1] return ((indent-2)*' ' + childstr + '\n' + (indent-2)*' ' + uline + '\n') - elif tree.tagName == 'doctestblock': + elif tree.tag == 'doctestblock': lines = [(indent+2)*' '+line for line in childstr.split('\n')] return '\n'.join(lines) + '\n\n' - elif tree.tagName == 'literalblock': + elif tree.tag == 'literalblock': lines = [(indent+1)*' '+line for line in childstr.split('\n')] return '\n'.join(lines) + '\n\n' - elif tree.tagName == 'fieldlist': + elif tree.tag == 'fieldlist': return childstr - elif tree.tagName == 'field': + elif tree.tag == 'field': numargs = 0 - while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1 + while tree.children[numargs+1].tag == 'arg': numargs += 1 tag = variables[0] args = variables[1:1+numargs] body = variables[1+numargs:] str = (indent)*' '+'@'+variables[0] if args: str += '(' + ', '.join(args) + ')' return str + ':\n' + ''.join(body) - elif tree.tagName == 'uri': + elif tree.tag == 'uri': if len(variables) != 2: raise ValueError('Bad URI ') elif variables[0] == variables[1]: return '<%s>' % variables[1] else: return '%r<%s>' % (variables[0], variables[1]) - elif tree.tagName == 'link': + elif tree.tag == 'link': if len(variables) != 2: raise ValueError('Bad Link') return '%s' % variables[0] - elif tree.tagName in ('olist', 'ulist'): + elif tree.tag in ('olist', 'ulist'): # [xx] always use condensed lists. ## Use a condensed list if each list item is 1 line long. #for child in variables: # if child.count('\n') > 2: return childstr return childstr.replace('\n\n', '\n')+'\n' - elif tree.tagName == 'symbol': + elif tree.tag == 'symbol': return '%s' % childstr - elif tree.tagName == 'graph': + elif tree.tag == 'graph': return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:])) else: # Assume that anything else can be passed through. @@ -1348,7 +1359,7 @@ where different blocks begin, along the left margin. @param tree: A DOM document encoding of an epytext string. - @type tree: L{xml.dom.minidom.Document} + @type tree: C{Element} @param indent: The indentation for the string representation of C{tree}. Each line of the returned string will begin with C{indent} space characters. @@ -1359,21 +1370,19 @@ @return: The epytext string corresponding to C{tree}. @rtype: C{string} """ - if isinstance(tree, Document): - return to_debug(tree.childNodes[0], indent, seclevel) - if isinstance(tree, Text): - str = re.sub(r'\{', '\0', tree.data) + if isinstance(tree, basestring): + str = re.sub(r'\{', '\0', tree) str = re.sub(r'\}', '\1', str) return str - if tree.tagName == 'section': seclevel += 1 - variables = [to_debug(c, indent+2, seclevel) for c in tree.childNodes] + if tree.tag == 'section': seclevel += 1 + variables = [to_debug(c, indent+2, seclevel) for c in tree.children] childstr = ''.join(variables) # Clean up for literal blocks (add the double "::" back) childstr = re.sub(':( *\n \|\n)\2', '::\\1', childstr) - if tree.tagName == 'para': + if tree.tag == 'para': str = wordwrap(childstr, indent-6, 69)+'\n' str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str) str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str) @@ -1385,54 +1394,52 @@ lines[0] = ' P>|' + lines[0] lines[1:] = [' |'+l for l in lines[1:]] return '\n'.join(lines)+'\n |\n' - elif tree.tagName == 'li': - bulletAttr = tree.getAttributeNode('bullet') - if bulletAttr: bullet = bulletAttr.value - else: bullet = '-' + elif tree.tag == 'li': + bullet = tree.attribs.get('bullet') or '-' return ' LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip() - elif tree.tagName in ('olist', 'ulist'): + elif tree.tag in ('olist', 'ulist'): return 'LIST>|'+(indent-4)*' '+childstr[indent+2:] - elif tree.tagName == 'heading': + elif tree.tag == 'heading': str = re.sub('\0', 'E{lb}', childstr) str = re.sub('\1', 'E{rb}', str) uline = len(childstr)*_HEADING_CHARS[seclevel-1] return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' + ' |'+(indent-8)*' ' + uline + '\n') - elif tree.tagName == 'doctestblock': + elif tree.tag == 'doctestblock': str = re.sub('\0', '{', childstr) str = re.sub('\1', '}', str) lines = [' |'+(indent-4)*' '+line for line in str.split('\n')] lines[0] = 'DTST>'+lines[0][5:] return '\n'.join(lines) + '\n |\n' - elif tree.tagName == 'literalblock': + elif tree.tag == 'literalblock': str = re.sub('\0', '{', childstr) str = re.sub('\1', '}', str) lines = [' |'+(indent-5)*' '+line for line in str.split('\n')] lines[0] = ' LIT>'+lines[0][5:] return '\2' + '\n'.join(lines) + '\n |\n' - elif tree.tagName == 'field': + elif tree.tag == 'field': numargs = 0 - while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1 + while tree.children[numargs+1].tag == 'arg': numargs += 1 tag = variables[0] args = variables[1:1+numargs] body = variables[1+numargs:] str = ' FLD>|'+(indent-6)*' '+'@'+variables[0] if args: str += '(' + ', '.join(args) + ')' return str + ':\n' + ''.join(body) - elif tree.tagName == 'target': + elif tree.tag == 'target': return '<%s>' % childstr - elif tree.tagName in ('fieldlist', 'tag', 'arg', 'epytext', + elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext', 'section', 'olist', 'ulist', 'name'): return childstr - elif tree.tagName == 'symbol': + elif tree.tag == 'symbol': return 'E{%s}' % childstr - elif tree.tagName == 'graph': + elif tree.tag == 'graph': return 'G{%s}' % ' '.join(variables) else: for (tag, name) in _COLORIZING_TAGS.items(): - if name == tree.tagName: + if name == tree.tag: return '%s{%s}' % (tag, childstr) - raise ValueError('Unknown DOM element %r' % tree.tagName) + raise ValueError('Unknown DOM element %r' % tree.tag) ################################################## ## Top-Level Wrapper function @@ -1455,7 +1462,7 @@ written to. @type stream: C{stream} @return: a DOM document encoding the contents of C{str}. - @rtype: L{xml.dom.minidom.Document} + @rtype: C{Element} @raise SyntaxError: If any fatal errors were encountered. """ errors = [] @@ -1556,15 +1563,9 @@ @return: A DOM document containing C{str} in a single literal block. - @rtype: L{xml.dom.minidom.Document} + @rtype: C{Element} """ - doc = Document() - epytext = doc.createElement('epytext') - lit = doc.createElement('literalblock') - doc.appendChild(epytext) - epytext.appendChild(lit) - lit.appendChild(doc.createTextNode(str)) - return doc + return Element('epytext', Element('literalblock', str)) def parse_as_para(str): """ @@ -1578,15 +1579,9 @@ @type str: C{string} @return: A DOM document containing C{str} in a single paragraph. - @rtype: L{xml.dom.minidom.Document} + @rtype: C{Element} """ - doc = Document() - epytext = doc.createElement('epytext') - para = doc.createElement('para') - doc.appendChild(epytext) - epytext.appendChild(para) - para.appendChild(doc.createTextNode(str)) - return doc + return Element('epytext', Element('para', str)) ################################################################# ## SUPPORT FOR EPYDOC @@ -1710,8 +1705,6 @@ } def __init__(self, dom_tree): - if isinstance(dom_tree, Document): - dom_tree = dom_tree.childNodes[0] self._tree = dom_tree # Caching: self._html = self._latex = self._plaintext = None @@ -1752,84 +1745,81 @@ def _to_html(self, tree, linker, directory, docindex, context, indent=0, seclevel=0): - if isinstance(tree, Text): - return plaintext_to_html(tree.data) + if isinstance(tree, basestring): + return plaintext_to_html(tree) - if tree.tagName == 'epytext': indent -= 2 - if tree.tagName == 'section': seclevel += 1 + if tree.tag == 'epytext': indent -= 2 + if tree.tag == 'section': seclevel += 1 # Process the variables first. variables = [self._to_html(c, linker, directory, docindex, context, indent+2, seclevel) - for c in tree.childNodes] + for c in tree.children] # Get rid of unnecessary <P>...</P> tags; they introduce extra # space on most browsers that we don't want. for i in range(len(variables)-1): - if (not isinstance(tree.childNodes[i], Text) and - tree.childNodes[i].tagName == 'para' and - (isinstance(tree.childNodes[i+1], Text) or - tree.childNodes[i+1].tagName != 'para')): + if (not isinstance(tree.children[i], basestring) and + tree.children[i].tag == 'para' and + (isinstance(tree.children[i+1], basestring) or + tree.children[i+1].tag != 'para')): variables[i] = ' '*(indent+2)+variables[i][5+indent:-5]+'\n' - if (tree.hasChildNodes() and - not isinstance(tree.childNodes[-1], Text) and - tree.childNodes[-1].tagName == 'para'): + if (tree.children and + not isinstance(tree.children[-1], basestring) and + tree.children[-1].tag == 'para'): variables[-1] = ' '*(indent+2)+variables[-1][5+indent:-5]+'\n' # Construct the HTML string for the variables. childstr = ''.join(variables) # Perform the approriate action for the DOM tree type. - if tree.tagName == 'para': + if tree.tag == 'para': return wordwrap('<p>%s</p>' % childstr, indent) - elif tree.tagName == 'code': + elif tree.tag == 'code': return '<code>%s</code>' % childstr - elif tree.tagName == 'uri': + elif tree.tag == 'uri': return ('<a href="%s" target="_top">%s</a>' % (variables[1], variables[0])) - elif tree.tagName == 'link': + elif tree.tag == 'link': return linker.translate_identifier_xref(variables[1], variables[0]) - elif tree.tagName == 'italic': + elif tree.tag == 'italic': return '<i>%s</i>' % childstr - elif tree.tagName == 'math': + elif tree.tag == 'math': return '<i class="math">%s</i>' % childstr - elif tree.tagName == 'indexed': - term = tree.cloneNode(1) - term.tagName = 'epytext' + elif tree.tag == 'indexed': + term = Element('epytext', *tree.children, **tree.attribs) return linker.translate_indexterm(ParsedEpytextDocstring(term)) #term_key = self._index_term_key(tree) #return linker.translate_indexterm(childstr, term_key) - elif tree.tagName == 'bold': + elif tree.tag == 'bold': return '<b>%s</b>' % childstr - elif tree.tagName == 'ulist': + elif tree.tag == 'ulist': return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ') - elif tree.tagName == 'olist': - startAttr = tree.getAttributeNode('start') - if startAttr: start = ' start="%s"' % startAttr.value - else: start = '' + elif tree.tag == 'olist': + start = tree.attribs.get('start') or '' return ('%s<ol%s>\n%s%s</ol>\n' % (indent*' ', start, childstr, indent*' ')) - elif tree.tagName == 'li': + elif tree.tag == 'li': return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ') - elif tree.tagName == 'heading': + elif tree.tag == 'heading': return ('%s<h%s class="heading">%s</h%s>\n' % ((indent-2)*' ', seclevel, childstr, seclevel)) - elif tree.tagName == 'literalblock': + elif tree.tag == 'literalblock': return '<pre class="literalblock">\n%s\n</pre>\n' % childstr - elif tree.tagName == 'doctestblock': - return doctest_to_html(tree.childNodes[0].data.strip()) - elif tree.tagName == 'fieldlist': + elif tree.tag == 'doctestblock': + return doctest_to_html(tree.children[0].strip()) + elif tree.tag == 'fieldlist': raise AssertionError("There should not be any field lists left") - elif tree.tagName in ('epytext', 'section', 'tag', 'arg', + elif tree.tag in ('epytext', 'section', 'tag', 'arg', 'name', 'target', 'html'): return childstr - elif tree.tagName == 'symbol': - symbol = tree.childNodes[0].data + elif tree.tag == 'symbol': + symbol = tree.children[0] if self.SYMBOL_TO_HTML.has_key(symbol): return '&%s;' % self.SYMBOL_TO_HTML[symbol] else: return '[??]' - elif tree.tagName == 'graph': + elif tree.tag == 'graph': # Generate the graph. graph = self._build_graph(variables[0], variables[1:], linker, docindex, context) @@ -1839,7 +1829,7 @@ image_file = os.path.join(directory, image_url) return graph.to_html(image_file, image_url) else: - raise ValueError('Unknown epytext DOM element %r' % tree.tagName) + raise ValueError('Unknown epytext DOM element %r' % tree.tag) #GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph'] def _build_graph(self, graph_type, graph_args, linker, @@ -1883,27 +1873,27 @@ def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0): - if isinstance(tree, Text): - return plaintext_to_latex(tree.data, breakany=breakany) + if isinstance(tree, basestring): + return plaintext_to_latex(tree, breakany=breakany) - if tree.tagName == 'section': seclevel += 1 + if tree.tag == 'section': seclevel += 1 # Figure out the child indent level. - if tree.tagName == 'epytext': cindent = indent + if tree.tag == 'epytext': cindent = indent else: cindent = indent + 2 variables = [self._to_latex(c, linker, cindent, seclevel, breakany) - for c in tree.childNodes] + for c in tree.children] childstr = ''.join(variables) - if tree.tagName == 'para': + if tree.tag == 'para': return wordwrap(childstr, indent)+'\n' - elif tree.tagName == 'code': + elif tree.tag == 'code': return '\\texttt{%s}' % childstr - elif tree.tagName == 'uri': + elif tree.tag == 'uri': if len(variables) != 2: raise ValueError('Bad URI ') if self._hyperref: # ~ and # should not be escaped in the URI. - uri = tree.childNodes[1].childNodes[0].data + uri = tree.children[1].children[0] uri = uri.replace('{\\textasciitilde}', '~') uri = uri.replace('\\#', '#') if variables[0] == variables[1]: @@ -1916,46 +1906,45 @@ return '\\textit{%s}' % variables[1] else: return '%s\\footnote{%s}' % (variables[0], variables[1]) - elif tree.tagName == 'link': + elif tree.tag == 'link': if len(variables) != 2: raise ValueError('Bad Link') return linker.translate_identifier_xref(variables[1], variables[0]) - elif tree.tagName == 'italic': + elif tree.tag == 'italic': return '\\textit{%s}' % childstr - elif tree.tagName == 'math': + elif tree.tag == 'math': return '\\textit{%s}' % childstr - elif tree.tagName == 'indexed': - term = tree.cloneNode(1) - term.tagName = 'epytext' + elif tree.tag == 'indexed': + term = Element('epytext', *tree.children, **tree.attribs) return linker.translate_indexterm(ParsedEpytextDocstring(term)) - elif tree.tagName == 'bold': + elif tree.tag == 'bold': return '\\textbf{%s}' % childstr - elif tree.tagName == 'li': + elif tree.tag == 'li': return indent*' ' + '\\item ' + childstr.lstrip() - elif tree.tagName == 'heading': + elif tree.tag == 'heading': return ' '*(indent-2) + '(section) %s\n\n' % childstr - elif tree.tagName == 'doctestblock': - return doctest_to_latex(tree.childNodes[0].data.strip()) - elif tree.tagName == 'literalblock': + elif tree.tag == 'doctestblock': + return doctest_to_latex(tree.children[0].strip()) + elif tree.tag == 'literalblock': return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr - elif tree.tagName == 'fieldlist': + elif tree.tag == 'fieldlist': return indent*' '+'{omitted fieldlist}\n' - elif tree.tagName == 'olist': + elif tree.tag == 'olist': return (' '*indent + '\\begin{enumerate}\n\n' + ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' + childstr + ' '*indent + '\\end{enumerate}\n\n') - elif tree.tagName == 'ulist': + elif tree.tag == 'ulist': return (' '*indent + '\\begin{itemize}\n' + ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' + childstr + ' '*indent + '\\end{itemize}\n\n') - elif tree.tagName == 'symbol': - symbol = tree.childNodes[0].data + elif tree.tag == 'symbol': + symbol = tree.children[0] if self.SYMBOL_TO_LATEX.has_key(symbol): return r'%s' % self.SYMBOL_TO_LATEX[symbol] else: return '[??]' - elif tree.tagName == 'graph': + elif tree.tag == 'graph': return '(GRAPH)' #raise ValueError, 'graph not implemented yet for latex' else: @@ -1964,78 +1953,73 @@ def summary(self): if self._tree is None: return self - - # Is the cloning that happens here safe/proper? (Cloning - # between 2 different documents) tree = self._tree - - doc = Document() - epytext = doc.createElement('epytext') - doc.appendChild(epytext) + doc = Element('epytext') # Find the first paragraph. - variables = tree.childNodes - while (len(variables) > 0) and (variables[0].tagName != 'para'): - if variables[0].tagName in ('section', 'ulist', 'olist', 'li'): - variables = variables[0].childNodes + variables = tree.children + while (len(variables) > 0) and (variables[0].tag != 'para'): + if variables[0].tag in ('section', 'ulist', 'olist', 'li'): + variables = variables[0].children else: variables = variables[1:] # Special case: if the docstring contains a single literal block, # then try extracting the summary from it. - if (len(variables) == 0 and len(tree.childNodes) == 1 and - tree.childNodes[0].tagName == 'literalblock'): + if (len(variables) == 0 and len(tree.children) == 1 and + tree.children[0].tag == 'literalblock'): str = re.split(r'\n\s*(\n|$).*', - tree.childNodes[0].childNodes[0].data, 1)[0] - variables = [doc.createElement('para')] - variables[0].appendChild(doc.createTextNode(str)) + tree.children[0].children[0], 1)[0] + variables = [Element('para')] + variables[0].children.append(str) # If we didn't find a paragraph, return an empty epytext. if len(variables) == 0: return ParsedEpytextDocstring(doc) # Extract the first sentence. - parachildren = variables[0].childNodes - para = doc.createElement('para') - epytext.appendChild(para) + parachildren = variables[0].children + para = Element('para') + doc.children.append(para) for parachild in parachildren: - if isinstance(parachild, Text): - m = re.match(r'(\s*[\w\W]*?\.)(\s|$)', parachild.data) + if isinstance(parachild, basestring): + m = re.match(r'(\s*[\w\W]*?\.)(\s|$)', parachild) if m: - para.appendChild(doc.createTextNode(m.group(1))) + para.children.append(m.group(1)) return ParsedEpytextDocstring(doc) - para.appendChild(parachild.cloneNode(1)) + para.children.append(parachild) return ParsedEpytextDocstring(doc) def split_fields(self, errors=None): if self._tree is None: return (self, ()) - tree = self._tree.cloneNode(1) # Hmm.. + tree = Element(self._tree.tag, *self._tree.children, + **self._tree.attribs) fields = [] - if (tree.hasChildNodes() and - tree.childNodes[-1].tagName == 'fieldlist' and - tree.childNodes[-1].hasChildNodes()): - field_nodes = tree.childNodes[-1].childNodes - tree.removeChild(tree.childNodes[-1]) + if (tree.children and + tree.children[-1].tag == 'fieldlist' and + tree.children[-1].children): + field_nodes = tree.children[-1].children + del tree.children[-1] for field in field_nodes: # Get the tag - tag = field.childNodes[0].childNodes[0].data.lower() - field.removeChild(field.childNodes[0]) + tag = field.children[0].children[0].lower() + del field.children[0] # Get the argument. - if field.childNodes and field.childNodes[0].tagName == 'arg': - arg = field.childNodes[0].childNodes[0].data - field.removeChild(field.childNodes[0]) + if field.children and field.children[0].tag == 'arg': + arg = field.children[0].children[0] + del field.children[0] else: arg = None # Process the field. - field.tagName = 'epytext' + field.tag = 'epytext' fields.append(Field(tag, arg, ParsedEpytextDocstring(field))) # Save the remaining docstring as the description.. - if tree.hasChildNodes() and tree.childNodes[0].hasChildNodes(): + if tree.children and tree.children[0].children: descr = tree else: descr = None @@ -2049,14 +2033,13 @@ return self._terms def _index_terms(self, tree, terms): - if tree is None or isinstance(tree, Text): + if tree is None or isinstance(tree, basestring): return - if tree.tagName == 'indexed': - term = tree.cloneNode(1) - term.tagName = 'epytext' + if tree.tag == 'indexed': + term = Element('epytext', *tree.children, **tree.attribs) terms.append(ParsedEpytextDocstring(term)) # Look for index items in child nodes. - for child in tree.childNodes: + for child in tree.children: self._index_terms(child, terms) Modified: trunk/epydoc/src/epydoc/test/epytext.doctest =================================================================== --- trunk/epydoc/src/epydoc/test/epytext.doctest 2006-09-02 01:23:23 UTC (rev 1343) +++ trunk/epydoc/src/epydoc/test/epytext.doctest 2006-09-02 01:40:35 UTC (rev 1344) @@ -9,16 +9,15 @@ >>> import re >>> def testparse(s): ... # this strips off the <epytext>...</epytext> - ... out = ''.join([n.toxml() for n in - ... epytext.parse(s).childNodes[0].childNodes]) + ... out = ''.join([str(n) for n in + ... epytext.parse(s).children]) ... # This is basically word-wrapping: ... out = re.sub(r'((</\w+>)+)', r'\1\n', out).rstrip() ... out = re.sub(r'(?m)^(.{50,70}>)(.)', r'\1\n\2', out).rstrip() ... return out >>> def checkparse(s, expect): ... # this strips off the <epytext>...</epytext> - ... got = ''.join([n.toxml() for n in - ... epytext.parse(s).childNodes[0].childNodes]) + ... got = ''.join([str(n) for n in epytext.parse(s).children]) ... if got != expect: ... raise ValueError('mismatch: %r %r' % (expect, got)) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |