[Epydoc-commits] SF.net SVN: epydoc: [1344] trunk/epydoc/src/epydoc
Brought to you by:
edloper
|
From: <ed...@us...> - 2006-09-02 01:40:41
|
Revision: 1344
http://svn.sourceforge.net/epydoc/?rev=1344&view=rev
Author: edloper
Date: 2006-09-01 18:40:35 -0700 (Fri, 01 Sep 2006)
Log Message:
-----------
- Replaced xml.dom.minidom with a *very* simple tree representation
for parsed epytext. (Using the new Element class). This should
significantly speed up some of the epytext processing steps.
Modified Paths:
--------------
trunk/epydoc/src/epydoc/markup/epytext.py
trunk/epydoc/src/epydoc/test/epytext.doctest
Modified: trunk/epydoc/src/epydoc/markup/epytext.py
===================================================================
--- trunk/epydoc/src/epydoc/markup/epytext.py 2006-09-02 01:23:23 UTC (rev 1343)
+++ trunk/epydoc/src/epydoc/markup/epytext.py 2006-09-02 01:40:35 UTC (rev 1344)
@@ -9,7 +9,8 @@
"""
Parser for epytext strings. Epytext is a lightweight markup whose
primary intended application is Python documentation strings. This
-parser converts Epytext strings to a XML/DOM representation. Epytext
+parser converts Epytext strings to a simple DOM-like representation
+(encoded as a tree of L{Element} objects and strings). Epytext
strings can contain the following X{structural blocks}:
- X{epytext}: The top-level element of the DOM tree.
@@ -105,13 +106,48 @@
# 5. testing
import re, string, types, sys, os.path
-from xml.dom.minidom import Document, Text
-import xml.dom.minidom
from epydoc.markup import *
from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex
from epydoc.markup.doctest import doctest_to_html, doctest_to_latex
##################################################
+## DOM-Like Encoding
+##################################################
+
+class Element:
+ """
+ A very simple DOM-like representation for parsed epytext
+ documents. Each epytext document is encoded as a tree whose nodes
+ are L{Element} objects, and whose leaves are C{string}s. Each
+ node is marked by a I{tag} and zero or more I{attributes}. Each
+ attribute is a mapping from a string key to a string value.
+ """
+ def __init__(self, tag, *children, **attribs):
+ self.tag = tag
+ """A string tag indicating the type of this element.
+ @type: C{string}"""
+
+ self.children = list(children)
+ """A list of the children of this element.
+ @type: C{list} of (C{string} or C{Element})"""
+
+ self.attribs = attribs
+ """A dictionary mapping attribute names to attribute values
+ for this element.
+ @type: C{dict} from C{string} to C{string}"""
+
+ def __str__(self):
+ """
+ Return a string representation of this element, using XML
+ notation.
+ @bug: Doesn't escape '<' or '&' or '>'.
+ """
+ attribs = ''.join([' %s=%r' % t for t in self.attribs.items()])
+ return ('<%s%s>' % (self.tag, attribs) +
+ ''.join([str(child) for child in self.children]) +
+ '</%s>' % self.tag)
+
+##################################################
## Constants
##################################################
@@ -196,7 +232,7 @@
ignored.
@type errors: C{list} of L{ParseError}
@return: a DOM tree encoding the contents of an epytext string.
- @rtype: L{xml.dom.minidom.Document}
+ @rtype: C{Element}
@raise ParseError: If C{errors} is C{None} and an error is
encountered while parsing.
"""
@@ -217,8 +253,8 @@
# Have we encountered a field yet?
encountered_field = 0
- # Create an XML document to hold the epytext.
- doc = Document()
+ # Create a document to hold the epytext.
+ doc = Element('epytext')
# Maintain two parallel stacks: one contains DOM elements, and
# gives the ancestors of the current block. The other contains
@@ -230,13 +266,13 @@
# corresponds to). No 2 consecutive indent_stack values will
# ever be "None." Use initial dummy elements in the stack, so we
# don't have to worry about bounds checking.
- stack = [None, doc.createElement('epytext')]
+ stack = [None, doc]
indent_stack = [-1, None]
for token in tokens:
# Uncomment this for debugging:
#print ('%s: %s\n%s: %s\n' %
- # (''.join(['%-11s' % (t and t.tagName) for t in stack]),
+ # (''.join(['%-11s' % (t and t.tag) for t in stack]),
# token.tag, ''.join(['%-11s' % i for i in indent_stack]),
# token.indent))
@@ -253,11 +289,11 @@
# If Token has type LBLOCK, add the new literal block
elif token.tag == Token.LBLOCK:
- stack[-1].appendChild(token.to_dom(doc))
+ stack[-1].children.append(token.to_dom(doc))
# If Token has type DTBLOCK, add the new doctest block
elif token.tag == Token.DTBLOCK:
- stack[-1].appendChild(token.to_dom(doc))
+ stack[-1].children.append(token.to_dom(doc))
# If Token has type BULLET, add the new list/list item/field
elif token.tag == Token.BULLET:
@@ -266,7 +302,7 @@
assert 0, 'Unknown token type: '+token.tag
# Check if the DOM element we just added was a field..
- if stack[-1].tagName == 'field':
+ if stack[-1].tag == 'field':
encountered_field = 1
elif encountered_field == 1:
if len(stack) <= 3:
@@ -282,7 +318,6 @@
return None
# Return the top-level epytext DOM element.
- doc.appendChild(stack[1])
return doc
def _pop_completed_blocks(token, stack, indent_stack):
@@ -305,10 +340,10 @@
# Dedent to a list item, if it is followed by another list
# item with the same indentation.
elif (token.tag == 'bullet' and indent==indent_stack[-2] and
- stack[-1].tagName in ('li', 'field')): pop=1
+ stack[-1].tag in ('li', 'field')): pop=1
# End of a list (no more list items available)
- elif (stack[-1].tagName in ('ulist', 'olist') and
+ elif (stack[-1].tag in ('ulist', 'olist') and
(token.tag != 'bullet' or token.contents[-1] == ':')):
pop=1
@@ -326,7 +361,7 @@
if para_token.indent == indent_stack[-1]:
# Colorize the paragraph and add it.
para = _colorize(doc, para_token, errors)
- stack[-1].appendChild(para)
+ stack[-1].children.append(para)
else:
estr = "Improper paragraph indentation."
errors.append(StructuringError(estr, para_token.startline))
@@ -341,7 +376,7 @@
# Check for errors.
for tok in stack[2:]:
- if tok.tagName != "section":
+ if tok.tag != "section":
estr = "Headings must occur at the top level."
errors.append(StructuringError(estr, heading_token.startline))
break
@@ -358,10 +393,10 @@
head = _colorize(doc, heading_token, errors, 'heading')
# Add the section's and heading's DOM elements.
- sec = doc.createElement("section")
- stack[-1].appendChild(sec)
+ sec = Element("section")
+ stack[-1].children.append(sec)
stack.append(sec)
- sec.appendChild(head)
+ sec.children.append(head)
indent_stack.append(None)
def _add_list(doc, bullet_token, stack, indent_stack, errors):
@@ -382,11 +417,11 @@
# Is this a new list?
newlist = 0
- if stack[-1].tagName != list_type:
+ if stack[-1].tag != list_type:
newlist = 1
- elif list_type == 'olist' and stack[-1].tagName == 'olist':
- old_listitem = stack[-1].childNodes[-1]
- old_bullet = old_listitem.getAttribute("bullet").split('.')[:-1]
+ elif list_type == 'olist' and stack[-1].tag == 'olist':
+ old_listitem = stack[-1].children[-1]
+ old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1]
new_bullet = bullet_token.contents.split('.')[:-1]
if (new_bullet[:-1] != old_bullet[:-1] or
int(new_bullet[-1]) != int(old_bullet[-1])+1):
@@ -394,7 +429,7 @@
# Create the new list.
if newlist:
- if stack[-1].tagName is 'fieldlist':
+ if stack[-1].tag is 'fieldlist':
# The new list item is not a field list item (since this
# is a new list); but it's indented the same as the field
# list. This either means that they forgot to indent the
@@ -403,7 +438,7 @@
# just warn about that (to avoid confusion).
estr = "Lists must be indented."
errors.append(StructuringError(estr, bullet_token.startline))
- if stack[-1].tagName in ('ulist', 'olist', 'fieldlist'):
+ if stack[-1].tag in ('ulist', 'olist', 'fieldlist'):
stack.pop()
indent_stack.pop()
@@ -419,7 +454,7 @@
if list_type == 'fieldlist':
# Fieldlist should be at the top-level.
for tok in stack[2:]:
- if tok.tagName != "section":
+ if tok.tag != "section":
estr = "Fields must be at the top level."
errors.append(
StructuringError(estr, bullet_token.startline))
@@ -428,41 +463,40 @@
indent_stack[2:] = []
# Add the new list.
- lst = doc.createElement(list_type)
- stack[-1].appendChild(lst)
+ lst = Element(list_type)
+ stack[-1].children.append(lst)
stack.append(lst)
indent_stack.append(bullet_token.indent)
if list_type == 'olist':
start = bullet_token.contents.split('.')[:-1]
if start != '1':
- lst.setAttribute("start", start[-1])
+ lst.attribs["start"] = start[-1]
# Fields are treated somewhat specially: A "fieldlist"
# node is created to make the parsing simpler, but fields
# are adjoined directly into the "epytext" node, not into
# the "fieldlist" node.
if list_type == 'fieldlist':
- li = doc.createElement("field")
+ li = Element("field")
token_words = bullet_token.contents[1:-1].split(None, 1)
- tag_elt = doc.createElement("tag")
- tag_elt.appendChild(doc.createTextNode(token_words[0]))
- li.appendChild(tag_elt)
+ tag_elt = Element("tag")
+ tag_elt.children.append(token_words[0])
+ li.children.append(tag_elt)
if len(token_words) > 1:
- arg_elt = doc.createElement("arg")
- arg_elt.appendChild(doc.createTextNode(token_words[1]))
- li.appendChild(arg_elt)
+ arg_elt = Element("arg")
+ arg_elt.children.append(token_words[1])
+ li.children.append(arg_elt)
else:
- li = doc.createElement("li")
+ li = Element("li")
if list_type == 'olist':
- li.setAttribute("bullet", bullet_token.contents)
+ li.attribs["bullet"] = bullet_token.contents
# Add the bullet.
- stack[-1].appendChild(li)
+ stack[-1].children.append(li)
stack.append(li)
indent_stack.append(None)
-
##################################################
## Tokenization
##################################################
@@ -570,10 +604,10 @@
def to_dom(self, doc):
"""
@return: a DOM representation of this C{Token}.
- @rtype: L{xml.dom.minidom.Element}
+ @rtype: L{Element}
"""
- e = doc.createElement(self.tag)
- e.appendChild(doc.createTextNode(self.contents))
+ e = Element(self.tag)
+ e.children.append(self.contents)
return e
# Construct regular expressions for recognizing bullets. These are
@@ -941,7 +975,7 @@
# the text currently being analyzed. New elements are pushed when
# "{" is encountered, and old elements are popped when "}" is
# encountered.
- stack = [doc.createElement(tagName)]
+ stack = [Element(tagName)]
# This is just used to make error-reporting friendlier. It's a
# stack parallel to "stack" containing the index of each element's
@@ -967,20 +1001,20 @@
if match.group() == '{':
if (end>0) and 'A' <= str[end-1] <= 'Z':
if (end-1) > start:
- stack[-1].appendChild(doc.createTextNode(str[start:end-1]))
+ stack[-1].children.append(str[start:end-1])
if not _COLORIZING_TAGS.has_key(str[end-1]):
estr = "Unknown inline markup tag."
errors.append(ColorizingError(estr, token, end-1))
- stack.append(doc.createElement('unknown'))
+ stack.append(Element('unknown'))
else:
tag = _COLORIZING_TAGS[str[end-1]]
- stack.append(doc.createElement(tag))
+ stack.append(Element(tag))
else:
if end > start:
- stack[-1].appendChild(doc.createTextNode(str[start:end]))
- stack.append(doc.createElement('litbrace'))
+ stack[-1].children.append(str[start:end])
+ stack.append(Element('litbrace'))
openbrace_stack.append(end)
- stack[-2].appendChild(stack[-1])
+ stack[-2].children.append(stack[-1])
# Close braces end colorizing elements.
elif match.group() == '}':
@@ -993,62 +1027,51 @@
# Add any remaining text.
if end > start:
- stack[-1].appendChild(doc.createTextNode(str[start:end]))
+ stack[-1].children.append(str[start:end])
# Special handling for symbols:
- if stack[-1].tagName == 'symbol':
- if (len(stack[-1].childNodes) != 1 or
- not isinstance(stack[-1].childNodes[0], Text)):
+ if stack[-1].tag == 'symbol':
+ if (len(stack[-1].children) != 1 or
+ not isinstance(stack[-1].children[0], basestring)):
estr = "Invalid symbol code."
errors.append(ColorizingError(estr, token, end))
else:
- symb = stack[-1].childNodes[0].data
+ symb = stack[-1].children[0]
if _SYMBOLS.has_key(symb):
# It's a symbol
- symbol = doc.createElement('symbol')
- stack[-2].removeChild(stack[-1])
- stack[-2].appendChild(symbol)
- symbol.appendChild(doc.createTextNode(symb))
+ stack[-2].children[-1] = Element('symbol', symb)
else:
estr = "Invalid symbol code."
errors.append(ColorizingError(estr, token, end))
# Special handling for escape elements:
- if stack[-1].tagName == 'escape':
- if (len(stack[-1].childNodes) != 1 or
- not isinstance(stack[-1].childNodes[0], Text)):
+ if stack[-1].tag == 'escape':
+ if (len(stack[-1].children) != 1 or
+ not isinstance(stack[-1].children[0], basestring)):
estr = "Invalid escape code."
errors.append(ColorizingError(estr, token, end))
else:
- escp = stack[-1].childNodes[0].data
+ escp = stack[-1].children[0]
if _ESCAPES.has_key(escp):
# It's an escape from _ESCAPES
- stack[-2].removeChild(stack[-1])
- escp = _ESCAPES[escp]
- stack[-2].appendChild(doc.createTextNode(escp))
+ stack[-2].children[-1] = _ESCAPES[escp]
elif len(escp) == 1:
# It's a single-character escape (eg E{.})
- stack[-2].removeChild(stack[-1])
- stack[-2].appendChild(doc.createTextNode(escp))
+ stack[-2].children[-1] = escp
else:
estr = "Invalid escape code."
errors.append(ColorizingError(estr, token, end))
# Special handling for literal braces elements:
- if stack[-1].tagName == 'litbrace':
- variables = stack[-1].childNodes
- stack[-2].removeChild(stack[-1])
- stack[-2].appendChild(doc.createTextNode('{'))
- for child in variables:
- stack[-2].appendChild(child)
- stack[-2].appendChild(doc.createTextNode('}'))
+ if stack[-1].tag == 'litbrace':
+ stack[-2].children = ['{'] + stack[-1].children + ['}']
# Special handling for graphs:
- if stack[-1].tagName == 'graph':
+ if stack[-1].tag == 'graph':
_colorize_graph(doc, stack[-1], token, end, errors)
# Special handling for link-type elements:
- if stack[-1].tagName in _LINK_COLORIZING_TAGS:
+ if stack[-1].tag in _LINK_COLORIZING_TAGS:
_colorize_link(doc, stack[-1], token, end, errors)
# Pop the completed element.
@@ -1059,7 +1082,7 @@
# Add any final text.
if start < len(str):
- stack[-1].appendChild(doc.createTextNode(str[start:]))
+ stack[-1].children.append(str[start:])
if len(stack) != 1:
estr = "Unbalanced '{'."
@@ -1078,13 +1101,13 @@
"""
bad_graph_spec = False
- children = graph.childNodes[:]
- for child in children: graph.removeChild(child)
+ children = graph.children[:]
+ graph.children = []
- if len(children) != 1 or not isinstance(children[0], Text):
+ if len(children) != 1 or not isinstance(children[0], basestring):
bad_graph_spec = "Bad graph specification"
else:
- pieces = children[0].data.split(None, 1)
+ pieces = children[0].split(None, 1)
graphtype = pieces[0].replace(':','').strip().lower()
if graphtype in GRAPH_TYPES:
if len(pieces) == 2:
@@ -1100,51 +1123,49 @@
if bad_graph_spec:
errors.append(ColorizingError(bad_graph_spec, token, end))
- graph.appendChild(doc.createTextNode('none'))
- graph.appendChild(doc.createTextNode(''))
+ graph.children.append('none')
+ graph.children.append('')
return
- graph.appendChild(doc.createTextNode(graphtype))
+ graph.children.append(graphtype)
for arg in args:
- graph.appendChild(doc.createTextNode(arg))
+ graph.children.append(arg)
def _colorize_link(doc, link, token, end, errors):
- variables = link.childNodes[:]
+ variables = link.children[:]
# If the last child isn't text, we know it's bad.
- if len(variables)==0 or not isinstance(variables[-1], Text):
- estr = "Bad %s target." % link.tagName
+ if len(variables)==0 or not isinstance(variables[-1], basestring):
+ estr = "Bad %s target." % link.tag
errors.append(ColorizingError(estr, token, end))
return
# Did they provide an explicit target?
- match2 = _TARGET_RE.match(variables[-1].data)
+ match2 = _TARGET_RE.match(variables[-1])
if match2:
(text, target) = match2.groups()
- variables[-1].data = text
+ variables[-1] = text
# Can we extract an implicit target?
elif len(variables) == 1:
- target = variables[0].data
+ target = variables[0]
else:
- estr = "Bad %s target." % link.tagName
+ estr = "Bad %s target." % link.tag
errors.append(ColorizingError(estr, token, end))
return
# Construct the name element.
- name_elt = doc.createElement('name')
- for child in variables:
- name_elt.appendChild(link.removeChild(child))
+ name_elt = Element('name', *variables)
# Clean up the target. For URIs, assume http or mailto if they
# don't specify (no relative urls)
target = re.sub(r'\s', '', target)
- if link.tagName=='uri':
+ if link.tag=='uri':
if not re.match(r'\w+:', target):
if re.match(r'\w+@(\w+)(\.\w+)*', target):
target = 'mailto:' + target
else:
target = 'http://'+target
- elif link.tagName=='link':
+ elif link.tag=='link':
# Remove arg lists for functions (e.g., L{_colorize_link()})
target = re.sub(r'\(.*\)$', '', target)
if not re.match(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$', target):
@@ -1153,12 +1174,10 @@
return
# Construct the target element.
- target_elt = doc.createElement('target')
- target_elt.appendChild(doc.createTextNode(target))
+ target_elt = Element('target', target)
# Add them to the link element.
- link.appendChild(name_elt)
- link.appendChild(target_elt)
+ link.children = [name_elt, target_elt]
##################################################
## Formatters
@@ -1176,7 +1195,7 @@
- C{to_epytext(parse(str)) == str} (approximately)
@param tree: A DOM document encoding of an epytext string.
- @type tree: L{xml.dom.minidom.Document}
+ @type tree: C{Element}
@param indent: The indentation for the string representation of
C{tree}. Each line of the returned string will begin with
C{indent} space characters.
@@ -1187,22 +1206,20 @@
@return: The epytext string corresponding to C{tree}.
@rtype: C{string}
"""
- if isinstance(tree, Document):
- return to_epytext(tree.childNodes[0], indent, seclevel)
- if isinstance(tree, Text):
- str = re.sub(r'\{', '\0', tree.data)
+ if isinstance(tree, basestring):
+ str = re.sub(r'\{', '\0', tree)
str = re.sub(r'\}', '\1', str)
return str
- if tree.tagName == 'epytext': indent -= 2
- if tree.tagName == 'section': seclevel += 1
- variables = [to_epytext(c, indent+2, seclevel) for c in tree.childNodes]
+ if tree.tag == 'epytext': indent -= 2
+ if tree.tag == 'section': seclevel += 1
+ variables = [to_epytext(c, indent+2, seclevel) for c in tree.children]
childstr = ''.join(variables)
# Clean up for literal blocks (add the double "::" back)
childstr = re.sub(':(\s*)\2', '::\\1', childstr)
- if tree.tagName == 'para':
+ if tree.tag == 'para':
str = wordwrap(childstr, indent)+'\n'
str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
@@ -1211,49 +1228,47 @@
str = re.sub('\0', 'E{lb}', str)
str = re.sub('\1', 'E{rb}', str)
return str
- elif tree.tagName == 'li':
- bulletAttr = tree.getAttributeNode('bullet')
- if bulletAttr: bullet = bulletAttr.value
- else: bullet = '-'
+ elif tree.tag == 'li':
+ bullet = tree.attribs.get('bullet') or '-'
return indent*' '+ bullet + ' ' + childstr.lstrip()
- elif tree.tagName == 'heading':
+ elif tree.tag == 'heading':
str = re.sub('\0', 'E{lb}',childstr)
str = re.sub('\1', 'E{rb}', str)
uline = len(childstr)*_HEADING_CHARS[seclevel-1]
return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n'
- elif tree.tagName == 'doctestblock':
+ elif tree.tag == 'doctestblock':
str = re.sub('\0', '{', childstr)
str = re.sub('\1', '}', str)
lines = [' '+indent*' '+line for line in str.split('\n')]
return '\n'.join(lines) + '\n\n'
- elif tree.tagName == 'literalblock':
+ elif tree.tag == 'literalblock':
str = re.sub('\0', '{', childstr)
str = re.sub('\1', '}', str)
lines = [(indent+1)*' '+line for line in str.split('\n')]
return '\2' + '\n'.join(lines) + '\n\n'
- elif tree.tagName == 'field':
+ elif tree.tag == 'field':
numargs = 0
- while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1
+ while tree.children[numargs+1].tag == 'arg': numargs += 1
tag = variables[0]
args = variables[1:1+numargs]
body = variables[1+numargs:]
str = (indent)*' '+'@'+variables[0]
if args: str += '(' + ', '.join(args) + ')'
return str + ':\n' + ''.join(body)
- elif tree.tagName == 'target':
+ elif tree.tag == 'target':
return '<%s>' % childstr
- elif tree.tagName in ('fieldlist', 'tag', 'arg', 'epytext',
+ elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
'section', 'olist', 'ulist', 'name'):
return childstr
- elif tree.tagName == 'symbol':
+ elif tree.tag == 'symbol':
return 'E{%s}' % childstr
- elif tree.tagName == 'graph':
+ elif tree.tag == 'graph':
return 'G{%s}' % ' '.join(variables)
else:
for (tag, name) in _COLORIZING_TAGS.items():
- if name == tree.tagName:
+ if name == tree.tag:
return '%s{%s}' % (tag, childstr)
- raise ValueError('Unknown DOM element %r' % tree.tagName)
+ raise ValueError('Unknown DOM element %r' % tree.tag)
def to_plaintext(tree, indent=0, seclevel=0):
"""
@@ -1263,7 +1278,7 @@
escaped characters in unescaped form, etc.
@param tree: A DOM document encoding of an epytext string.
- @type tree: L{xml.dom.minidom.Document}
+ @type tree: C{Element}
@param indent: The indentation for the string representation of
C{tree}. Each line of the returned string will begin with
C{indent} space characters.
@@ -1274,67 +1289,63 @@
@return: The epytext string corresponding to C{tree}.
@rtype: C{string}
"""
- if isinstance(tree, Document):
- return to_plaintext(tree.childNodes[0], indent, seclevel)
- if isinstance(tree, Text): return tree.data
+ if isinstance(tree, basestring): return tree
- if tree.tagName == 'section': seclevel += 1
+ if tree.tag == 'section': seclevel += 1
# Figure out the child indent level.
- if tree.tagName == 'epytext': cindent = indent
- elif tree.tagName == 'li' and tree.getAttributeNode('bullet'):
- cindent = indent + 1 + len(tree.getAttributeNode('bullet').value)
+ if tree.tag == 'epytext': cindent = indent
+ elif tree.tag == 'li' and tree.attribs.get('bullet'):
+ cindent = indent + 1 + len(tree.attribs.get('bullet'))
else:
cindent = indent + 2
- variables = [to_plaintext(c, cindent, seclevel) for c in tree.childNodes]
+ variables = [to_plaintext(c, cindent, seclevel) for c in tree.children]
childstr = ''.join(variables)
- if tree.tagName == 'para':
+ if tree.tag == 'para':
return wordwrap(childstr, indent)+'\n'
- elif tree.tagName == 'li':
+ elif tree.tag == 'li':
# We should be able to use getAttribute here; but there's no
# convenient way to test if an element has an attribute..
- bulletAttr = tree.getAttributeNode('bullet')
- if bulletAttr: bullet = bulletAttr.value
- else: bullet = '-'
+ bullet = tree.attribs.get('bullet') or '-'
return indent*' ' + bullet + ' ' + childstr.lstrip()
- elif tree.tagName == 'heading':
+ elif tree.tag == 'heading':
uline = len(childstr)*_HEADING_CHARS[seclevel-1]
return ((indent-2)*' ' + childstr + '\n' +
(indent-2)*' ' + uline + '\n')
- elif tree.tagName == 'doctestblock':
+ elif tree.tag == 'doctestblock':
lines = [(indent+2)*' '+line for line in childstr.split('\n')]
return '\n'.join(lines) + '\n\n'
- elif tree.tagName == 'literalblock':
+ elif tree.tag == 'literalblock':
lines = [(indent+1)*' '+line for line in childstr.split('\n')]
return '\n'.join(lines) + '\n\n'
- elif tree.tagName == 'fieldlist':
+ elif tree.tag == 'fieldlist':
return childstr
- elif tree.tagName == 'field':
+ elif tree.tag == 'field':
numargs = 0
- while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1
+ while tree.children[numargs+1].tag == 'arg': numargs += 1
tag = variables[0]
args = variables[1:1+numargs]
body = variables[1+numargs:]
str = (indent)*' '+'@'+variables[0]
if args: str += '(' + ', '.join(args) + ')'
return str + ':\n' + ''.join(body)
- elif tree.tagName == 'uri':
+ elif tree.tag == 'uri':
if len(variables) != 2: raise ValueError('Bad URI ')
elif variables[0] == variables[1]: return '<%s>' % variables[1]
else: return '%r<%s>' % (variables[0], variables[1])
- elif tree.tagName == 'link':
+ elif tree.tag == 'link':
if len(variables) != 2: raise ValueError('Bad Link')
return '%s' % variables[0]
- elif tree.tagName in ('olist', 'ulist'):
+ elif tree.tag in ('olist', 'ulist'):
# [xx] always use condensed lists.
## Use a condensed list if each list item is 1 line long.
#for child in variables:
# if child.count('\n') > 2: return childstr
return childstr.replace('\n\n', '\n')+'\n'
- elif tree.tagName == 'symbol':
+ elif tree.tag == 'symbol':
return '%s' % childstr
- elif tree.tagName == 'graph':
+ elif tree.tag == 'graph':
return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:]))
else:
# Assume that anything else can be passed through.
@@ -1348,7 +1359,7 @@
where different blocks begin, along the left margin.
@param tree: A DOM document encoding of an epytext string.
- @type tree: L{xml.dom.minidom.Document}
+ @type tree: C{Element}
@param indent: The indentation for the string representation of
C{tree}. Each line of the returned string will begin with
C{indent} space characters.
@@ -1359,21 +1370,19 @@
@return: The epytext string corresponding to C{tree}.
@rtype: C{string}
"""
- if isinstance(tree, Document):
- return to_debug(tree.childNodes[0], indent, seclevel)
- if isinstance(tree, Text):
- str = re.sub(r'\{', '\0', tree.data)
+ if isinstance(tree, basestring):
+ str = re.sub(r'\{', '\0', tree)
str = re.sub(r'\}', '\1', str)
return str
- if tree.tagName == 'section': seclevel += 1
- variables = [to_debug(c, indent+2, seclevel) for c in tree.childNodes]
+ if tree.tag == 'section': seclevel += 1
+ variables = [to_debug(c, indent+2, seclevel) for c in tree.children]
childstr = ''.join(variables)
# Clean up for literal blocks (add the double "::" back)
childstr = re.sub(':( *\n \|\n)\2', '::\\1', childstr)
- if tree.tagName == 'para':
+ if tree.tag == 'para':
str = wordwrap(childstr, indent-6, 69)+'\n'
str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
@@ -1385,54 +1394,52 @@
lines[0] = ' P>|' + lines[0]
lines[1:] = [' |'+l for l in lines[1:]]
return '\n'.join(lines)+'\n |\n'
- elif tree.tagName == 'li':
- bulletAttr = tree.getAttributeNode('bullet')
- if bulletAttr: bullet = bulletAttr.value
- else: bullet = '-'
+ elif tree.tag == 'li':
+ bullet = tree.attribs.get('bullet') or '-'
return ' LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip()
- elif tree.tagName in ('olist', 'ulist'):
+ elif tree.tag in ('olist', 'ulist'):
return 'LIST>|'+(indent-4)*' '+childstr[indent+2:]
- elif tree.tagName == 'heading':
+ elif tree.tag == 'heading':
str = re.sub('\0', 'E{lb}', childstr)
str = re.sub('\1', 'E{rb}', str)
uline = len(childstr)*_HEADING_CHARS[seclevel-1]
return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' +
' |'+(indent-8)*' ' + uline + '\n')
- elif tree.tagName == 'doctestblock':
+ elif tree.tag == 'doctestblock':
str = re.sub('\0', '{', childstr)
str = re.sub('\1', '}', str)
lines = [' |'+(indent-4)*' '+line for line in str.split('\n')]
lines[0] = 'DTST>'+lines[0][5:]
return '\n'.join(lines) + '\n |\n'
- elif tree.tagName == 'literalblock':
+ elif tree.tag == 'literalblock':
str = re.sub('\0', '{', childstr)
str = re.sub('\1', '}', str)
lines = [' |'+(indent-5)*' '+line for line in str.split('\n')]
lines[0] = ' LIT>'+lines[0][5:]
return '\2' + '\n'.join(lines) + '\n |\n'
- elif tree.tagName == 'field':
+ elif tree.tag == 'field':
numargs = 0
- while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1
+ while tree.children[numargs+1].tag == 'arg': numargs += 1
tag = variables[0]
args = variables[1:1+numargs]
body = variables[1+numargs:]
str = ' FLD>|'+(indent-6)*' '+'@'+variables[0]
if args: str += '(' + ', '.join(args) + ')'
return str + ':\n' + ''.join(body)
- elif tree.tagName == 'target':
+ elif tree.tag == 'target':
return '<%s>' % childstr
- elif tree.tagName in ('fieldlist', 'tag', 'arg', 'epytext',
+ elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
'section', 'olist', 'ulist', 'name'):
return childstr
- elif tree.tagName == 'symbol':
+ elif tree.tag == 'symbol':
return 'E{%s}' % childstr
- elif tree.tagName == 'graph':
+ elif tree.tag == 'graph':
return 'G{%s}' % ' '.join(variables)
else:
for (tag, name) in _COLORIZING_TAGS.items():
- if name == tree.tagName:
+ if name == tree.tag:
return '%s{%s}' % (tag, childstr)
- raise ValueError('Unknown DOM element %r' % tree.tagName)
+ raise ValueError('Unknown DOM element %r' % tree.tag)
##################################################
## Top-Level Wrapper function
@@ -1455,7 +1462,7 @@
written to.
@type stream: C{stream}
@return: a DOM document encoding the contents of C{str}.
- @rtype: L{xml.dom.minidom.Document}
+ @rtype: C{Element}
@raise SyntaxError: If any fatal errors were encountered.
"""
errors = []
@@ -1556,15 +1563,9 @@
@return: A DOM document containing C{str} in a single literal
block.
- @rtype: L{xml.dom.minidom.Document}
+ @rtype: C{Element}
"""
- doc = Document()
- epytext = doc.createElement('epytext')
- lit = doc.createElement('literalblock')
- doc.appendChild(epytext)
- epytext.appendChild(lit)
- lit.appendChild(doc.createTextNode(str))
- return doc
+ return Element('epytext', Element('literalblock', str))
def parse_as_para(str):
"""
@@ -1578,15 +1579,9 @@
@type str: C{string}
@return: A DOM document containing C{str} in a single paragraph.
- @rtype: L{xml.dom.minidom.Document}
+ @rtype: C{Element}
"""
- doc = Document()
- epytext = doc.createElement('epytext')
- para = doc.createElement('para')
- doc.appendChild(epytext)
- epytext.appendChild(para)
- para.appendChild(doc.createTextNode(str))
- return doc
+ return Element('epytext', Element('para', str))
#################################################################
## SUPPORT FOR EPYDOC
@@ -1710,8 +1705,6 @@
}
def __init__(self, dom_tree):
- if isinstance(dom_tree, Document):
- dom_tree = dom_tree.childNodes[0]
self._tree = dom_tree
# Caching:
self._html = self._latex = self._plaintext = None
@@ -1752,84 +1745,81 @@
def _to_html(self, tree, linker, directory, docindex, context,
indent=0, seclevel=0):
- if isinstance(tree, Text):
- return plaintext_to_html(tree.data)
+ if isinstance(tree, basestring):
+ return plaintext_to_html(tree)
- if tree.tagName == 'epytext': indent -= 2
- if tree.tagName == 'section': seclevel += 1
+ if tree.tag == 'epytext': indent -= 2
+ if tree.tag == 'section': seclevel += 1
# Process the variables first.
variables = [self._to_html(c, linker, directory, docindex, context,
indent+2, seclevel)
- for c in tree.childNodes]
+ for c in tree.children]
# Get rid of unnecessary <P>...</P> tags; they introduce extra
# space on most browsers that we don't want.
for i in range(len(variables)-1):
- if (not isinstance(tree.childNodes[i], Text) and
- tree.childNodes[i].tagName == 'para' and
- (isinstance(tree.childNodes[i+1], Text) or
- tree.childNodes[i+1].tagName != 'para')):
+ if (not isinstance(tree.children[i], basestring) and
+ tree.children[i].tag == 'para' and
+ (isinstance(tree.children[i+1], basestring) or
+ tree.children[i+1].tag != 'para')):
variables[i] = ' '*(indent+2)+variables[i][5+indent:-5]+'\n'
- if (tree.hasChildNodes() and
- not isinstance(tree.childNodes[-1], Text) and
- tree.childNodes[-1].tagName == 'para'):
+ if (tree.children and
+ not isinstance(tree.children[-1], basestring) and
+ tree.children[-1].tag == 'para'):
variables[-1] = ' '*(indent+2)+variables[-1][5+indent:-5]+'\n'
# Construct the HTML string for the variables.
childstr = ''.join(variables)
# Perform the appropriate action for the DOM tree type.
- if tree.tagName == 'para':
+ if tree.tag == 'para':
return wordwrap('<p>%s</p>' % childstr, indent)
- elif tree.tagName == 'code':
+ elif tree.tag == 'code':
return '<code>%s</code>' % childstr
- elif tree.tagName == 'uri':
+ elif tree.tag == 'uri':
return ('<a href="%s" target="_top">%s</a>' %
(variables[1], variables[0]))
- elif tree.tagName == 'link':
+ elif tree.tag == 'link':
return linker.translate_identifier_xref(variables[1], variables[0])
- elif tree.tagName == 'italic':
+ elif tree.tag == 'italic':
return '<i>%s</i>' % childstr
- elif tree.tagName == 'math':
+ elif tree.tag == 'math':
return '<i class="math">%s</i>' % childstr
- elif tree.tagName == 'indexed':
- term = tree.cloneNode(1)
- term.tagName = 'epytext'
+ elif tree.tag == 'indexed':
+ term = Element('epytext', *tree.children, **tree.attribs)
return linker.translate_indexterm(ParsedEpytextDocstring(term))
#term_key = self._index_term_key(tree)
#return linker.translate_indexterm(childstr, term_key)
- elif tree.tagName == 'bold':
+ elif tree.tag == 'bold':
return '<b>%s</b>' % childstr
- elif tree.tagName == 'ulist':
+ elif tree.tag == 'ulist':
return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
- elif tree.tagName == 'olist':
- startAttr = tree.getAttributeNode('start')
- if startAttr: start = ' start="%s"' % startAttr.value
- else: start = ''
+ elif tree.tag == 'olist':
+ start = tree.attribs.get('start') or ''
return ('%s<ol%s>\n%s%s</ol>\n' %
(indent*' ', start, childstr, indent*' '))
- elif tree.tagName == 'li':
+ elif tree.tag == 'li':
return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
- elif tree.tagName == 'heading':
+ elif tree.tag == 'heading':
return ('%s<h%s class="heading">%s</h%s>\n' %
((indent-2)*' ', seclevel, childstr, seclevel))
- elif tree.tagName == 'literalblock':
+ elif tree.tag == 'literalblock':
return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
- elif tree.tagName == 'doctestblock':
- return doctest_to_html(tree.childNodes[0].data.strip())
- elif tree.tagName == 'fieldlist':
+ elif tree.tag == 'doctestblock':
+ return doctest_to_html(tree.children[0].strip())
+ elif tree.tag == 'fieldlist':
raise AssertionError("There should not be any field lists left")
- elif tree.tagName in ('epytext', 'section', 'tag', 'arg',
+ elif tree.tag in ('epytext', 'section', 'tag', 'arg',
'name', 'target', 'html'):
return childstr
- elif tree.tagName == 'symbol':
- symbol = tree.childNodes[0].data
+ elif tree.tag == 'symbol':
+ symbol = tree.children[0]
if self.SYMBOL_TO_HTML.has_key(symbol):
return '&%s;' % self.SYMBOL_TO_HTML[symbol]
else:
return '[??]'
- elif tree.tagName == 'graph':
+ elif tree.tag == 'graph':
# Generate the graph.
graph = self._build_graph(variables[0], variables[1:], linker,
docindex, context)
@@ -1839,7 +1829,7 @@
image_file = os.path.join(directory, image_url)
return graph.to_html(image_file, image_url)
else:
- raise ValueError('Unknown epytext DOM element %r' % tree.tagName)
+ raise ValueError('Unknown epytext DOM element %r' % tree.tag)
#GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph']
def _build_graph(self, graph_type, graph_args, linker,
@@ -1883,27 +1873,27 @@
def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0):
- if isinstance(tree, Text):
- return plaintext_to_latex(tree.data, breakany=breakany)
+ if isinstance(tree, basestring):
+ return plaintext_to_latex(tree, breakany=breakany)
- if tree.tagName == 'section': seclevel += 1
+ if tree.tag == 'section': seclevel += 1
# Figure out the child indent level.
- if tree.tagName == 'epytext': cindent = indent
+ if tree.tag == 'epytext': cindent = indent
else: cindent = indent + 2
variables = [self._to_latex(c, linker, cindent, seclevel, breakany)
- for c in tree.childNodes]
+ for c in tree.children]
childstr = ''.join(variables)
- if tree.tagName == 'para':
+ if tree.tag == 'para':
return wordwrap(childstr, indent)+'\n'
- elif tree.tagName == 'code':
+ elif tree.tag == 'code':
return '\\texttt{%s}' % childstr
- elif tree.tagName == 'uri':
+ elif tree.tag == 'uri':
if len(variables) != 2: raise ValueError('Bad URI ')
if self._hyperref:
# ~ and # should not be escaped in the URI.
- uri = tree.childNodes[1].childNodes[0].data
+ uri = tree.children[1].children[0]
uri = uri.replace('{\\textasciitilde}', '~')
uri = uri.replace('\\#', '#')
if variables[0] == variables[1]:
@@ -1916,46 +1906,45 @@
return '\\textit{%s}' % variables[1]
else:
return '%s\\footnote{%s}' % (variables[0], variables[1])
- elif tree.tagName == 'link':
+ elif tree.tag == 'link':
if len(variables) != 2: raise ValueError('Bad Link')
return linker.translate_identifier_xref(variables[1], variables[0])
- elif tree.tagName == 'italic':
+ elif tree.tag == 'italic':
return '\\textit{%s}' % childstr
- elif tree.tagName == 'math':
+ elif tree.tag == 'math':
return '\\textit{%s}' % childstr
- elif tree.tagName == 'indexed':
- term = tree.cloneNode(1)
- term.tagName = 'epytext'
+ elif tree.tag == 'indexed':
+ term = Element('epytext', *tree.children, **tree.attribs)
return linker.translate_indexterm(ParsedEpytextDocstring(term))
- elif tree.tagName == 'bold':
+ elif tree.tag == 'bold':
return '\\textbf{%s}' % childstr
- elif tree.tagName == 'li':
+ elif tree.tag == 'li':
return indent*' ' + '\\item ' + childstr.lstrip()
- elif tree.tagName == 'heading':
+ elif tree.tag == 'heading':
return ' '*(indent-2) + '(section) %s\n\n' % childstr
- elif tree.tagName == 'doctestblock':
- return doctest_to_latex(tree.childNodes[0].data.strip())
- elif tree.tagName == 'literalblock':
+ elif tree.tag == 'doctestblock':
+ return doctest_to_latex(tree.children[0].strip())
+ elif tree.tag == 'literalblock':
return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr
- elif tree.tagName == 'fieldlist':
+ elif tree.tag == 'fieldlist':
return indent*' '+'{omitted fieldlist}\n'
- elif tree.tagName == 'olist':
+ elif tree.tag == 'olist':
return (' '*indent + '\\begin{enumerate}\n\n' +
' '*indent + '\\setlength{\\parskip}{0.5ex}\n' +
childstr +
' '*indent + '\\end{enumerate}\n\n')
- elif tree.tagName == 'ulist':
+ elif tree.tag == 'ulist':
return (' '*indent + '\\begin{itemize}\n' +
' '*indent + '\\setlength{\\parskip}{0.6ex}\n' +
childstr +
' '*indent + '\\end{itemize}\n\n')
- elif tree.tagName == 'symbol':
- symbol = tree.childNodes[0].data
+ elif tree.tag == 'symbol':
+ symbol = tree.children[0]
if self.SYMBOL_TO_LATEX.has_key(symbol):
return r'%s' % self.SYMBOL_TO_LATEX[symbol]
else:
return '[??]'
- elif tree.tagName == 'graph':
+ elif tree.tag == 'graph':
return '(GRAPH)'
#raise ValueError, 'graph not implemented yet for latex'
else:
@@ -1964,78 +1953,73 @@
def summary(self):
if self._tree is None: return self
-
- # Is the cloning that happens here safe/proper? (Cloning
- # between 2 different documents)
tree = self._tree
-
- doc = Document()
- epytext = doc.createElement('epytext')
- doc.appendChild(epytext)
+ doc = Element('epytext')
# Find the first paragraph.
- variables = tree.childNodes
- while (len(variables) > 0) and (variables[0].tagName != 'para'):
- if variables[0].tagName in ('section', 'ulist', 'olist', 'li'):
- variables = variables[0].childNodes
+ variables = tree.children
+ while (len(variables) > 0) and (variables[0].tag != 'para'):
+ if variables[0].tag in ('section', 'ulist', 'olist', 'li'):
+ variables = variables[0].children
else:
variables = variables[1:]
# Special case: if the docstring contains a single literal block,
# then try extracting the summary from it.
- if (len(variables) == 0 and len(tree.childNodes) == 1 and
- tree.childNodes[0].tagName == 'literalblock'):
+ if (len(variables) == 0 and len(tree.children) == 1 and
+ tree.children[0].tag == 'literalblock'):
str = re.split(r'\n\s*(\n|$).*',
- tree.childNodes[0].childNodes[0].data, 1)[0]
- variables = [doc.createElement('para')]
- variables[0].appendChild(doc.createTextNode(str))
+ tree.children[0].children[0], 1)[0]
+ variables = [Element('para')]
+ variables[0].children.append(str)
# If we didn't find a paragraph, return an empty epytext.
if len(variables) == 0: return ParsedEpytextDocstring(doc)
# Extract the first sentence.
- parachildren = variables[0].childNodes
- para = doc.createElement('para')
- epytext.appendChild(para)
+ parachildren = variables[0].children
+ para = Element('para')
+ doc.children.append(para)
for parachild in parachildren:
- if isinstance(parachild, Text):
- m = re.match(r'(\s*[\w\W]*?\.)(\s|$)', parachild.data)
+ if isinstance(parachild, basestring):
+ m = re.match(r'(\s*[\w\W]*?\.)(\s|$)', parachild)
if m:
- para.appendChild(doc.createTextNode(m.group(1)))
+ para.children.append(m.group(1))
return ParsedEpytextDocstring(doc)
- para.appendChild(parachild.cloneNode(1))
+ para.children.append(parachild)
return ParsedEpytextDocstring(doc)
def split_fields(self, errors=None):
if self._tree is None: return (self, ())
- tree = self._tree.cloneNode(1) # Hmm..
+ tree = Element(self._tree.tag, *self._tree.children,
+ **self._tree.attribs)
fields = []
- if (tree.hasChildNodes() and
- tree.childNodes[-1].tagName == 'fieldlist' and
- tree.childNodes[-1].hasChildNodes()):
- field_nodes = tree.childNodes[-1].childNodes
- tree.removeChild(tree.childNodes[-1])
+ if (tree.children and
+ tree.children[-1].tag == 'fieldlist' and
+ tree.children[-1].children):
+ field_nodes = tree.children[-1].children
+ del tree.children[-1]
for field in field_nodes:
# Get the tag
- tag = field.childNodes[0].childNodes[0].data.lower()
- field.removeChild(field.childNodes[0])
+ tag = field.children[0].children[0].lower()
+ del field.children[0]
# Get the argument.
- if field.childNodes and field.childNodes[0].tagName == 'arg':
- arg = field.childNodes[0].childNodes[0].data
- field.removeChild(field.childNodes[0])
+ if field.children and field.children[0].tag == 'arg':
+ arg = field.children[0].children[0]
+ del field.children[0]
else:
arg = None
# Process the field.
- field.tagName = 'epytext'
+ field.tag = 'epytext'
fields.append(Field(tag, arg, ParsedEpytextDocstring(field)))
# Save the remaining docstring as the description..
- if tree.hasChildNodes() and tree.childNodes[0].hasChildNodes():
+ if tree.children and tree.children[0].children:
descr = tree
else:
descr = None
@@ -2049,14 +2033,13 @@
return self._terms
def _index_terms(self, tree, terms):
- if tree is None or isinstance(tree, Text):
+ if tree is None or isinstance(tree, basestring):
return
- if tree.tagName == 'indexed':
- term = tree.cloneNode(1)
- term.tagName = 'epytext'
+ if tree.tag == 'indexed':
+ term = Element('epytext', *tree.children, **tree.attribs)
terms.append(ParsedEpytextDocstring(term))
# Look for index items in child nodes.
- for child in tree.childNodes:
+ for child in tree.children:
self._index_terms(child, terms)
Modified: trunk/epydoc/src/epydoc/test/epytext.doctest
===================================================================
--- trunk/epydoc/src/epydoc/test/epytext.doctest 2006-09-02 01:23:23 UTC (rev 1343)
+++ trunk/epydoc/src/epydoc/test/epytext.doctest 2006-09-02 01:40:35 UTC (rev 1344)
@@ -9,16 +9,15 @@
>>> import re
>>> def testparse(s):
... # this strips off the <epytext>...</epytext>
- ... out = ''.join([n.toxml() for n in
- ... epytext.parse(s).childNodes[0].childNodes])
+ ... out = ''.join([str(n) for n in
+ ... epytext.parse(s).children])
... # This is basically word-wrapping:
... out = re.sub(r'((</\w+>)+)', r'\1\n', out).rstrip()
... out = re.sub(r'(?m)^(.{50,70}>)(.)', r'\1\n\2', out).rstrip()
... return out
>>> def checkparse(s, expect):
... # this strips off the <epytext>...</epytext>
- ... got = ''.join([n.toxml() for n in
- ... epytext.parse(s).childNodes[0].childNodes])
+ ... got = ''.join([str(n) for n in epytext.parse(s).children])
... if got != expect:
... raise ValueError('mismatch: %r %r' % (expect, got))
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|