[Epydoc-commits] SF.net SVN: epydoc: [1463] trunk/epydoc/src/epydoc
Brought to you by:
edloper
From: <ed...@us...> - 2007-02-12 05:26:30
|
Revision: 1463 http://svn.sourceforge.net/epydoc/?rev=1463&view=rev Author: edloper Date: 2007-02-11 21:26:28 -0800 (Sun, 11 Feb 2007) Log Message: ----------- - Added a new module for colorizing python values. This abstracts away from the code that's currently in the html writer, and should make it easier to use the same colorization code for other docwriters. The new code should do a better job of pretty-printing values than the current html writer code. I also fixed a couple minor bugs in the regexp colorizer along the way. Note, though, that this code hasn't been plugged into epydoc in any way yet. I.e., docwriter/html.py still needs to be modified to actually *use* this new code. Added Paths: ----------- trunk/epydoc/src/epydoc/markup/pyval_repr.py trunk/epydoc/src/epydoc/test/pyval_repr.doctest Added: trunk/epydoc/src/epydoc/markup/pyval_repr.py =================================================================== --- trunk/epydoc/src/epydoc/markup/pyval_repr.py (rev 0) +++ trunk/epydoc/src/epydoc/markup/pyval_repr.py 2007-02-12 05:26:28 UTC (rev 1463) @@ -0,0 +1,478 @@ +# epydoc -- Marked-up Representations for Python Values +# +# Copyright (C) 2005 Edward Loper +# Author: Edward Loper <ed...@lo...> +# URL: <http://epydoc.sf.net> +# +# $Id: apidoc.py 1448 2007-02-11 00:05:34Z dvarrazzo $ + +""" +Syntax highlighter for Python values. Currently provides special +colorization support for: + + - lists, tuples, sets, frozensets, dicts + - numbers + - strings + - compiled regexps + +The highlighter also takes care of line-wrapping, and automatically +stops generating repr output as soon as it has exceeded the specified +number of lines (which should make it faster than pprint for large +values). It does I{not} bother to do automatic cycle detection, +because maxlines is typically around 5, so it's really not worth it. 
+""" + +# Implementation note: we use exact tests for classes (list, etc) +# rather than using isinstance, because subclasses might override +# __repr__. + +import types, re +import epydoc.apidoc +from epydoc.util import decode_with_backslashreplace +from epydoc.compat import * +import sre_parse, sre_constants + +def is_re_pattern(pyval): + return type(pyval).__name__ == 'SRE_Pattern' + +class _ColorizerState: + """ + An object used to keep track of the current state of the pyval + colorizer. The L{mark()}/L{restore()} methods can be used to set + a backup point, and restore back to that backup point. This is + used by several colorization methods that first try colorizing + their object on a single line (setting linebreakok=False); and + then fall back on a multi-line output if that fails. + """ + def __init__(self): + self.result = [] + self.charpos = 0 + self.lineno = 1 + self.linebreakok = True + + def mark(self): + return (len(self.result), self.charpos, self.lineno, self.linebreakok) + + def restore(self, mark): + n, self.charpos, self.lineno, self.linebreakok = mark + del self.result[n:] + +class _Maxlines(Exception): + """A control-flow exception that is raised when PyvalColorizer + exceeds the maximum number of allowed lines.""" + +class _Linebreak(Exception): + """A control-flow exception that is raised when PyvalColorizer + generates a string containing a newline, but the state object's + linebreakok variable is False.""" + + +class PyvalColorizer: + """ + Syntax highlighter for Python values. 
+ """ + + def __init__(self, linelen=75, maxlines=5, sort=True): + self.linelen = linelen + self.maxlines = maxlines + self.sort = sort + + #//////////////////////////////////////////////////////////// + # Subclassing Hooks + #//////////////////////////////////////////////////////////// + + PREFIX = None + """A string sequence that should be added to the beginning of all + colorized pyval outputs.""" + + SUFFIX = None + """A string sequence that should be added to the end of all + colorized pyval outputs.""" + + NEWLINE = '\n' + """The string sequence that should be generated to encode newlines.""" + + LINEWRAP = None + """The string sequence that should be generated when linewrapping a + string that is too long to fit on a single line. (The + NEWLINE sequence will be added immediately after this sequence)""" + + ELLIPSIS = None + """The string sequence that should be generated when omitting the + rest of the repr because maxlines has been exceeded.""" + + def markup(self, s, tag=None): + """ + Apply syntax highlighting to a single substring from a Python + value representation. C{s} is the substring, and C{tag} is + the tag that should be applied to the substring. C{tag} will + be one of the following strings: + + - (list under construction) + """ + + #//////////////////////////////////////////////////////////// + # Colorization Tags + #//////////////////////////////////////////////////////////// + + GROUP_TAG = 'val-group' # e.g., "[" and "]" + COMMA_TAG = 'val-op' # The "," that separates elements + COLON_TAG = 'val-op' # The ":" in dictionaries + CONST_TAG = None # None, True, False + NUMBER_TAG = None # ints, floats, etc + QUOTE_TAG = 'val-quote' # Quotes around strings. 
+ STRING_TAG = 'val-string' # Body of string literals + + RE_CHAR_TAG = None + RE_GROUP_TAG = 're-group' + RE_REF_TAG = 're-ref' + RE_OP_TAG = 're-op' + RE_FLAGS_TAG = 're-flags' + + #//////////////////////////////////////////////////////////// + # Entry Point + #//////////////////////////////////////////////////////////// + + def colorize(self, pyval): + # Create an object to keep track of the colorization. + state = _ColorizerState() + # Add the prefix string. + state.result.append(self.PREFIX) + # Colorize the value. If we reach maxlines, then add on an + # ellipsis marker and call it a day. + try: + self._colorize(pyval, state) + except _Maxlines: + state.result.append(self.ELLIPSIS) + # Add on the suffix string. + state.result.append(self.SUFFIX) + # Put it all together. + return ''.join(state.result) + + def _colorize(self, pyval, state): + pyval_type = type(pyval) + + if pyval is None or pyval is True or pyval is False: + self._output(str(pyval), self.CONST_TAG, state) + elif pyval_type in (int, float, long, types.ComplexType): + self._output(str(pyval), self.NUMBER_TAG, state) + elif pyval_type is str: + self._colorize_str(pyval, state, '', 'string-escape') + elif pyval_type is unicode: + self._colorize_str(pyval, state, 'u', 'unicode-escape') + elif pyval_type is list: + self._multiline(self._colorize_iter, pyval, state, '[', ']') + elif pyval_type is tuple: + self._multiline(self._colorize_iter, pyval, state, '(', ')') + elif pyval_type is set: + if self.sort: pyval = sorted(pyval) + self._multiline(self._colorize_iter, pyval, state, + 'set([', '])') + elif pyval_type is frozenset: + if self.sort: pyval = sorted(pyval) + self._multiline(self._colorize_iter, pyval, state, + 'frozenset([', '])') + elif pyval_type is dict: + items = pyval.items() + if self.sort: items = sorted(items) + self._multiline(self._colorize_dict, items, state, '{', '}') + elif is_re_pattern(pyval): + self._colorize_re(pyval, state) + else: + self._output(repr(pyval), None, state) + 
+ #//////////////////////////////////////////////////////////// + # Object Colorization Functions + #//////////////////////////////////////////////////////////// + + def _multiline(self, func, pyval, state, *args): + """ + Helper for container-type colorizers. First, try calling + C{func(pyval, state, *args)} with linebreakok set to false; + and if that fails, then try again with it set to true. + """ + linebreakok = state.linebreakok + mark = state.mark() + + try: + state.linebreakok = False + func(pyval, state, *args) + state.linebreakok = linebreakok + + except _Linebreak: + if not linebreakok: + raise + state.restore(mark) + func(pyval, state, *args) + + def _colorize_iter(self, pyval, state, prefix, suffix): + self._output(prefix, self.GROUP_TAG, state) + indent = state.charpos + for i, elt in enumerate(pyval): + if i>=1: + if state.linebreakok: + self._output(',', self.COMMA_TAG, state) + self._output('\n'+' '*indent, None, state) + else: + self._output(', ', self.COMMA_TAG, state) + self._colorize(elt, state) + self._output(suffix, self.GROUP_TAG, state) + + def _colorize_dict(self, items, state, prefix, suffix): + self._output(prefix, self.GROUP_TAG, state) + indent = state.charpos + for i, (key, val) in enumerate(items): + if i>=1: + if state.linebreakok: + self._output(',', self.COMMA_TAG, state) + self._output('\n'+' '*indent, None, state) + else: + self._output(', ', self.COMMA_TAG, state) + self._colorize(key, state) + self._output(': ', self.COLON_TAG, state) + self._colorize(val, state) + self._output(suffix, self.GROUP_TAG, state) + + def _colorize_str(self, pyval, state, prefix, encoding): + # Decide which quote to use. + if '\n' in pyval: quote = "'''" + else: quote = "'" + # Open quote. + self._output(prefix+quote, self.QUOTE_TAG, state) + # Body + for i, line in enumerate(pyval.split('\n')): + if i>0: self._output('\n', None, state) + self._output(line.encode(encoding), self.STRING_TAG, state) + # Close quote. 
+ self._output(quote, self.QUOTE_TAG, state) + + def _colorize_re(self, pyval, state): + # Extract the flag & pattern from the regexp. + pat, flags = pyval.pattern, pyval.flags + # If the pattern is a string, decode it to unicode. + if isinstance(pat, str): + pat = decode_with_backslashreplace(pat) + # Parse the regexp pattern. + tree = sre_parse.parse(pat, flags) + groups = dict([(num,name) for (name,num) in + tree.pattern.groupdict.items()]) + # Colorize it! + self._colorize_re_flags(tree.pattern.flags, state) + self._colorize_re_tree(tree, state, True, groups) + + def _colorize_re_flags(self, flags, state): + if flags: + flags = [c for (c,n) in sorted(sre_parse.FLAGS.items()) + if (n&flags)] + flags = '(?%s)' % ''.join(flags) + self._output(flags, self.RE_FLAGS_TAG, state) + + def _colorize_re_tree(self, tree, state, noparen, groups): + assert noparen in (True, False) + if len(tree) > 1 and not noparen: + self._output('(', self.RE_GROUP_TAG, state) + for elt in tree: + op = elt[0] + args = elt[1] + + if op == sre_constants.LITERAL: + c = unichr(args) + # Add any appropriate escaping. 
+ if c in '.^$\\*+?{}[]|()': c = '\\'+c + elif c == '\t': c = '\\t' + elif c == '\r': c = '\\r' + elif c == '\n': c = '\\n' + elif c == '\f': c = '\\f' + elif c == '\v': c = '\\v' + elif ord(c) > 0xffff: c = r'\U%08x' % ord(c) + elif ord(c) > 0xff: c = r'\u%04x' % ord(c) + elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c) + self._output(c, self.RE_CHAR_TAG, state) + + elif op == sre_constants.ANY: + self._output('.', self.RE_CHAR_TAG, state) + + elif op == sre_constants.BRANCH: + if args[0] is not None: + raise ValueError('Branch expected None arg but got %s' + % args[0]) + for i, item in enumerate(args[1]): + if i > 0: + self._output('|', self.RE_OP_TAG, state) + self._colorize_re_tree(item, state, True, groups) + + elif op == sre_constants.IN: + if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY): + self._colorize_re_tree(args, state, False, groups) + else: + self._output('[', self.RE_GROUP_TAG, state) + self._colorize_re_tree(args, state, True, groups) + self._output(']', self.RE_GROUP_TAG, state) + + elif op == sre_constants.CATEGORY: + if args == sre_constants.CATEGORY_DIGIT: val = r'\d' + elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D' + elif args == sre_constants.CATEGORY_SPACE: val = r'\s' + elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S' + elif args == sre_constants.CATEGORY_WORD: val = r'\w' + elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W' + else: raise ValueError('Unknown category %s' % args) + self._output(val, self.RE_CHAR_TAG, state) + + elif op == sre_constants.AT: + if args == sre_constants.AT_BEGINNING_STRING: val = r'\A' + elif args == sre_constants.AT_BEGINNING: val = r'^' + elif args == sre_constants.AT_END: val = r'$' + elif args == sre_constants.AT_BOUNDARY: val = r'\b' + elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B' + elif args == sre_constants.AT_END_STRING: val = r'\Z' + else: raise ValueError('Unknown position %s' % args) + self._output(val, self.RE_CHAR_TAG, state) + + elif op in 
(sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT): + minrpt = args[0] + maxrpt = args[1] + if maxrpt == sre_constants.MAXREPEAT: + if minrpt == 0: val = '*' + elif minrpt == 1: val = '+' + else: val = '{%d,}' % (minrpt) + elif minrpt == 0: + if maxrpt == 1: val = '?' + else: val = '{,%d}' % (maxrpt) + elif minrpt == maxrpt: + val = '{%d}' % (maxrpt) + else: + val = '{%d,%d}' % (minrpt, maxrpt) + if op == sre_constants.MIN_REPEAT: + val += '?' + + self._colorize_re_tree(args[2], state, False, groups) + self._output(val, self.RE_OP_TAG, state) + + elif op == sre_constants.SUBPATTERN: + if args[0] is None: + self._output('(?:', self.RE_GROUP_TAG, state) + elif args[0] in groups: + self._output('(?P<', self.RE_GROUP_TAG, state) + self._output(groups[args[0]], self.RE_REF_TAG, state) + self._output('>', self.RE_GROUP_TAG, state) + elif isinstance(args[0], (int, long)): + # This is cheating: + self._output('(', self.RE_GROUP_TAG, state) + else: + self._output('(?P<', self.RE_GROUP_TAG, state) + self._output(args[0], self.RE_REF_TAG, state) + self._output('>', self.RE_GROUP_TAG, state) + self._colorize_re_tree(args[1], state, True, groups) + self._output(')', self.RE_GROUP_TAG, state) + + elif op == sre_constants.GROUPREF: + self._output('\\%d' % args, self.RE_REF_TAG, state) + + elif op == sre_constants.RANGE: + self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),), + state, False, groups ) + self._output('-', self.RE_OP_TAG, state) + self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),), + state, False, groups ) + + elif op == sre_constants.NEGATE: + self._output('^', self.RE_OP_TAG, state) + + elif op == sre_constants.ASSERT: + if args[0] > 0: + self._output('(?=', self.RE_GROUP_TAG, state) + else: + self._output('(?<=', self.RE_GROUP_TAG, state) + self._colorize_re_tree(args[1], state, True, groups) + self._output(')', self.RE_GROUP_TAG, state) + + elif op == sre_constants.ASSERT_NOT: + if args[0] > 0: + self._output('(?!', self.RE_GROUP_TAG, state) + 
else: + self._output('(?<!', self.RE_GROUP_TAG, state) + self._colorize_re_tree(args[1], state, True, groups) + self._output(')', self.RE_GROUP_TAG, state) + + elif op == sre_constants.NOT_LITERAL: + self._output('[^', self.RE_GROUP_TAG, state) + self._colorize_re_tree( ((sre_constants.LITERAL, args),), + state, False, groups ) + self._output(']', self.RE_GROUP_TAG, state) + else: + log.error("Error colorizing regexp: unknown elt %r" % elt) + if len(tree) > 1 and not noparen: + self._output(')', self.RE_GROUP_TAG, state) + + #//////////////////////////////////////////////////////////// + # Output function + #//////////////////////////////////////////////////////////// + + def _output(self, s, tag, state): + """ + Add the string `s` to the result list, tagging its contents + with tag `tag`. Any lines that go beyond `self.linelen` will + be line-wrapped. If the total number of lines exceeds + `self.maxlines`, then raise a `_Maxlines` exception. + """ + if '\n' in s and not state.linebreakok: + raise _Linebreak() + + # Split the string into segments. The first segment is the + # content to add to the current line, and the remaining + # segments are new lines. + segments = s.split('\n') + + for i, segment in enumerate(segments): + # If this isn't the first segment, then add a newline to + # split it from the previous segment. + if i > 0: + if not state.linebreakok: + raise _Linebreak() + state.result.append(self.NEWLINE) + state.lineno += 1 + state.charpos = 0 + if state.lineno > self.maxlines: + raise _Maxlines() + + # If the segment fits on the current line, then just call + # markup to tag it, and store the result. + if state.charpos + len(segment) <= self.linelen: + state.result.append(self.markup(segment, tag)) + state.charpos += len(segment) + + # If the segment doesn't fit on the current line, then + # line-wrap it, and insert the remainder of the line into + # the segments list that we're iterating over. 
+ else: + split = self.linelen-state.charpos + state.result += [self.markup(segment[:split], tag), + self.LINEWRAP] + segments.insert(i+1, segment[split:]) + +class HTMLPyvalColorizer(PyvalColorizer): + NEWLINE = '\n' + PREFIX = SUFFIX = '' + LINEWRAP = (r'<span class="variable-linewrap">' + '<img src="crarr.png" alt="\" /></span>') + ELLIPSIS = r'<span class="variable-ellipsis">...</span>' + def markup(self, s, tag=None): + s = s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') + if tag: + return '<span class="variable-%s">%s</span>' % (tag, s) + else: + return s + +class XMLPyvalColorizer(PyvalColorizer): + NEWLINE = '\n' + PREFIX = '<pyval>' + SUFFIX = '</pyval>' + LINEWRAP = '<linewrap />' + ELLIPSIS = '<ellipsis />' + def markup(self, s, tag=None): + s = s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') + if tag: + return '<%s>%s</%s>' % (tag, s, tag) + else: + return s Added: trunk/epydoc/src/epydoc/test/pyval_repr.doctest =================================================================== --- trunk/epydoc/src/epydoc/test/pyval_repr.doctest (rev 0) +++ trunk/epydoc/src/epydoc/test/pyval_repr.doctest 2007-02-12 05:26:28 UTC (rev 1463) @@ -0,0 +1,194 @@ +Regression Testing for epydoc.markup.pyval_repr +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + >>> from epydoc.markup.pyval_repr import * + >>> colorizer = XMLPyvalColorizer(linelen=40) + >>> def color(s): print colorizer.colorize(s) + + +Simple Types +============ +Integers, floats, None, and complex numbers get printed using str, +with no syntax highlighting: + + >>> color(10) + <pyval>10</pyval> + >>> color(1./4) + <pyval>0.25</pyval> + >>> color(None) + <pyval>None</pyval> + >>> color(100) + <pyval>100</pyval> + +Long ints will get wrapped if they're big enough: + + >>> color(10000000) + <pyval>10000000</pyval> + >>> color(10**90) + <pyval>1000000000000000000000000000000000000000<linewrap /> + 0000000000000000000000000000000000000000<linewrap /> + 00000000000</pyval> + +Strings +======= 
+Strings have their quotation marks tagged as 'quote'. Characters are +escaped using the 'string-escape' encoding. + + >>> color(''.join(chr(i) for i in range(256))) + <pyval><val-quote>'''</val-quote><val-string>\x00\x01\x02\x03\x04\x05\x06\x07\x08\</val-string><linewrap /> + <val-string>t</val-string> + <val-string>\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x</val-string><linewrap /> + <val-string>15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x</val-string><linewrap /> + <val-string>1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCD</val-string><linewrap /> + <ellipsis /></pyval> + +Currently, the "'" quote is always used, because that's what the +'string-escape' encoding expects. + + >>> color('Hello') + <pyval><val-quote>'</val-quote><val-string>Hello</val-string><val-quote>'</val-quote></pyval> + >>> color('"Hello"') + <pyval><val-quote>'</val-quote><val-string>"Hello"</val-string><val-quote>'</val-quote></pyval> + >>> color("'Hello'") + <pyval><val-quote>'</val-quote><val-string>\'Hello\'</val-string><val-quote>'</val-quote></pyval> + +Strings containing newlines are automatically rendered as multiline +strings. + + >>> color("This\n is a multiline\n string!") + <pyval><val-quote>'''</val-quote><val-string>This</val-string> + <val-string> is a multiline</val-string> + <val-string> string!</val-string><val-quote>'''</val-quote></pyval> + +Unicode strings are handled properly. + + >>> color(u"Hello world") + <pyval><val-quote>u'</val-quote><val-string>Hello world</val-string><val-quote>'</val-quote></pyval> + >>> color(u"\uaaaa And \ubbbb") + <pyval><val-quote>u'</val-quote><val-string>\uaaaa And \ubbbb</val-string><val-quote>'</val-quote></pyval> + +Lists, Tuples, etc. +=================== +Lists, tuples, and sets are all colorized using the same method. The +braces and commas are tagged with "op". If the value can fit on the +current line, it is displayed on one line. Otherwise, each value is +listed on a separate line, indented by the size of the open-bracket. 
+ + >>> color(range(10)) + <pyval><val-group>[</val-group>0<val-op>, </val-op>1<val-op>, </val-op>2<val-op>, </val-op>3<val-op>, </val-op>4<val-op>, </val-op>5<val-op>, </val-op>6<val-op>, </val-op>7<val-op>, </val-op>8<val-op>, </val-op>9<val-group>]</val-group></pyval> + >>> color(range(100)) + <pyval><val-group>[</val-group>0<val-op>,</val-op> + 1<val-op>,</val-op> + 2<val-op>,</val-op> + 3<val-op>,</val-op> + 4<val-op>,</val-op> + <ellipsis /></pyval> + >>> color([1,2,[5,6,[(11,22,33),9],10],11]+[99,98,97,96,95]) + <pyval><val-group>[</val-group>1<val-op>,</val-op> + 2<val-op>,</val-op> + <val-group>[</val-group>5<val-op>, </val-op>6<val-op>, </val-op><val-group>[</val-group><val-group>(</val-group>11<val-op>, </val-op>22<val-op>, </val-op>33<val-group>)</val-group><val-op>, </val-op>9<val-group>]</val-group><val-op>, </val-op>10<val-group>]</val-group><val-op>,</val-op> + 11<val-op>,</val-op> + 99<val-op>,</val-op> + <ellipsis /></pyval> + >>> color(set(range(20))) + <pyval><val-group>set([</val-group>0<val-op>,</val-op> + 1<val-op>,</val-op> + 2<val-op>,</val-op> + 3<val-op>,</val-op> + 4<val-op>,</val-op> + <ellipsis /></pyval> + +Dictionaries +============ +Dicts are treated just like lists, except that the ":" is also tagged as +"op". + + >>> color({1:33, 2:[1,2,3,{7:'oo'*20}]}) + <pyval><val-group>{</val-group>1<val-op>: </val-op>33<val-op>,</val-op> + 2<val-op>: </val-op><val-group>[</val-group>1<val-op>,</val-op> + 2<val-op>,</val-op> + 3<val-op>,</val-op> + <val-group>{</val-group>7<val-op>: </val-op><val-quote>'</val-quote><val-string>oooooooooooooooooooooooooooooo</val-string><linewrap /> + <ellipsis /></pyval> + +Regular Expressions +=================== + + >>> import re + >>> def color_re(s, check_roundtrip=True): + ... val = colorizer.colorize(re.compile(s)) + ... if check_roundtrip: + ... roundtrip_val = re.sub('</?[\w-]+>', '', val) + ... roundtrip_val = roundtrip_val.replace('>', '>') + ... roundtrip_val = roundtrip_val.replace('<', '<') + ... 
roundtrip_val = roundtrip_val.replace('&', '&') + ... assert roundtrip_val == s, roundtrip_val + ... print val + + >>> # Literal characters + >>> color_re(u'abc \t\r\n\f\v \xff \uffff \U000fffff', False) + <pyval>abc \t\r\n\f\v \xff \uffff \U000fffff</pyval> + >>> color_re(r'\.\^\$\\\*\+\?\{\}\[\]\|\(\)') + <pyval>\.\^\$\\\*\+\?\{\}\[\]\|\(\)</pyval> + + >>> # Any character & character classes + >>> color_re(r".\d\D\s\S\w\W\A^$\b\B\Z") + <pyval>.\d\D\s\S\w\W\A^$\b\B\Z</pyval> + + >>> # Branching + >>> color_re(r"foo|bar") + <pyval>foo<re-op>|</re-op>bar</pyval> + + >>> # Character classes + >>> color_re(r"[abcd]") + <pyval><re-group>[</re-group>abcd<re-group>]</re-group></pyval> + + >>> # Repeats + >>> color_re(r"a*b+c{4,}d{,5}e{3,9}f?") + <pyval>a<re-op>*</re-op>b<re-op>+</re-op>c<re-op>{4,}</re-op>d<re-op>{,5}</re-op>e<re-op>{3,9}</re-op>f<re-op>?</re-op></pyval> + >>> color_re(r"a*?b+?c{4,}?d{,5}?e{3,9}?f??") + <pyval>a<re-op>*?</re-op>b<re-op>+?</re-op>c<re-op>{4,}?</re-op>d<re-op>{,5}?</re-op>e<re-op>{3,9}?</re-op>f<re-op>??</re-op></pyval> + + >>> # Subpatterns + >>> color_re(r"(foo (bar) | (baz))") + <pyval><re-group>(</re-group>foo <re-group>(</re-group>bar<re-group>)</re-group> <re-op>|</re-op> <re-group>(</re-group>baz<re-group>)</re-group><re-group>)</re-group></pyval> + >>> color_re(r"(?:foo (?:bar) | (?:baz))") + <pyval><re-group>(?:</re-group>foo <re-group>(?:</re-group>bar<re-group>)</re-group> <re-op>|</re-op> <re-group>(?:</re-group>baz<re-group>)</re-group><re-group>)</re-group></pyval> + >>> color_re("(foo (?P<a>bar) | (?P<boop>baz))") + <pyval><re-group>(</re-group>foo <re-group>(?P<</re-group><re-ref>a</re-ref><re-group>></re-group>bar<re-group>)</re-group> <re-op>|</re-op> <re-group>(?P<</re-group><re-ref>boop</re-ref><re-group>></re-group>baz<re-group>)</re-group><re-group>)</re-group></pyval> + + >>> # Group References + >>> color_re(r"(...) 
and (\1)") + <pyval><re-group>(</re-group>...<re-group>)</re-group> and <re-group>(</re-group><re-ref>\1</re-ref><re-group>)</re-group></pyval> + + >>> # Ranges + >>> color_re(r"[a-bp-z]") + <pyval><re-group>[</re-group>a<re-op>-</re-op>bp<re-op>-</re-op>z<re-group>]</re-group></pyval> + >>> color_re(r"[^a-bp-z]") + <pyval><re-group>[</re-group><re-op>^</re-op>a<re-op>-</re-op>bp<re-op>-</re-op>z<re-group>]</re-group></pyval> + >>> color_re(r"[^abc]") + <pyval><re-group>[</re-group><re-op>^</re-op>abc<re-group>]</re-group></pyval> + + >>> # Lookahead/behinds + >>> color_re(r"foo(?=bar)") + <pyval>foo<re-group>(?=</re-group>bar<re-group>)</re-group></pyval> + >>> color_re(r"foo(?!bar)") + <pyval>foo<re-group>(?!</re-group>bar<re-group>)</re-group></pyval> + >>> color_re(r"(?<=bar)foo") + <pyval><re-group>(?<=</re-group>bar<re-group>)</re-group>foo</pyval> + >>> color_re(r"(?<!bar)foo") + <pyval><re-group>(?<!</re-group>bar<re-group>)</re-group>foo</pyval> + + >>> # Flags + >>> color_re(r"(?im)^Food") + <pyval><re-flags>(?im)</re-flags>^Food</pyval> + >>> color_re(r"(?Limsx)^Food") + <pyval><re-flags>(?Limsx)</re-flags>^Food</pyval> + >>> color_re(r"(?Limstux)^Food") + <pyval><re-flags>(?Limstux)</re-flags>^Food</pyval> + >>> color_re(r"(?x)This is verbose", False) + <pyval><re-flags>(?x)</re-flags>Thisisverbose</pyval> + + + + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |