From: <md...@us...> - 2007-07-16 12:02:49
|
Revision: 3537 http://matplotlib.svn.sourceforge.net/matplotlib/?rev=3537&view=rev Author: mdboom Date: 2007-07-16 05:02:47 -0700 (Mon, 16 Jul 2007) Log Message: ----------- Updated pyparsing to latest stable version (1.4.6) and updating mathtext to support it. Modified Paths: -------------- branches/mathtext_mgd/lib/matplotlib/mathtext.py branches/mathtext_mgd/lib/matplotlib/pyparsing.py Modified: branches/mathtext_mgd/lib/matplotlib/mathtext.py =================================================================== --- branches/mathtext_mgd/lib/matplotlib/mathtext.py 2007-07-16 08:01:21 UTC (rev 3536) +++ branches/mathtext_mgd/lib/matplotlib/mathtext.py 2007-07-16 12:02:47 UTC (rev 3537) @@ -135,7 +135,8 @@ from matplotlib import verbose from matplotlib.pyparsing import Literal, Word, OneOrMore, ZeroOrMore, \ Combine, Group, Optional, Forward, NotAny, alphas, nums, alphanums, \ - StringStart, StringEnd, ParseException, FollowedBy, Regex + StringStart, StringEnd, ParseException, FollowedBy, Regex, \ + operatorPrecedence, opAssoc, ParseResults from matplotlib.afm import AFM from matplotlib.cbook import enumerate, iterable, Bunch @@ -1259,7 +1260,7 @@ def expression(self, s, loc, toks): self.expr = ExpressionElement(toks) - return loc, [self.expr] + return [self.expr] def space(self, s, loc, toks): assert(len(toks)==1) @@ -1271,7 +1272,7 @@ element = SpaceElement(num) self.symbols.append(element) - return loc, [element] + return [element] def symbol(self, s, loc, toks): @@ -1300,7 +1301,7 @@ sym = SymbolElement(toks[0]) self.symbols.append(sym) - return loc, [sym] + return [sym] def composite(self, s, loc, toks): @@ -1315,7 +1316,7 @@ self.symbols.append(sym0) self.symbols.append(sym1) - return loc, [sym0] + return [sym0] def accent(self, s, loc, toks): @@ -1343,13 +1344,13 @@ sym.neighbors['above'] = above sym.set_pady(1) self.symbols.append(above) - return loc, [sym] + return [sym] def group(self, s, loc, toks): assert(len(toks)==1) #print 'grp', toks grp = GroupElement(toks[0]) - return loc, [grp] + return [grp] def font(self, s, loc, toks): @@ -1357,7 +1358,7 @@ name, grp = toks[0] #print 'fontgrp', toks grp.set_font(name[1:]) # suppress the slash - return loc, [grp] + return [grp] def subscript(self, s, loc, toks): assert(len(toks)==1) @@ -1391,7 +1392,7 @@ else: prev.neighbors['superscript'] = next - return loc, [prev] + return [prev] def subsuperscript(self, s, loc, toks): assert(len(toks)==1) @@ -1405,7 +1406,7 @@ prev.neighbors['subscript'] = down prev.neighbors['superscript'] = up - return loc, [prev] + return [prev] Modified: branches/mathtext_mgd/lib/matplotlib/pyparsing.py =================================================================== --- branches/mathtext_mgd/lib/matplotlib/pyparsing.py 2007-07-16 08:01:21 UTC (rev 3536) +++ branches/mathtext_mgd/lib/matplotlib/pyparsing.py 2007-07-16 12:02:47 UTC (rev 3537) @@ -1,6 +1,6 @@ # module pyparsing.py # -# Copyright (c) 2003,2004,2005 Paul T. McGuire +# Copyright (c) 2003-2007 Paul T. McGuire # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -21,27 +21,24 @@ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # -# Todo: -# - add pprint() - pretty-print output of defined BNF -# #from __future__ import generators __doc__ = \ """ pyparsing module - Classes and methods to define and execute parsing grammars -The pyparsing module is an alternative approach to creating and executing simple grammars, +The pyparsing module is an alternative approach to creating and executing simple grammars, vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you -don't need to learn a new syntax for defining grammars or matching expressions - the parsing module +don't need to learn a new syntax for defining grammars or matching expressions - the parsing module provides a library of classes that you use to construct the grammar directly in Python. Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!"):: from pyparsing import Word, alphas - + # define grammar of a greeting - greet = Word( alphas ) + "," + Word( alphas ) + "!" - + greet = Word( alphas ) + "," + Word( alphas ) + "!" + hello = "Hello, World!" print hello, "->", greet.parseString( hello ) @@ -49,10 +46,10 @@ Hello, World! -> ['Hello', ',', 'World', '!'] -The Python representation of the grammar is quite readable, owing to the self-explanatory +The Python representation of the grammar is quite readable, owing to the self-explanatory class names, and the use of '+', '|' and '^' operators. -The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an +The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an object with named attributes. The pyparsing module handles some of the problems that are typically vexing when writing text parsers: @@ -60,15 +57,18 @@ - quoted strings - embedded comments """ -__version__ = "1.3.4alpha1" -__versionTime__ = "14 December 2005 05:48" +__version__ = "1.4.6" +__versionTime__ = "11 April 2007 16:41" __author__ = "Paul McGuire <pt...@us...>" import string +from weakref import ref as wkref import copy,sys import warnings import re -#sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) +import sre_constants +import xml.sax.saxutils +#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) def _ustr(obj): """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries @@ -79,7 +79,7 @@ # If this works, then _ustr(obj) has the same behaviour as str(obj), so # it won't break any existing code. return str(obj) - + except UnicodeEncodeError, e: # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) # state that "The return value must be a string object". However, does a @@ -96,21 +96,29 @@ def _str2dict(strg): return dict( [(c,0) for c in strg] ) + #~ return set( [c for c in strg] ) +class _Constants(object): + pass + alphas = string.lowercase + string.uppercase nums = string.digits hexnums = nums + "ABCDEFabcdef" -alphanums = alphas + nums +alphanums = alphas + nums class ParseBaseException(Exception): """base exception class for all parsing runtime exceptions""" __slots__ = ( "loc","msg","pstr","parserElement" ) # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( self, pstr, loc, msg, elem=None ): + # constructor as small and fast as possible + def __init__( self, pstr, loc=0, msg=None, elem=None ): self.loc = loc - self.msg = msg - self.pstr = pstr + if msg is None: + self.msg = pstr + self.pstr = "" + else: + self.msg = msg + self.pstr = pstr self.parserElement = elem def __getattr__( self, aname ): @@ -129,55 +137,78 @@ raise AttributeError, aname def __str__( self ): - return "%s (at char %d), (line:%d, col:%d)" % ( self.msg, self.loc, self.lineno, self.column ) + return "%s (at char %d), (line:%d, col:%d)" % \ + ( self.msg, self.loc, self.lineno, self.column ) def __repr__( self ): return _ustr(self) def markInputline( self, markerString = ">!<" ): - """Extracts the exception line from the input string, and marks + """Extracts the exception line from the input string, and marks the location of the exception with a special symbol. """ line_str = self.line line_column = self.column - 1 if markerString: - line_str = "".join( [line_str[:line_column], markerString, line_str[line_column:]]) + line_str = "".join( [line_str[:line_column], + markerString, line_str[line_column:]]) return line_str.strip() class ParseException(ParseBaseException): - """exception thrown when parse expressions don't match class""" - """supported attributes by name are: + """exception thrown when parse expressions don't match class; + supported attributes by name are: - lineno - returns the line number of the exception text - col - returns the column number of the exception text - line - returns the line containing the exception text """ pass - + class ParseFatalException(ParseBaseException): """user-throwable exception thrown when inconsistent parse content is found; stops all parsing immediately""" pass +#~ class ReparseException(ParseBaseException): + #~ """Experimental class - parse actions can raise this exception to cause + #~ pyparsing to reparse the input string: + #~ - with a modified input string, and/or + #~ - with a modified start location + #~ Set the values of the ReparseException in the constructor, and raise the + #~ exception in a parse action to cause pyparsing to use the new string/location. + #~ Setting the values as None causes no change to be made. + #~ """ + #~ def __init_( self, newstring, restartLoc ): + #~ self.newParseText = newstring + #~ self.reparseLoc = restartLoc + class RecursiveGrammarException(Exception): """exception thrown by validate() if the grammar could be improperly recursive""" def __init__( self, parseElementList ): self.parseElementTrace = parseElementList - + def __str__( self ): return "RecursiveGrammarException: %s" % self.parseElementTrace +class _ParseResultsWithOffset(object): + def __init__(self,p1,p2): + self.tup = (p1,p2) + def __getitem__(self,i): + return self.tup[i] + def __repr__(self): + return repr(self.tup) + class ParseResults(object): """Structured parse results, to provide multiple means of access to the parsed data: - as a list (len(results)) - by list index (results[0], results[1], etc.) - by attribute (results.<resultsName>) """ - __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__modal" ) + __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" ) def __new__(cls, toklist, name=None, asList=True, modal=True ): if isinstance(toklist, cls): return toklist retobj = object.__new__(cls) retobj.__doinit = True return retobj - + # Performance tuning: we construct a *lot* of these, so keep this # constructor as small and fast as possible def __init__( self, toklist, name=None, asList=True, modal=True ): @@ -185,77 +216,87 @@ self.__doinit = False self.__name = None self.__parent = None - self.__modal = modal + self.__accumNames = {} if isinstance(toklist, list): self.__toklist = toklist[:] else: self.__toklist = [toklist] self.__tokdict = dict() + # this line is related to debugging the asXML bug + #~ asList = False + if name: - if not self.__name: - self.__modal = self.__modal and modal + if not modal: + self.__accumNames[name] = 0 if isinstance(name,int): name = _ustr(name) # will always return a str, but use _ustr for consistency self.__name = name - if toklist: - if isinstance(toklist,basestring): + if not toklist in (None,'',[]): + if isinstance(toklist,basestring): toklist = [ toklist ] if asList: if isinstance(toklist,ParseResults): - self[name] = (toklist.copy(),-1) + self[name] = _ParseResultsWithOffset(toklist.copy(),-1) else: - self[name] = (ParseResults(toklist[0]),-1) + self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1) self[name].__name = name else: try: self[name] = toklist[0] - except TypeError: + except (KeyError,TypeError): self[name] = toklist def __getitem__( self, i ): if isinstance( i, (int,slice) ): return self.__toklist[i] else: - if self.__modal: + if i not in self.__accumNames: return self.__tokdict[i][-1][0] else: return ParseResults([ v[0] for v in self.__tokdict[i] ]) def __setitem__( self, k, v ): - if isinstance(v,tuple): + if isinstance(v,_ParseResultsWithOffset): self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] sub = v[0] + elif isinstance(k,int): + self.__toklist[k] = v + sub = v else: self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)] sub = v if isinstance(sub,ParseResults): - sub.__parent = self - + sub.__parent = wkref(self) + def __delitem__( self, i ): - del self.__toklist[i] + if isinstance(i,(int,slice)): + del self.__toklist[i] + else: + del self._tokdict[i] def __contains__( self, k ): return self.__tokdict.has_key(k) - + def __len__( self ): return len( self.__toklist ) + def __nonzero__( self ): return len( self.__toklist ) > 0 def __iter__( self ): return iter( self.__toklist ) - def keys( self ): + def keys( self ): """Returns all named result keys.""" return self.__tokdict.keys() - - def items( self ): + + def items( self ): """Returns all named result keys and values as a list of tuples.""" - return [(k,v[-1][0]) for k,v in self.__tokdict.items()] - - def values( self ): + return [(k,self[k]) for k in self.__tokdict.keys()] + + def values( self ): """Returns all named result values.""" return [ v[-1][0] for v in self.__tokdict.values() ] def __getattr__( self, name ): if name not in self.__slots__: if self.__tokdict.has_key( name ): - if self.__modal: + if name not in self.__accumNames: return self.__tokdict[name][-1][0] else: return ParseResults([ v[0] for v in self.__tokdict[name] ]) @@ -263,19 +304,27 @@ return "" return None + def __add__( self, other ): + ret = self.copy() + ret += other + return ret + def __iadd__( self, other ): if other.__tokdict: offset = len(self.__toklist) addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) - otherdictitems = [(k,(v[0],addoffset(v[1])) ) for (k,vlist) in other.__tokdict.items() for v in vlist] + otheritems = other.__tokdict.items() + otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) + for (k,vlist) in otheritems for v in vlist] for k,v in otherdictitems: self[k] = v if isinstance(v[0],ParseResults): - v[0].__parent = self + v[0].__parent = wkref(self) self.__toklist += other.__toklist + self.__accumNames.update( other.__accumNames ) del other return self - + def __repr__( self ): return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) @@ -321,45 +370,52 @@ ret = ParseResults( self.__toklist ) ret.__tokdict = self.__tokdict.copy() ret.__parent = self.__parent - ret.__modal = self.__modal + ret.__accumNames.update( self.__accumNames ) ret.__name = self.__name return ret - + def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" nl = "\n" out = [] - namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() for v in vlist ] ) + namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() + for v in vlist ] ) nextLevelIndent = indent + " " - + # collapse out indents if formatting is not desired if not formatted: indent = "" nextLevelIndent = "" nl = "" - + selfTag = None if doctag is not None: selfTag = doctag else: if self.__name: selfTag = self.__name - + if not selfTag: if namedItemsOnly: return "" else: selfTag = "ITEM" - + out += [ nl, indent, "<", selfTag, ">" ] - + worklist = self.__toklist for i,res in enumerate(worklist): if isinstance(res,ParseResults): if i in namedItems: - out += [ res.asXML(namedItems[i], namedItemsOnly and doctag is None, nextLevelIndent,formatted)] + out += [ res.asXML(namedItems[i], + namedItemsOnly and doctag is None, + nextLevelIndent, + formatted)] else: - out += [ res.asXML(None, namedItemsOnly and doctag is None, nextLevelIndent,formatted)] + out += [ res.asXML(None, + namedItemsOnly and doctag is None, + nextLevelIndent, + formatted)] else: # individual token, see if there is a name for it resTag = None @@ -370,51 +426,110 @@ continue else: resTag = "ITEM" - out += [ nl, nextLevelIndent, "<", resTag, ">", _ustr(res), "</", resTag, ">" ] - + xmlBodyText = xml.sax.saxutils.escape(_ustr(res)) + out += [ nl, nextLevelIndent, "<", resTag, ">", + xmlBodyText, + "</", resTag, ">" ] + out += [ nl, indent, "</", selfTag, ">" ] return "".join(out) - def __lookup(self,sub): for k,vlist in self.__tokdict.items(): for v,loc in vlist: if sub is v: return k return None - + def getName(self): """Returns the results name for this token expression.""" if self.__name: return self.__name elif self.__parent: - par = self.__parent + par = self.__parent() if par: return par.__lookup(self) else: return None - elif (len(self) == 1 and + elif (len(self) == 1 and len(self.__tokdict) == 1 and self.__tokdict.values()[0][0][1] in (0,-1)): return self.__tokdict.keys()[0] else: return None + + def dump(self,indent='',depth=0): + """Diagnostic method for listing out the contents of a ParseResults. + Accepts an optional indent argument so that this string can be embedded + in a nested display of other data.""" + out = [] + out.append( indent+_ustr(self.asList()) ) + keys = self.items() + keys.sort() + for k,v in keys: + if out: + out.append('\n') + out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) + if isinstance(v,ParseResults): + if v.keys(): + #~ out.append('\n') + out.append( v.dump(indent,depth+1) ) + #~ out.append('\n') + else: + out.append(_ustr(v)) + else: + out.append(_ustr(v)) + #~ out.append('\n') + return "".join(out) + # add support for pickle protocol + def __getstate__(self): + return ( self.__toklist, + ( self.__tokdict.copy(), + self.__parent is not None and self.__parent() or None, + self.__accumNames, + self.__name ) ) + + def __setstate__(self,state): + self.__toklist = state[0] + self.__tokdict, \ + par, \ + inAccumNames, \ + self.__name = state[1] + self.__accumNames = {} + self.__accumNames.update(inAccumNames) + if par is not None: + self.__parent = wkref(par) + else: + self.__parent = None + + def col (loc,strg): - """Returns current column within a string, counting newlines as line separators + """Returns current column within a string, counting newlines as line separators. The first column is number 1. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information + on parsing strings containing <TAB>s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. """ - return loc - strg.rfind("\n", 0, loc) + return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc) def lineno(loc,strg): - """Returns current line number within a string, counting newlines as line separators + """Returns current line number within a string, counting newlines as line separators. The first line is number 1. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information + on parsing strings containing <TAB>s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. """ return strg.count("\n",0,loc) + 1 def line( loc, strg ): - """Returns the line of text containing loc within a string, counting newlines as line separators - The first line is number 1. + """Returns the line of text containing loc within a string, counting newlines as line separators. """ lastCR = strg.rfind("\n", 0, loc) nextCR = strg.find("\n", loc) @@ -424,13 +539,13 @@ return strg[lastCR+1:] def _defaultStartDebugAction( instring, loc, expr ): - print "Match",expr,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ) + print "Match",_ustr(expr),"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ) def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): - print "Matched",expr,"->",toks.asList() - + print "Matched",_ustr(expr),"->",toks.asList() + def _defaultExceptionDebugAction( instring, loc, expr, exc ): - print "Exception raised:", exc + print "Exception raised:", _ustr(exc) def nullDebugAction(*args): """'Do-nothing' debug action, to suppress debugging output during parsing.""" @@ -439,37 +554,44 @@ class ParserElement(object): """Abstract base level parser element class.""" DEFAULT_WHITE_CHARS = " \n\t\r" - + def setDefaultWhitespaceChars( chars ): """Overrides the default whitespace chars """ ParserElement.DEFAULT_WHITE_CHARS = chars setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) - + def __init__( self, savelist=False ): - self.parseAction = None + self.parseAction = list() + self.failAction = None #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall self.strRepr = None self.resultsName = None self.saveAsList = savelist self.skipWhitespace = True self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS - self.mayReturnEmpty = False + self.copyDefaultWhiteChars = True + self.mayReturnEmpty = False # used when checking for left-recursion self.keepTabs = False - self.ignoreExprs = [] + self.ignoreExprs = list() self.debug = False self.streamlined = False - self.mayIndexError = True + self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index self.errmsg = "" - self.modalResults = True - self.debugActions = ( None, None, None ) + self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) + self.debugActions = ( None, None, None ) #custom debug actions self.re = None + self.callPreparse = True # used to avoid redundant calls to preParse + self.callDuringTry = False def copy( self ): - """Make a copy of this ParseElement. Useful for defining different parse actions + """Make a copy of this ParserElement. Useful for defining different parse actions for the same parsing pattern, using copies of the original parse element.""" cpy = copy.copy( self ) - cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS + cpy.parseAction = self.parseAction[:] + cpy.ignoreExprs = self.ignoreExprs[:] + if self.copyDefaultWhiteChars: + cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS return cpy def setName( self, name ): @@ -479,9 +601,9 @@ return self def setResultsName( self, name, listAllMatches=False ): - """Define name for referencing matching tokens as a nested attribute + """Define name for referencing matching tokens as a nested attribute of the returned parse results. - NOTE: this returns a *copy* of the original ParseElement object; + NOTE: this returns a *copy* of the original ParserElement object; this is so that the client can define a basic element, such as an integer, and reference it in multiple places with different names. """ @@ -490,19 +612,111 @@ newself.modalResults = not listAllMatches return newself - def setParseAction( self, fn ): + def normalizeParseActionArgs( f ): + """Internal method used to decorate parse actions that take fewer than 3 arguments, + so that all parse actions can be called as f(s,l,t).""" + STAR_ARGS = 4 + + try: + restore = None + if isinstance(f,type): + restore = f + f = f.__init__ + if f.func_code.co_flags & STAR_ARGS: + return f + numargs = f.func_code.co_argcount + if hasattr(f,"im_self"): + numargs -= 1 + if restore: + f = restore + except AttributeError: + try: + # not a function, must be a callable object, get info from the + # im_func binding of its bound __call__ method + if f.__call__.im_func.func_code.co_flags & STAR_ARGS: + return f + numargs = f.__call__.im_func.func_code.co_argcount + if hasattr(f.__call__,"im_self"): + numargs -= 1 + except AttributeError: + # not a bound method, get info directly from __call__ method + if f.__call__.func_code.co_flags & STAR_ARGS: + return f + numargs = f.__call__.func_code.co_argcount + if hasattr(f.__call__,"im_self"): + numargs -= 1 + + #~ print "adding function %s with %d args" % (f.func_name,numargs) + if numargs == 3: + return f + else: + if numargs == 2: + def tmp(s,l,t): + return f(l,t) + elif numargs == 1: + def tmp(s,l,t): + return f(t) + else: #~ numargs == 0: + def tmp(s,l,t): + return f() + try: + tmp.__name__ = f.__name__ + except AttributeError: + # no need for special handling if attribute doesnt exist + pass + try: + tmp.__doc__ = f.__doc__ + except AttributeError: + # no need for special handling if attribute doesnt exist + pass + try: + tmp.__dict__.update(f.__dict__) + except AttributeError: + # no need for special handling if attribute doesnt exist + pass + return tmp + normalizeParseActionArgs = staticmethod(normalizeParseActionArgs) + + def setParseAction( self, *fns, **kwargs ): """Define action to perform when successfully matching parse element definition. - Parse action fn is a callable method with the arguments (s, loc, toks) where: - - s = the original string being parsed + Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), + fn(loc,toks), fn(toks), or just fn(), where: + - s = the original string being parsed (see note below) - loc = the location of the matching substring - toks = a list of the matched tokens, packaged as a ParseResults object - If the function fn modifies the tokens, it can return them as the return + If the functions in fns modify the tokens, they can return them as the return value from fn, and the modified list of tokens will replace the original. Otherwise, fn does not need to return any value. - """ - self.parseAction = fn + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{parseString}<parseString>} for more information + on parsing strings containing <TAB>s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ + self.parseAction = map(self.normalizeParseActionArgs, list(fns)) + self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) return self + def addParseAction( self, *fns, **kwargs ): + """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" + self.parseAction += map(self.normalizeParseActionArgs, list(fns)) + self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) + return self + + def setFailAction( self, fn ): + """Define action to perform if parsing fails at this expression. + Fail acton fn is a callable function that takes the arguments + fn(s,loc,expr,err) where: + - s = string being parsed + - loc = location where expression match was attempted and failed + - expr = the parse expression that failed + - err = the exception thrown + The function returns no value. It may throw ParseFatalException + if it is desired to stop parsing immediately.""" + self.failAction = fn + return self + def skipIgnorables( self, instring, loc ): exprsFound = True while exprsFound: @@ -510,7 +724,7 @@ for e in self.ignoreExprs: try: while 1: - loc,dummy = e.parse( instring, loc ) + loc,dummy = e._parse( instring, loc ) exprsFound = True except ParseException: pass @@ -519,13 +733,13 @@ def preParse( self, instring, loc ): if self.ignoreExprs: loc = self.skipIgnorables( instring, loc ) - + if self.skipWhitespace: wt = self.whiteChars instrlen = len(instring) while loc < instrlen and instring[loc] in wt: loc += 1 - + return loc def parseImpl( self, instring, loc, doActions=True ): @@ -535,66 +749,70 @@ return tokenlist #~ @profile - def parse( self, instring, loc, doActions=True, callPreParse=True ): + def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): debugging = ( self.debug ) #and doActions ) - if debugging: + if debugging or self.failAction: #~ print "Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ) if (self.debugActions[0] ): self.debugActions[0]( instring, loc, self ) - if callPreParse: - loc = self.preParse( instring, loc ) + if callPreParse and self.callPreparse: + preloc = self.preParse( instring, loc ) + else: + preloc = loc tokensStart = loc try: try: - loc,tokens = self.parseImpl( instring, loc, doActions ) + loc,tokens = self.parseImpl( instring, preloc, doActions ) except IndexError: - raise ParseException, ( instring, len(instring), self.errmsg, self ) + raise ParseException( instring, len(instring), self.errmsg, self ) except ParseException, err: #~ print "Exception raised:", err - if (self.debugActions[2] ): + if self.debugActions[2]: self.debugActions[2]( instring, tokensStart, self, err ) + if self.failAction: + self.failAction( instring, tokensStart, self, err ) raise else: if callPreParse: - loc = self.preParse( instring, loc ) + preloc = self.preParse( instring, loc ) + else: + preloc = loc tokensStart = loc if self.mayIndexError or loc >= len(instring): try: - loc,tokens = self.parseImpl( instring, loc, doActions ) + loc,tokens = self.parseImpl( instring, preloc, doActions ) except IndexError: - raise ParseException, ( instring, len(instring), self.errmsg, self ) + raise ParseException( instring, len(instring), self.errmsg, self ) else: - loc,tokens = self.parseImpl( instring, loc, doActions ) - + loc,tokens = self.parseImpl( instring, preloc, doActions ) + tokens = self.postParse( instring, loc, tokens ) retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) - if self.parseAction and doActions: + if self.parseAction and (doActions or self.callDuringTry): if debugging: try: - tokens = self.parseAction( instring, tokensStart, retTokens ) - if tokens is not None: - if isinstance(tokens,tuple): - tokens = tokens[1] - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) + for fn in self.parseAction: + tokens = fn( instring, tokensStart, retTokens ) + if tokens is not None: + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + modal=self.modalResults ) except ParseException, err: #~ print "Exception raised in user parse action:", err if (self.debugActions[2] ): self.debugActions[2]( instring, tokensStart, self, err ) raise else: - tokens = self.parseAction( instring, tokensStart, retTokens ) - if tokens is not None: - if isinstance(tokens,tuple): - tokens = tokens[1] - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) + for fn in self.parseAction: + tokens = fn( instring, tokensStart, retTokens ) + if tokens is not None: + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + modal=self.modalResults ) if debugging: #~ print "Matched",self,"->",retTokens.asList() @@ -604,53 +822,128 @@ return loc, retTokens def tryParse( self, instring, loc ): - return self.parse( instring, loc, doActions=False )[0] + return self._parse( instring, loc, doActions=False )[0] + + # this method gets repeatedly called during backtracking with the same arguments - + # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression + def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): + #if doActions and self.parseAction: + # return self._parseNoCache( instring, loc, doActions, callPreParse ) + lookup = (self,instring,loc,callPreParse,doActions) + if lookup in ParserElement._exprArgCache: + value = ParserElement._exprArgCache[ lookup ] + if isinstance(value,Exception): + if isinstance(value,ParseBaseException): + value.loc = loc + raise value + return value + else: + try: + ParserElement._exprArgCache[ lookup ] = \ + value = self._parseNoCache( instring, loc, doActions, callPreParse ) + return value + except ParseBaseException, pe: + ParserElement._exprArgCache[ lookup ] = pe + raise + _parse = _parseNoCache + + # argument cache for optimizing repeated calls when backtracking through recursive expressions + _exprArgCache = {} + def resetCache(): + ParserElement._exprArgCache.clear() + resetCache = staticmethod(resetCache) + + _packratEnabled = False + def enablePackrat(): + """Enables "packrat" parsing, which adds memoizing to the parsing logic. + Repeated parse attempts at the same string location (which happens + often in many complex grammars) can immediately return a cached value, + instead of re-executing parsing/validating code. Memoizing is done of + both valid results and parsing exceptions. + + This speedup may break existing programs that use parse actions that + have side-effects. For this reason, packrat parsing is disabled when + you first import pyparsing. To activate the packrat feature, your + program must call the class method ParserElement.enablePackrat(). If + your program uses psyco to "compile as you go", you must call + enablePackrat before calling psyco.full(). If you do not do this, + Python will crash. For best results, call enablePackrat() immediately + after importing pyparsing. + """ + if not ParserElement._packratEnabled: + ParserElement._packratEnabled = True + ParserElement._parse = ParserElement._parseCache + enablePackrat = staticmethod(enablePackrat) + def parseString( self, instring ): """Execute the parse expression with the given string. - This is the main interface to the client code, once the complete + This is the main interface to the client code, once the complete expression has been built. + + Note: parseString implicitly calls expandtabs() on the input string, + in order to report proper column numbers in parse actions. + If the input string contains tabs and + the grammar uses parse actions that use the loc argument to index into the + string being parsed, you can ensure you have a consistent view of the input + string by: + - calling parseWithTabs on your grammar before calling parseString + (see L{I{parseWithTabs}<parseWithTabs>}) + - define your parse action using the full (s,loc,toks) signature, and + reference the input string using the parse action's s argument + - explictly expand the tabs in your input string before calling + parseString """ + ParserElement.resetCache() if not self.streamlined: self.streamline() - self.saveAsList = True + #~ self.saveAsList = True for e in self.ignoreExprs: e.streamline() if self.keepTabs: - loc, tokens = self.parse( instring, 0 ) + loc, tokens = self._parse( instring, 0 ) else: - loc, tokens = self.parse( instring.expandtabs(), 0 ) + loc, tokens = self._parse( instring.expandtabs(), 0 ) return tokens - def scanString( self, instring ): - """Scan the input string for expression matches. Each match will return the matching tokens, start location, and end location.""" + def scanString( self, instring, maxMatches=sys.maxint ): + """Scan the input string for expression matches. Each match will return the + matching tokens, start location, and end location. May be called with optional + maxMatches argument, to clip scanning after 'n' matches are found. + + Note that the start and end locations are reported relative to the string + being parsed. See L{I{parseString}<parseString>} for more information on parsing + strings with embedded tabs.""" if not self.streamlined: self.streamline() for e in self.ignoreExprs: e.streamline() - + if not self.keepTabs: - instring = instring.expandtabs() + instring = _ustr(instring).expandtabs() instrlen = len(instring) loc = 0 preparseFn = self.preParse - parseFn = self.parse - while loc < instrlen: + parseFn = self._parse + ParserElement.resetCache() + matches = 0 + while loc <= instrlen and matches < maxMatches: try: - loc = preparseFn( instring, loc ) - nextLoc,tokens = parseFn( instring, loc, callPreParse=False ) + preloc = preparseFn( instring, loc ) + nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) except ParseException: - loc += 1 + loc = preloc+1 else: - yield tokens, loc, nextLoc + matches += 1 + yield tokens, preloc, nextLoc loc = nextLoc - + def transformString( self, instring ): """Extension to scanString, to modify matching text with modified tokens that may - be returned from a parse action. To use transformString, define a grammar and - attach a parse action to it that modifies the returned token list. - Invoking transformString() on a target string will then scan for matches, - and replace the matched text patterns according to the logic in the parse + be returned from a parse action. To use transformString, define a grammar and + attach a parse action to it that modifies the returned token list. + Invoking transformString() on a target string will then scan for matches, + and replace the matched text patterns according to the logic in the parse action. transformString() returns the resulting transformed string.""" out = [] lastE = 0 @@ -668,54 +961,85 @@ out.append(t) lastE = e out.append(instring[lastE:]) - return "".join(out) + return "".join(map(_ustr,out)) + def searchString( self, instring, maxMatches=sys.maxint ): + """Another extension to scanString, simplifying the access to the tokens found + to match the given parse expression. May be called with optional + maxMatches argument, to clip searching after 'n' matches are found. + """ + return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) + def __add__(self, other ): """Implementation of + operator - returns And""" if isinstance( other, basestring ): other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot add element of type %s to ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return And( [ self, other ] ) def __radd__(self, other ): """Implementation of += operator""" if isinstance( other, basestring ): other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot add element of type %s to ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return other + self def __or__(self, other ): """Implementation of | operator - returns MatchFirst""" if isinstance( other, basestring ): other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot add element of type %s to ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return MatchFirst( [ self, other ] ) def __ror__(self, other ): """Implementation of |= operator""" if isinstance( other, basestring ): other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot add element of type %s to ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return other | self def __xor__(self, other ): """Implementation of ^ operator - returns Or""" if isinstance( other, basestring ): other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot add element of type %s to ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return Or( [ self, other ] ) def __rxor__(self, other ): """Implementation of ^= operator""" if isinstance( other, basestring ): other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot add element of type %s to ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return other ^ self def __and__(self, other ): """Implementation of & operator - returns Each""" if isinstance( other, basestring ): other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot add element of type %s to ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return Each( [ self, other ] ) def __rand__(self, other ): """Implementation of right-& operator""" if isinstance( other, basestring ): other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot add element of type %s to ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return other & self def __invert__( self ): @@ -723,13 +1047,13 @@ return NotAny( self ) def suppress( self ): - """Suppresses the output of this ParseElement; useful to keep punctuation from + """Suppresses the output of this ParserElement; useful to keep punctuation from cluttering up returned output. """ return Suppress( self ) def leaveWhitespace( self ): - """Disables the skipping of whitespace before matching the characters in the + """Disables the skipping of whitespace before matching the characters in the ParserElement's defined pattern. This is normally only used internally by the pyparsing module, but may be needed in some whitespace-sensitive grammars. """ @@ -741,16 +1065,18 @@ """ self.skipWhitespace = True self.whiteChars = chars - + self.copyDefaultWhiteChars = False + return self + def parseWithTabs( self ): """Overrides default behavior to expand <TAB>s to spaces before parsing the input string. - Must be called before parseString when the input grammar contains elements that + Must be called before parseString when the input grammar contains elements that match <TAB> characters.""" self.keepTabs = True return self - + def ignore( self, other ): - """Define expression to be ignored (e.g., comments) while doing pattern + """Define expression to be ignored (e.g., comments) while doing pattern matching; may be called repeatedly, to define multiple comment or other ignorable patterns. """ @@ -763,8 +1089,8 @@ def setDebugActions( self, startAction, successAction, exceptionAction ): """Enable display of debugging messages while doing pattern matching.""" - self.debugActions = (startAction or _defaultStartDebugAction, - successAction or _defaultSuccessDebugAction, + self.debugActions = (startAction or _defaultStartDebugAction, + successAction or _defaultSuccessDebugAction, exceptionAction or _defaultExceptionDebugAction) self.debug = True return self @@ -782,15 +1108,15 @@ def __repr__( self ): return _ustr(self) - + def streamline( self ): self.streamlined = True self.strRepr = None return self - + def checkRecursion( self, parseElementList ): pass - + def validate( self, validateTrace=[] ): """Check defined expressions for valid structure, check for infinite recursive definitions.""" self.checkRecursion( [] ) @@ -840,7 +1166,7 @@ self.mayIndexError = False self.errmsg = "Unmatchable token" self.myException.msg = self.errmsg - + def parseImpl( self, instring, loc, doActions=True ): exc = self.myException exc.loc = loc @@ -857,9 +1183,10 @@ try: self.firstMatchChar = matchString[0] except IndexError: - warnings.warn("null string passed to Literal; use Empty() instead", + warnings.warn("null string passed to Literal; use Empty() instead", SyntaxWarning, stacklevel=2) - self.name = '"%s"' % self.match + self.__class__ = Empty + self.name = '"%s"' % _ustr(self.match) self.errmsg = "Expected " + self.name self.mayReturnEmpty = False self.myException.msg = self.errmsg @@ -873,14 +1200,14 @@ if (instring[loc] == self.firstMatchChar and (self.matchLen==1 or instring.startswith(self.match,loc)) ): return loc+self.matchLen, self.match - #~ raise ParseException, ( instring, loc, self.errmsg ) + #~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException exc.loc = loc exc.pstr = instring raise exc class Keyword(Token): - """Token to exactly match a specified string as a keyword, that is, it must be + """Token to exactly match a specified string as a keyword, that is, it must be immediately followed by a non-keyword character. Compare with Literal:: Literal("if") will match the leading 'if' in 'ifAndOnlyIf'. Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)' @@ -890,7 +1217,7 @@ matching, default is False. """ DEFAULT_KEYWORD_CHARS = alphanums+"_$" - + def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ): super(Keyword,self).__init__() self.match = matchString @@ -898,7 +1225,7 @@ try: self.firstMatchChar = matchString[0] except IndexError: - warnings.warn("null string passed to Keyword; use Empty() instead", + warnings.warn("null string passed to Keyword; use Empty() instead", SyntaxWarning, stacklevel=2) self.name = '"%s"' % self.match self.errmsg = "Expected " + self.name @@ -914,29 +1241,31 @@ def parseImpl( self, instring, loc, doActions=True ): if self.caseless: if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and + (loc == 0 or instring[loc-1].upper() not in self.identChars) ): return loc+self.matchLen, self.match else: if (instring[loc] == self.firstMatchChar and (self.matchLen==1 or instring.startswith(self.match,loc)) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) ): + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and + (loc == 0 or instring[loc-1] not in self.identChars) ): return loc+self.matchLen, self.match - #~ raise ParseException, ( instring, loc, self.errmsg ) + #~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException exc.loc = loc exc.pstr = instring raise exc - + def copy(self): c = super(Keyword,self).copy() c.identChars = Keyword.DEFAULT_KEYWORD_CHARS return c - + def setDefaultKeywordChars( chars ): """Overrides the default Keyword chars """ Keyword.DEFAULT_KEYWORD_CHARS = chars - setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) + setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) class CaselessLiteral(Literal): @@ -955,7 +1284,7 @@ def parseImpl( self, instring, loc, doActions=True ): if instring[ loc:loc+self.matchLen ].upper() == self.match: return loc+self.matchLen, self.returnString - #~ raise ParseException, ( instring, loc, self.errmsg ) + #~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException exc.loc = loc exc.pstr = instring @@ -963,13 +1292,13 @@ class CaselessKeyword(Keyword): def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ): - super(CaselessKeyword,self).__init__( matchString, identCars, caseless=True ) + super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) def parseImpl( self, instring, loc, doActions=True ): if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): return loc+self.matchLen, self.match - #~ raise ParseException, ( instring, loc, self.errmsg ) + #~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException exc.loc = loc exc.pstr = instring @@ -982,7 +1311,7 @@ defaults to the initial character set), and an optional minimum, maximum, and/or exact length. """ - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0 ): + def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ): super(Word,self).__init__() self.initCharsOrig = initChars self.initChars = _str2dict(initChars) @@ -992,7 +1321,7 @@ else: self.bodyCharsOrig = initChars self.bodyChars = _str2dict(initChars) - + self.maxSpecified = max > 0 self.minLen = min @@ -1010,20 +1339,26 @@ self.errmsg = "Expected " + self.name self.myException.msg = self.errmsg self.mayIndexError = False - + self.asKeyword = asKeyword + if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): if self.bodyCharsOrig == self.initCharsOrig: self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) elif len(self.bodyCharsOrig) == 1: self.reString = "%s[%s]*" % \ - (_escapeRegexChars(self.initCharsOrig), + (re.escape(self.initCharsOrig), _escapeRegexRangeChars(self.bodyCharsOrig),) else: self.reString = "[%s][%s]*" % \ (_escapeRegexRangeChars(self.initCharsOrig), _escapeRegexRangeChars(self.bodyCharsOrig),) - self.re = re.compile( self.reString ) - + if self.asKeyword: + self.reString = r"\b"+self.reString+r"\b" + try: + self.re = re.compile( self.reString ) + except: + self.re = None + def parseImpl( self, instring, loc, d... [truncated message content] |