From: <md...@us...> - 2008-06-02 18:28:13
|
Revision: 5365 http://matplotlib.svn.sourceforge.net/matplotlib/?rev=5365&view=rev Author: mdboom Date: 2008-06-02 11:26:43 -0700 (Mon, 02 Jun 2008) Log Message: ----------- Include our own XML escaping function for micro-Python installs that don't include xml.sax (such as that on the maemo platform) Modified Paths: -------------- trunk/matplotlib/lib/matplotlib/pyparsing.py Modified: trunk/matplotlib/lib/matplotlib/pyparsing.py =================================================================== --- trunk/matplotlib/lib/matplotlib/pyparsing.py 2008-06-02 17:57:28 UTC (rev 5364) +++ trunk/matplotlib/lib/matplotlib/pyparsing.py 2008-06-02 18:26:43 UTC (rev 5365) @@ -27,18 +27,18 @@ """ pyparsing module - Classes and methods to define and execute parsing grammars -The pyparsing module is an alternative approach to creating and executing simple grammars, +The pyparsing module is an alternative approach to creating and executing simple grammars, vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you -don't need to learn a new syntax for defining grammars or matching expressions - the parsing module +don't need to learn a new syntax for defining grammars or matching expressions - the parsing module provides a library of classes that you use to construct the grammar directly in Python. Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!"):: from pyparsing import Word, alphas - + # define grammar of a greeting - greet = Word( alphas ) + "," + Word( alphas ) + "!" - + greet = Word( alphas ) + "," + Word( alphas ) + "!" + hello = "Hello, World!" print hello, "->", greet.parseString( hello ) @@ -46,10 +46,10 @@ Hello, World! -> ['Hello', ',', 'World', '!'] -The Python representation of the grammar is quite readable, owing to the self-explanatory +The Python representation of the grammar is quite readable, owing to the self-explanatory class names, and the use of '+', '|' and '^' operators. 
-The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an +The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an object with named attributes. The pyparsing module handles some of the problems that are typically vexing when writing text parsers: @@ -68,7 +68,25 @@ import warnings import re import sre_constants -import xml.sax.saxutils +try: + from xml.sax.saxutils import escape as xml_escape +except ImportError: + def xml_escape(data, entities={}): + """Escape &, <, and > in a string of data. + + You can escape other strings of data by passing a dictionary as + the optional entities parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. + """ + + # must do ampersand first + data = data.replace("&", "&amp;") + data = data.replace(">", "&gt;") + data = data.replace("<", "&lt;") + if entities: + data = __dict_replace(data, entities) + return data + #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) def _ustr(obj): @@ -80,7 +98,7 @@ # If this works, then _ustr(obj) has the same behaviour as str(obj), so # it won't break any existing code. return str(obj) - + except UnicodeEncodeError, e: # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) # state that "The return value must be a string object". 
However, does a @@ -101,17 +119,17 @@ class _Constants(object): pass - + alphas = string.lowercase + string.uppercase nums = string.digits hexnums = nums + "ABCDEFabcdef" -alphanums = alphas + nums +alphanums = alphas + nums class ParseBaseException(Exception): """base exception class for all parsing runtime exceptions""" __slots__ = ( "loc","msg","pstr","parserElement" ) # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible + # constructor as small and fast as possible def __init__( self, pstr, loc=0, msg=None, elem=None ): self.loc = loc if msg is None: @@ -143,7 +161,7 @@ def __repr__( self ): return _ustr(self) def markInputline( self, markerString = ">!<" ): - """Extracts the exception line from the input string, and marks + """Extracts the exception line from the input string, and marks the location of the exception with a special symbol. """ line_str = self.line @@ -161,7 +179,7 @@ - line - returns the line containing the exception text """ pass - + class ParseFatalException(ParseBaseException): """user-throwable exception thrown when inconsistent parse content is found; stops all parsing immediately""" @@ -184,7 +202,7 @@ """exception thrown by validate() if the grammar could be improperly recursive""" def __init__( self, parseElementList ): self.parseElementTrace = parseElementList - + def __str__( self ): return "RecursiveGrammarException: %s" % self.parseElementTrace @@ -209,7 +227,7 @@ retobj = object.__new__(cls) retobj.__doinit = True return retobj - + # Performance tuning: we construct a *lot* of these, so keep this # constructor as small and fast as possible def __init__( self, toklist, name=None, asList=True, modal=True ): @@ -226,7 +244,7 @@ # this line is related to debugging the asXML bug #~ asList = False - + if name: if not modal: self.__accumNames[name] = 0 @@ -234,7 +252,7 @@ name = _ustr(name) # will always return a str, but use _ustr for consistency self.__name = name if not toklist 
in (None,'',[]): - if isinstance(toklist,basestring): + if isinstance(toklist,basestring): toklist = [ toklist ] if asList: if isinstance(toklist,ParseResults): @@ -269,7 +287,7 @@ sub = v if isinstance(sub,ParseResults): sub.__parent = wkref(self) - + def __delitem__( self, i ): if isinstance(i,(int,slice)): del self.__toklist[i] @@ -278,20 +296,20 @@ def __contains__( self, k ): return self.__tokdict.has_key(k) - + def __len__( self ): return len( self.__toklist ) def __bool__(self): return len( self.__toklist ) > 0 def __nonzero__( self ): return self.__bool__() def __iter__( self ): return iter( self.__toklist ) - def keys( self ): + def keys( self ): """Returns all named result keys.""" return self.__tokdict.keys() - - def items( self ): + + def items( self ): """Returns all named result keys and values as a list of tuples.""" return [(k,self[k]) for k in self.__tokdict.keys()] - - def values( self ): + + def values( self ): """Returns all named result values.""" return [ v[-1][0] for v in self.__tokdict.values() ] @@ -310,7 +328,7 @@ ret = self.copy() ret += other return ret - + def __iadd__( self, other ): if other.__tokdict: offset = len(self.__toklist) @@ -326,7 +344,7 @@ self.__accumNames.update( other.__accumNames ) del other return self - + def __repr__( self ): return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) @@ -375,7 +393,7 @@ ret.__accumNames.update( self.__accumNames ) ret.__name = self.__name return ret - + def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): """Returns the parse results as XML. 
Tags are created for tokens and lists that have defined results names.""" nl = "\n" @@ -383,28 +401,28 @@ namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() for v in vlist ] ) nextLevelIndent = indent + " " - + # collapse out indents if formatting is not desired if not formatted: indent = "" nextLevelIndent = "" nl = "" - + selfTag = None if doctag is not None: selfTag = doctag else: if self.__name: selfTag = self.__name - + if not selfTag: if namedItemsOnly: return "" else: selfTag = "ITEM" - + out += [ nl, indent, "<", selfTag, ">" ] - + worklist = self.__toklist for i,res in enumerate(worklist): if isinstance(res,ParseResults): @@ -428,11 +446,11 @@ continue else: resTag = "ITEM" - xmlBodyText = xml.sax.saxutils.escape(_ustr(res)) + xmlBodyText = xml_escape(_ustr(res)) out += [ nl, nextLevelIndent, "<", resTag, ">", xmlBodyText, "</", resTag, ">" ] - + out += [ nl, indent, "</", selfTag, ">" ] return "".join(out) @@ -442,7 +460,7 @@ if sub is v: return k return None - + def getName(self): """Returns the results name for this token expression.""" if self.__name: @@ -453,13 +471,13 @@ return par.__lookup(self) else: return None - elif (len(self) == 1 and + elif (len(self) == 1 and len(self.__tokdict) == 1 and self.__tokdict.values()[0][0][1] in (0,-1)): return self.__tokdict.keys()[0] else: return None - + def dump(self,indent='',depth=0): """Diagnostic method for listing out the contents of a ParseResults. Accepts an optional indent argument so that this string can be embedded @@ -491,7 +509,7 @@ self.__parent is not None and self.__parent() or None, self.__accumNames, self.__name ) ) - + def __setstate__(self,state): self.__toklist = state[0] self.__tokdict, \ @@ -521,7 +539,7 @@ def lineno(loc,strg): """Returns current line number within a string, counting newlines as line separators. The first line is number 1. - + Note: the default parsing behavior is to expand tabs in the input string before starting the parsing process. 
See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information on parsing strings containing <TAB>s, and suggested methods to maintain a @@ -545,7 +563,7 @@ def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): print "Matched",_ustr(expr),"->",toks.asList() - + def _defaultExceptionDebugAction( instring, loc, expr, exc ): print "Exception raised:", _ustr(exc) @@ -556,13 +574,13 @@ class ParserElement(object): """Abstract base level parser element class.""" DEFAULT_WHITE_CHARS = " \n\t\r" - + def setDefaultWhitespaceChars( chars ): """Overrides the default whitespace chars """ ParserElement.DEFAULT_WHITE_CHARS = chars setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) - + def __init__( self, savelist=False ): self.parseAction = list() self.failAction = None @@ -605,7 +623,7 @@ return self def setResultsName( self, name, listAllMatches=False ): - """Define name for referencing matching tokens as a nested attribute + """Define name for referencing matching tokens as a nested attribute of the returned parse results. NOTE: this returns a *copy* of the original ParserElement object; this is so that the client can define a basic element, such as an @@ -698,7 +716,7 @@ pass return tmp normalizeParseActionArgs = staticmethod(normalizeParseActionArgs) - + def setParseAction( self, *fns, **kwargs ): """Define action to perform when successfully matching parse element definition. Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), @@ -709,7 +727,7 @@ If the functions in fns modify the tokens, they can return them as the return value from fn, and the modified list of tokens will replace the original. Otherwise, fn does not need to return any value. - + Note: the default parsing behavior is to expand tabs in the input string before starting the parsing process. 
See L{I{parseString}<parseString>} for more information on parsing strings containing <TAB>s, and suggested methods to maintain a @@ -727,8 +745,8 @@ return self def setFailAction( self, fn ): - """Define action to perform if parsing fails at this expression. - Fail acton fn is a callable function that takes the arguments + """Define action to perform if parsing fails at this expression. + Fail acton fn is a callable function that takes the arguments fn(s,loc,expr,err) where: - s = string being parsed - loc = location where expression match was attempted and failed @@ -738,7 +756,7 @@ if it is desired to stop parsing immediately.""" self.failAction = fn return self - + def skipIgnorables( self, instring, loc ): exprsFound = True while exprsFound: @@ -755,13 +773,13 @@ def preParse( self, instring, loc ): if self.ignoreExprs: loc = self.skipIgnorables( instring, loc ) - + if self.skipWhitespace: wt = self.whiteChars instrlen = len(instring) while loc < instrlen and instring[loc] in wt: loc += 1 - + return loc def parseImpl( self, instring, loc, doActions=True ): @@ -808,7 +826,7 @@ raise ParseException( instring, len(instring), self.errmsg, self ) else: loc,tokens = self.parseImpl( instring, preloc, doActions ) - + tokens = self.postParse( instring, loc, tokens ) retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) @@ -818,9 +836,9 @@ for fn in self.parseAction: tokens = fn( instring, tokensStart, retTokens ) if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), modal=self.modalResults ) except ParseException, err: #~ print "Exception raised in user parse action:", err @@ -831,9 +849,9 @@ for fn in self.parseAction: tokens = fn( instring, tokensStart, retTokens ) if tokens is not None: - retTokens = 
ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), modal=self.modalResults ) if debugging: @@ -845,7 +863,7 @@ def tryParse( self, instring, loc ): return self._parse( instring, loc, doActions=False )[0] - + # this method gets repeatedly called during backtracking with the same arguments - # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): @@ -873,20 +891,20 @@ def resetCache(): ParserElement._exprArgCache.clear() resetCache = staticmethod(resetCache) - + _packratEnabled = False def enablePackrat(): """Enables "packrat" parsing, which adds memoizing to the parsing logic. - Repeated parse attempts at the same string location (which happens - often in many complex grammars) can immediately return a cached value, + Repeated parse attempts at the same string location (which happens + often in many complex grammars) can immediately return a cached value, instead of re-executing parsing/validating code. Memoizing is done of both valid results and parsing exceptions. - - This speedup may break existing programs that use parse actions that + + This speedup may break existing programs that use parse actions that have side-effects. For this reason, packrat parsing is disabled when you first import pyparsing. To activate the packrat feature, your program must call the class method ParserElement.enablePackrat(). If - your program uses psyco to "compile as you go", you must call + your program uses psyco to "compile as you go", you must call enablePackrat before calling psyco.full(). If you do not do this, Python will crash. For best results, call enablePackrat() immediately after importing pyparsing. 
@@ -898,21 +916,21 @@ def parseString( self, instring ): """Execute the parse expression with the given string. - This is the main interface to the client code, once the complete + This is the main interface to the client code, once the complete expression has been built. - + Note: parseString implicitly calls expandtabs() on the input string, - in order to report proper column numbers in parse actions. + in order to report proper column numbers in parse actions. If the input string contains tabs and - the grammar uses parse actions that use the loc argument to index into the - string being parsed, you can ensure you have a consistent view of the input + the grammar uses parse actions that use the loc argument to index into the + string being parsed, you can ensure you have a consistent view of the input string by: - calling parseWithTabs on your grammar before calling parseString (see L{I{parseWithTabs}<parseWithTabs>}) - - define your parse action using the full (s,loc,toks) signature, and + - define your parse action using the full (s,loc,toks) signature, and reference the input string using the parse action's s argument - - explictly expand the tabs in your input string before calling - parseString + - explictly expand the tabs in your input string before calling + parseString """ ParserElement.resetCache() if not self.streamlined: @@ -927,18 +945,18 @@ return tokens def scanString( self, instring, maxMatches=sys.maxint ): - """Scan the input string for expression matches. Each match will return the + """Scan the input string for expression matches. Each match will return the matching tokens, start location, and end location. May be called with optional maxMatches argument, to clip scanning after 'n' matches are found. - + Note that the start and end locations are reported relative to the string - being parsed. See L{I{parseString}<parseString>} for more information on parsing + being parsed. 
See L{I{parseString}<parseString>} for more information on parsing strings with embedded tabs.""" if not self.streamlined: self.streamline() for e in self.ignoreExprs: e.streamline() - + if not self.keepTabs: instring = _ustr(instring).expandtabs() instrlen = len(instring) @@ -957,13 +975,13 @@ matches += 1 yield tokens, preloc, nextLoc loc = nextLoc - + def transformString( self, instring ): """Extension to scanString, to modify matching text with modified tokens that may - be returned from a parse action. To use transformString, define a grammar and - attach a parse action to it that modifies the returned token list. - Invoking transformString() on a target string will then scan for matches, - and replace the matched text patterns according to the logic in the parse + be returned from a parse action. To use transformString, define a grammar and + attach a parse action to it that modifies the returned token list. + Invoking transformString() on a target string will then scan for matches, + and replace the matched text patterns according to the logic in the parse action. transformString() returns the resulting transformed string.""" out = [] lastE = 0 @@ -989,7 +1007,7 @@ maxMatches argument, to clip searching after 'n' matches are found. """ return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) - + def __add__(self, other ): """Implementation of + operator - returns And""" if isinstance( other, basestring ): @@ -1081,7 +1099,7 @@ return Suppress( self ) def leaveWhitespace( self ): - """Disables the skipping of whitespace before matching the characters in the + """Disables the skipping of whitespace before matching the characters in the ParserElement's defined pattern. This is normally only used internally by the pyparsing module, but may be needed in some whitespace-sensitive grammars. 
""" @@ -1095,16 +1113,16 @@ self.whiteChars = chars self.copyDefaultWhiteChars = False return self - + def parseWithTabs( self ): """Overrides default behavior to expand <TAB>s to spaces before parsing the input string. - Must be called before parseString when the input grammar contains elements that + Must be called before parseString when the input grammar contains elements that match <TAB> characters.""" self.keepTabs = True return self - + def ignore( self, other ): - """Define expression to be ignored (e.g., comments) while doing pattern + """Define expression to be ignored (e.g., comments) while doing pattern matching; may be called repeatedly, to define multiple comment or other ignorable patterns. """ @@ -1117,8 +1135,8 @@ def setDebugActions( self, startAction, successAction, exceptionAction ): """Enable display of debugging messages while doing pattern matching.""" - self.debugActions = (startAction or _defaultStartDebugAction, - successAction or _defaultSuccessDebugAction, + self.debugActions = (startAction or _defaultStartDebugAction, + successAction or _defaultSuccessDebugAction, exceptionAction or _defaultExceptionDebugAction) self.debug = True return self @@ -1137,15 +1155,15 @@ def __repr__( self ): return _ustr(self) - + def streamline( self ): self.streamlined = True self.strRepr = None return self - + def checkRecursion( self, parseElementList ): pass - + def validate( self, validateTrace=[] ): """Check defined expressions for valid structure, check for infinite recursive definitions.""" self.checkRecursion( [] ) @@ -1165,7 +1183,7 @@ def getException(self): return ParseException("",0,self.errmsg,self) - + def __getattr__(self,aname): if aname == "myException": self.myException = ret = self.getException(); @@ -1184,8 +1202,8 @@ self.errmsg = "Expected " + self.name #s.myException.msg = self.errmsg return s - + class Empty(Token): """An empty token, will always match.""" def __init__( self ): @@ -1204,7 +1222,7 @@ self.mayIndexError = False 
self.errmsg = "Unmatchable token" #self.myException.msg = self.errmsg - + def parseImpl( self, instring, loc, doActions=True ): exc = self.myException exc.loc = loc @@ -1221,7 +1239,7 @@ try: self.firstMatchChar = matchString[0] except IndexError: - warnings.warn("null string passed to Literal; use Empty() instead", + warnings.warn("null string passed to Literal; use Empty() instead", SyntaxWarning, stacklevel=2) self.__class__ = Empty self.name = '"%s"' % _ustr(self.match) @@ -1245,7 +1263,7 @@ raise exc class Keyword(Token): - """Token to exactly match a specified string as a keyword, that is, it must be + """Token to exactly match a specified string as a keyword, that is, it must be immediately followed by a non-keyword character. Compare with Literal:: Literal("if") will match the leading 'if' in 'ifAndOnlyIf'. Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)' @@ -1255,7 +1273,7 @@ matching, default is False. """ DEFAULT_KEYWORD_CHARS = alphanums+"_$" - + def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ): super(Keyword,self).__init__() self.match = matchString @@ -1263,7 +1281,7 @@ try: self.firstMatchChar = matchString[0] except IndexError: - warnings.warn("null string passed to Keyword; use Empty() instead", + warnings.warn("null string passed to Keyword; use Empty() instead", SyntaxWarning, stacklevel=2) self.name = '"%s"' % self.match self.errmsg = "Expected " + self.name @@ -1293,17 +1311,17 @@ exc.loc = loc exc.pstr = instring raise exc - + def copy(self): c = super(Keyword,self).copy() c.identChars = Keyword.DEFAULT_KEYWORD_CHARS return c - + def setDefaultKeywordChars( chars ): """Overrides the default Keyword chars """ Keyword.DEFAULT_KEYWORD_CHARS = chars - setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) + setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) class CaselessLiteral(Literal): @@ -1361,9 +1379,9 @@ else: self.bodyCharsOrig = initChars 
self.bodyChars = _str2dict(initChars) - + self.maxSpecified = max > 0 - + if min < 1: raise ValueError, "cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted" @@ -1383,7 +1401,7 @@ #self.myException.msg = self.errmsg self.mayIndexError = False self.asKeyword = asKeyword - + if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): if self.bodyCharsOrig == self.initCharsOrig: self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) @@ -1401,7 +1419,7 @@ self.re = re.compile( self.reString ) except: self.re = None - + def parseImpl( self, instring, loc, doActions=True ): if self.re: result = self.re.match(instring,loc) @@ -1410,10 +1428,10 @@ exc.loc = loc exc.pstr = instring raise exc - + loc = result.end() return loc,result.group() - + if not(instring[ loc ] in self.initChars): #~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException @@ -1428,7 +1446,7 @@ maxloc = min( maxloc, instrlen ) while loc < maxloc and instring[loc] in bodychars: loc += 1 - + throwException = False if loc - start < self.minLen: throwException = True @@ -1453,15 +1471,15 @@ except: pass - + if self.strRepr is None: - + def charsAsStr(s): if len(s)>4: return s[:4]+"..." else: return s - + if ( self.initCharsOrig != self.bodyCharsOrig ): self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) else: @@ -1477,19 +1495,19 @@ def __init__( self, pattern, flags=0): """The parameters pattern and flags are passed to the re.compile() function as-is. 
See the Python re module for an explanation of the acceptable patterns and flags.""" super(Regex,self).__init__() - + if len(pattern) == 0: - warnings.warn("null string passed to Regex; use Empty() instead", + warnings.warn("null string passed to Regex; use Empty() instead", SyntaxWarning, stacklevel=2) - + self.pattern = pattern self.flags = flags - + try: self.re = re.compile(self.pattern, self.flags) self.reString = self.pattern except sre_constants.error,e: - warnings.warn("invalid pattern (%s) passed to Regex" % pattern, + warnings.warn("invalid pattern (%s) passed to Regex" % pattern, SyntaxWarning, stacklevel=2) raise @@ -1498,7 +1516,7 @@ #self.myException.msg = self.errmsg self.mayIndexError = False self.mayReturnEmpty = True - + def parseImpl( self, instring, loc, doActions=True ): result = self.re.match(instring,loc) if not result: @@ -1506,7 +1524,7 @@ exc.loc = loc exc.pstr = instring raise exc - + loc = result.end() d = result.groupdict() ret = ParseResults(result.group()) @@ -1514,16 +1532,16 @@ for k in d.keys(): ret[k] = d[k] return loc,ret - + def __str__( self ): try: return super(Regex,self).__str__() except: pass - + if self.strRepr is None: self.strRepr = "Re:(%s)" % repr(self.pattern) - + return self.strRepr @@ -1541,13 +1559,13 @@ - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) """ super(QuotedString,self).__init__() - + # remove white space from quote chars - wont work anyway quoteChar = quoteChar.strip() if len(quoteChar) == 0: warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) raise SyntaxError() - + if endQuoteChar is None: endQuoteChar = quoteChar else: @@ -1555,7 +1573,7 @@ if len(endQuoteChar) == 0: warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) raise SyntaxError() - + self.quoteChar = quoteChar self.quoteCharLen = len(quoteChar) self.firstQuoteChar = quoteChar[0] @@ -1564,7 +1582,7 @@ 
self.escChar = escChar self.escQuote = escQuote self.unquoteResults = unquoteResults - + if multiline: self.flags = re.MULTILINE | re.DOTALL self.pattern = r'%s(?:[^%s%s]' % \ @@ -1580,7 +1598,7 @@ if len(self.endQuoteChar) > 1: self.pattern += ( '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), - _escapeRegexRangeChars(self.endQuoteChar[i])) + _escapeRegexRangeChars(self.endQuoteChar[i])) for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' ) if escQuote: @@ -1589,12 +1607,12 @@ self.pattern += (r'|(?:%s.)' % re.escape(escChar)) self.escCharReplacePattern = re.escape(self.escChar)+"(.)" self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) - + try: self.re = re.compile(self.pattern, self.flags) self.reString = self.pattern except sre_constants.error,e: - warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, + warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, SyntaxWarning, stacklevel=2) raise @@ -1603,7 +1621,7 @@ #self.myException.msg = self.errmsg self.mayIndexError = False self.mayReturnEmpty = True - + def parseImpl( self, instring, loc, doActions=True ): result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None if not result: @@ -1611,15 +1629,15 @@ exc.loc = loc exc.pstr = instring raise exc - + loc = result.end() ret = result.group() - + if self.unquoteResults: - + # strip off quotes ret = ret[self.quoteCharLen:-self.endQuoteCharLen] - + if isinstance(ret,basestring): # replace escaped characters if self.escChar: @@ -1630,22 +1648,22 @@ ret = ret.replace(self.escQuote, self.endQuoteChar) return loc, ret - + def __str__( self ): try: return super(QuotedString,self).__str__() except: pass - + if self.strRepr is None: self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) - + return self.strRepr class CharsNotIn(Token): """Token for matching words composed of characters *not* in a given set. 
- Defined with string containing all disallowed characters, and an optional + Defined with string containing all disallowed characters, and an optional minimum, maximum, and/or exact length. The default value for min is 1 (a minimum value < 1 is not valid); the default values for max and exact are 0, meaning no maximum or exact length restriction. @@ -1654,7 +1672,7 @@ super(CharsNotIn,self).__init__() self.skipWhitespace = False self.notChars = notChars - + if min < 1: raise ValueError, "cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted" @@ -1668,7 +1686,7 @@ if exact > 0: self.maxLen = exact self.minLen = exact - + self.name = _ustr(self) self.errmsg = "Expected " + self.name self.mayReturnEmpty = ( self.minLen == 0 ) @@ -1682,7 +1700,7 @@ exc.loc = loc exc.pstr = instring raise exc - + start = loc loc += 1 notchars = self.notChars @@ -1711,7 +1729,7 @@ self.strRepr = "!W:(%s...)" % self.notChars[:4] else: self.strRepr = "!W:(%s)" % self.notChars - + return self.strRepr class White(Token): @@ -1747,7 +1765,7 @@ if exact > 0: self.maxLen = exact self.minLen = exact - + def parseImpl( self, instring, loc, doActions=True ): if not(instring[ loc ] in self.matchWhite): #~ raise ParseException( instring, loc, self.errmsg ) @@ -1834,7 +1852,7 @@ self.setWhitespaceChars( " \t" ) self.errmsg = "Expected end of line" #self.myException.msg = self.errmsg - + def parseImpl( self, instring, loc, doActions=True ): if loc<len(instring): if instring[loc] == "\n": @@ -1859,7 +1877,7 @@ super(StringStart,self).__init__() self.errmsg = "Expected start of text" #self.myException.msg = self.errmsg - + def parseImpl( self, instring, loc, doActions=True ): if loc != 0: # see if entire string up to here is just whitespace and ignoreables @@ -1877,7 +1895,7 @@ super(StringEnd,self).__init__() self.errmsg = "Expected end of text" #self.myException.msg = self.errmsg - + def parseImpl( self, instring, loc, doActions=True ): if loc < 
len(instring): #~ raise ParseException( instring, loc, "Expected end of text" ) @@ -1942,7 +1960,7 @@ return super(ParseExpression,self).__str__() except: pass - + if self.strRepr is None: self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) ) return self.strRepr @@ -1982,7 +2000,7 @@ def setResultsName( self, name, listAllMatches=False ): ret = super(ParseExpression,self).setResultsName(name,listAllMatches) return ret - + def validate( self, validateTrace=[] ): tmp = validateTrace[:]+[self] for e in self.exprs: @@ -2019,24 +2037,24 @@ if isinstance( other, basestring ): other = Literal( other ) return self.append( other ) #And( [ self, other ] ) - + def checkRecursion( self, parseElementList ): subRecCheckList = parseElementList[:] + [ self ] for e in self.exprs: e.checkRecursion( subRecCheckList ) if not e.mayReturnEmpty: break - + def __str__( self ): if hasattr(self,"name"): return self.name - + if self.strRepr is None: self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}" - + return self.strRepr - + class Or(ParseExpression): """Requires that at least one ParseExpression is found. If two expressions match, the expression that matches the longest string will be used. 
@@ -2049,7 +2067,7 @@ if e.mayReturnEmpty: self.mayReturnEmpty = True break - + def parseImpl( self, instring, loc, doActions=True ): maxExcLoc = -1 maxMatchLoc = -1 @@ -2068,7 +2086,7 @@ if loc2 > maxMatchLoc: maxMatchLoc = loc2 maxMatchExp = e - + if maxMatchLoc < 0: if self.exprs: raise maxException @@ -2085,12 +2103,12 @@ def __str__( self ): if hasattr(self,"name"): return self.name - + if self.strRepr is None: self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" - + return self.strRepr - + def checkRecursion( self, parseElementList ): subRecCheckList = parseElementList[:] + [ self ] for e in self.exprs: @@ -2112,7 +2130,7 @@ break else: self.mayReturnEmpty = True - + def parseImpl( self, instring, loc, doActions=True ): maxExcLoc = -1 for e in self.exprs: @@ -2143,12 +2161,12 @@ def __str__( self ): if hasattr(self,"name"): return self.name - + if self.strRepr is None: self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" - + return self.strRepr - + def checkRecursion( self, parseElementList ): subRecCheckList = parseElementList[:] + [ self ] for e in self.exprs: @@ -2197,7 +2215,7 @@ tmpOpt.remove(e) if len(failed) == len(tmpExprs): keepMatching = False - + if tmpReqd: missing = ", ".join( [ _ustr(e) for e in tmpReqd ] ) raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) @@ -2206,7 +2224,7 @@ for e in matchOrder: loc,results = e._parse(instring,loc,doActions) resultlist.append(results) - + finalResults = ParseResults([]) for r in resultlist: dups = {} @@ -2223,12 +2241,12 @@ def __str__( self ): if hasattr(self,"name"): return self.name - + if self.strRepr is None: self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" - + return self.strRepr - + def checkRecursion( self, parseElementList ): subRecCheckList = parseElementList[:] + [ self ] for e in self.exprs: @@ -2256,7 +2274,7 @@ return self.expr._parse( instring, loc, doActions, callPreParse=False ) else: 
raise ParseException("",loc,self.errmsg,self) - + def leaveWhitespace( self ): self.skipWhitespace = False self.expr = self.expr.copy() @@ -2288,19 +2306,19 @@ subRecCheckList = parseElementList[:] + [ self ] if self.expr is not None: self.expr.checkRecursion( subRecCheckList ) - + def validate( self, validateTrace=[] ): tmp = validateTrace[:]+[self] if self.expr is not None: self.expr.validate(tmp) self.checkRecursion( [] ) - + def __str__( self ): try: return super(ParseElementEnhance,self).__str__() except: pass - + if self.strRepr is None and self.expr is not None: self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) return self.strRepr @@ -2308,13 +2326,13 @@ class FollowedBy(ParseElementEnhance): """Lookahead matching of the given parse expression. FollowedBy - does *not* advance the parsing position within the input string, it only - verifies that the specified parse expression matches at the current + does *not* advance the parsing position within the input string, it only + verifies that the specified parse expression matches at the current position. FollowedBy always returns a null token list.""" def __init__( self, expr ): super(FollowedBy,self).__init__(expr) self.mayReturnEmpty = True - + def parseImpl( self, instring, loc, doActions=True ): self.expr.tryParse( instring, loc ) return loc, [] @@ -2322,9 +2340,9 @@ class NotAny(ParseElementEnhance): """Lookahead to disallow matching with the given parse expression. NotAny - does *not* advance the parsing position within the input string, it only - verifies that the specified parse expression does *not* match at the current - position. Also, NotAny does *not* skip over leading whitespace. NotAny + does *not* advance the parsing position within the input string, it only + verifies that the specified parse expression does *not* match at the current + position. Also, NotAny does *not* skip over leading whitespace. NotAny always returns a null token list. 
May be constructed using the '~' operator.""" def __init__( self, expr ): super(NotAny,self).__init__(expr) @@ -2333,7 +2351,7 @@ self.mayReturnEmpty = True self.errmsg = "Found unwanted token, "+_ustr(self.expr) #self.myException = ParseException("",0,self.errmsg,self) - + def parseImpl( self, instring, loc, doActions=True ): try: self.expr.tryParse( instring, loc ) @@ -2350,10 +2368,10 @@ def __str__( self ): if hasattr(self,"name"): return self.name - + if self.strRepr is None: self.strRepr = "~{" + _ustr(self.expr) + "}" - + return self.strRepr @@ -2362,7 +2380,7 @@ def __init__( self, expr ): super(ZeroOrMore,self).__init__(expr) self.mayReturnEmpty = True - + def parseImpl( self, instring, loc, doActions=True ): tokens = [] try: @@ -2384,18 +2402,18 @@ def __str__( self ): if hasattr(self,"name"): return self.name - + if self.strRepr is None: self.strRepr = "[" + _ustr(self.expr) + "]..." - + return self.strRepr - + def setResultsName( self, name, listAllMatches=False ): ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches) ret.saveAsList = True return ret - + class OneOrMore(ParseElementEnhance): """Repetition of one or more of the given expression.""" def parseImpl( self, instring, loc, doActions=True ): @@ -2419,12 +2437,12 @@ def __str__( self ): if hasattr(self,"name"): return self.name - + if self.strRepr is None: self.strRepr = "{" + _ustr(self.expr) + "}..." - + return self.strRepr - + def setResultsName( self, name, listAllMatches=False ): ret = super(OneOrMore,self).setResultsName(name,listAllMatches) ret.saveAsList = True @@ -2460,17 +2478,17 @@ def __str__( self ): if hasattr(self,"name"): return self.name - + if self.strRepr is None: self.strRepr = "[" + _ustr(self.expr) + "]" - + return self.strRepr class SkipTo(ParseElementEnhance): """Token for skipping over all undefined text until the matched expression is found. If include is set to true, the matched expression is also consumed. 
The ignore - argument is used to define grammars (typically quoted strings and comments) that + argument is used to define grammars (typically quoted strings and comments) that might contain false matches. """ def __init__( self, other, include=False, ignore=None ): @@ -2515,7 +2533,7 @@ """Forward declaration of an expression to be defined later - used for recursive grammars, such as algebraic infix notation. When the expression is known, it is assigned to the Forward variable using the '<<' operator. - + Note: take care when assigning to Forward not to overlook precedence of operators. Specifically, '|' has a lower precedence than '<<', so that:: fwdExpr << a | b | c @@ -2538,7 +2556,7 @@ self.mayReturnEmpty = self.expr.mayReturnEmpty self.setWhitespaceChars( self.expr.whiteChars ) self.skipWhitespace = self.expr.skipWhitespace - self.saveAsList = self.expr.saveAsList + self.saveAsList = self.expr.saveAsList return self def leaveWhitespace( self ): @@ -2548,31 +2566,31 @@ def streamline( self ): if not self.streamlined: self.streamlined = True - if self.expr is not None: + if self.expr is not None: self.expr.streamline() return self def validate( self, validateTrace=[] ): if self not in validateTrace: tmp = validateTrace[:]+[self] - if self.expr is not None: + if self.expr is not None: self.expr.validate(tmp) - self.checkRecursion([]) - + self.checkRecursion([]) + def __str__( self ): if hasattr(self,"name"): return self.name self.__class__ = _ForwardNoRecurse try: - if self.expr is not None: + if self.expr is not None: retString = _ustr(self.expr) else: retString = "None" finally: self.__class__ = Forward return "Forward: "+retString - + def copy(self): if self.expr is not None: return super(Forward,self).copy() @@ -2584,7 +2602,7 @@ class _ForwardNoRecurse(Forward): def __str__( self ): return "..." 
- + class TokenConverter(ParseElementEnhance): """Abstract subclass of ParseExpression, for converting parsed results.""" def __init__( self, expr, savelist=False ): @@ -2595,9 +2613,9 @@ """Converter to upper case all matching tokens.""" def __init__(self, *args): super(Upcase,self).__init__(*args) - warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", + warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", DeprecationWarning,stacklevel=2) - + def postParse( self, instring, loc, tokenlist ): return map( string.upper, tokenlist ) @@ -2641,7 +2659,7 @@ def postParse( self, instring, loc, tokenlist ): return [ tokenlist ] - + class Dict(TokenConverter): """Converter to return a repetitive expression as a list, but also as a dictionary. Each element can also be referenced using the first token in the expression as its key. @@ -2653,7 +2671,7 @@ def postParse( self, instring, loc, tokenlist ): for i,tok in enumerate(tokenlist): - if len(tok) == 0: + if len(tok) == 0: continue ikey = tok[0] if isinstance(ikey,int): @@ -2680,7 +2698,7 @@ """Converter for ignoring the results of a parsed expression.""" def postParse( self, instring, loc, tokenlist ): return [] - + def suppress( self ): return self @@ -2720,13 +2738,13 @@ except AttributeError: pass return z - + # # global helpers # def delimitedList( expr, delim=",", combine=False ): """Helper to define a delimited list of expressions - the delimiter defaults to ','. - By default, the list elements and delimiters can have intervening whitespace, and + By default, the list elements and delimiters can have intervening whitespace, and comments, but this can be overridden by passing 'combine=True' in the constructor. 
If combine is set to True, the matching tokens are returned as a single token string, with the delimiters included; otherwise, the matching tokens are returned @@ -2764,8 +2782,8 @@ first = Word(nums) second = matchPreviousLiteral(first) matchExpr = first + ":" + second - will match "1:1", but not "1:2". Because this matches a - previous literal, will also match the leading "1:1" in "1:10". + will match "1:1", but not "1:2". Because this matches a + previous literal, will also match the leading "1:1" in "1:10". If this is not desired, use matchPreviousExpr. Do *not* use with packrat parsing enabled. """ @@ -2782,7 +2800,7 @@ rep << Empty() expr.addParseAction(copyTokenToRepeater, callDuringTry=True) return rep - + def matchPreviousExpr(expr): """Helper to define an expression that is indirectly defined from the tokens matched in a previous expression, that is, it looks @@ -2808,7 +2826,7 @@ rep.setParseAction( mustMatchTheseTokens, callDuringTry=True ) expr.addParseAction(copyTokenToRepeater, callDuringTry=True) return rep - + def _escapeRegexRangeChars(s): #~ escape these chars: ^-] for c in r"\^-]": @@ -2816,12 +2834,12 @@ s = s.replace("\n",r"\n") s = s.replace("\t",r"\t") return _ustr(s) - + def oneOf( strs, caseless=False, useRegex=True ): - """Helper to quickly define a set of alternative Literals, and makes sure to do - longest-first testing when there is a conflict, regardless of the input order, - but returns a MatchFirst for best performance. - + """Helper to quickly define a set of alternative Literals, and makes sure to do + longest-first testing when there is a conflict, regardless of the input order, + but returns a MatchFirst for best performance. 
+ Parameters: - strs - a string of space-delimited literals, or a list of string literals - caseless - (default=False) - treat all literals as caseless @@ -2837,7 +2855,7 @@ isequal = ( lambda a,b: a == b ) masks = ( lambda a,b: b.startswith(a) ) parseElementClass = Literal - + if isinstance(strs,(list,tuple)): symbols = strs[:] elif isinstance(strs,basestring): @@ -2860,7 +2878,7 @@ else: break i += 1 - + if not caseless and useRegex: #~ print strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ) try: @@ -2881,7 +2899,7 @@ for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens in the proper order. The key pattern can include delimiting markers or punctuation, as long as they are suppressed, thereby leaving the significant key text. The value - pattern can include named results, so that the Dict results can include named token + pattern can include named results, so that the Dict results can include named token fields. """ return Dict( ZeroOrMore( Group ( key + value ) ) ) @@ -2905,14 +2923,14 @@ _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" _expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p) - + def srange(s): r"""Helper to easily define string ranges for use in Word construction. Borrows syntax from regexp '[]' string range definitions:: srange("[0-9]") -> "0123456789" srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" - The input string must be enclosed in []'s, and the returned string is the expanded + The input string must be enclosed in []'s, and the returned string is the expanded character set joined into a single string. 
The values enclosed in the []'s may be:: a single character @@ -2928,7 +2946,7 @@ return "" def replaceWith(replStr): - """Helper method for common parse actions that simply return a literal value. Especially + """Helper method for common parse actions that simply return a literal value. Especially useful when used with transformString(). """ def _replFunc(*args): @@ -2962,7 +2980,7 @@ return t def getTokensEndLoc(): - """Method to be called from within a parse action to determine the end + """Method to be called from within a parse action to determine the end location of the parsed tokens.""" import inspect fstack = inspect.stack() @@ -2984,7 +3002,7 @@ tagStr = Keyword(tagStr, caseless=not xml) else: resname = tagStr.name - + tagAttrName = Word(alphas,alphanums+"_-:") if (xml): tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) @@ -2999,10 +3017,10 @@ Optional( Suppress("=") + tagAttrValue ) ))) + \ Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") closeTag = Combine("</" + tagStr + ">") - + openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr) closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % tagStr) - + return openTag, closeTag def makeHTMLTags(tagStr): @@ -3014,12 +3032,12 @@ return _makeTags( tagStr, True ) def withAttribute(*args,**attrDict): - """Helper to create a validating parse action to be used with start tags created - with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag - with a required attribute value, to avoid false matches on common tags such as + """Helper to create a validating parse action to be used with start tags created + with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag + with a required attribute value, to avoid false matches on common tags such as <TD> or <DIV>. 
- Call withAttribute with a series of attribute names and values. Specify the list + Call withAttribute with a series of attribute names and values. Specify the list of filter attributes names and values as: - keyword arguments, as in (class="Customer",align="right"), or - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) @@ -3036,7 +3054,7 @@ if attrName not in tokens: raise ParseException(s,l,"no matching attribute " + attrName) if tokens[attrName] != attrValue: - raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % + raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % (attrName, tokens[attrName], attrValue)) return pa @@ -3053,16 +3071,16 @@ return tokens[0] return map(_flattenOpPrecTokens,tokens) return tokens - + def operatorPrecedence( baseExpr, opList ): - """Helper method for constructing grammars of expressions made up of + """Helper method for constructing grammars of expressions made up of operators working in a precedence hierarchy. Operators may be unary or binary, left- or right-associative. Parse actions can also be attached to operator expressions. - + Parameters: - - baseExpr - expression representing the most basic element for the nested - - opList - list of tuples, one for each operator precedence level in the + - baseExpr - expression representing the most basic element for the nested + - opList - list of tuples, one for each operator precedence level in the expression grammar; each tuple is of the form (opExpr, numTerms, rightLeftAssoc, parseAction), where: - opExpr is the pyparsing expression for the operator; @@ -3072,7 +3090,7 @@ - rightLeftAssoc is the indicator whether the operator is right or left associative, using the pyparsing-defined constants opAssoc.RIGHT and opAssoc.LEFT. 
- - parseAction is the parse action to be associated with + - parseAction is the parse action to be associated with expressions matching this operator expression (the parse action tuple member may be omitted) """ @@ -3093,7 +3111,7 @@ # try to avoid LR... [truncated message content] |