Re: [Pyparsing] fixed-length field preceded by length?
Brought to you by:
ptmcg
From: John W. S. <jo...@nm...> - 2012-01-01 19:31:34
|
#!/usr/bin/env python3
#================================================================
# countedtext:  Using pyparsing for a string preceded by a count
#----------------------------------------------------------------
# Author: John W. Shipman (jo...@nm...)
#   New Mexico Tech Computer Center
#   Socorro, NM 87801
#
# Problem: Sanjay Ghemawat's venerable 'ical' calendar utility
#   (http://en.wikipedia.org/wiki/Ical_%28Unix%29) saves events
#   in a .calendar file, in which the description of an event
#   is saved in a line like this:
#
#       Text [6 [Easter]]
#
#   The problem is to write a pyparsing pattern that parses the
#   count and the bracketed string.  The shortcut method is to
#   use QuotedString(quoteChar='[', endQuoteChar=']'), but this
#   fails if the literal string contains a ']' character.
#
#   Paul McGuire responded immediately to my post on the pyparsing
#   mailing list, suggesting that I study the implementation of the
#   countedArray() helper.  Based on this advice, I offer this
#   implementation of a countedText() pattern that matches an
#   integer followed by a literal string in brackets, complete
#   with a test driver.
#----------------------------------------------------------------

import sys
import pyparsing as pp


def countedText():
    '''Defines a pattern of the form:

        int "[" char* "]"

    where int is an integer that specifies the length of the
    following bracketed string literal.

    Example: "6 [Easter]"

    Returns the pyparsing expression (intExpr + stringExpr).  The
    stringExpr part is a Forward placeholder that is (re)defined
    every time the leading integer is matched, so that it accepts
    exactly that many characters between the brackets.
    '''
    stringExpr = pp.Forward()

    def countedParseAction(s, l, t):
        '''Parse action that sets up the count in stringExpr.

        s and l are not used here; t[0] is the count matched by
        intExpr.  Returns an empty list, so the count itself does
        not appear in the parse results.
        '''
        n = int(t[0])
        #--
        # CharsNotIn does not like exact=0.  We use Combine so that
        # whichever pattern represents the contents, the result is
        # a single string.
        #--
        if n > 0:
            # An empty exclusion set means "any character", so this
            # matches exactly n characters, ']' included.
            contents = pp.CharsNotIn('', exact=n)
        else:
            contents = pp.Empty()
        stringExpr << pp.Combine(pp.Literal("[").suppress() +
                                 contents +
                                 pp.Literal("]").suppress())
        return []

    #--
    # The first parse action converts the count to an int.
    #--
    intExpr = pp.Word(pp.nums).setParseAction(lambda t: int(t[0]))
    #--
    # The second parse action uses the count to define the
    # stringExpr pattern using the actual value of the count.
    #--
    intExpr.addParseAction(countedParseAction)
    return (intExpr + stringExpr)


# Built once at import time and shared by every call to test().
linePat = countedText()

# - - - - -   m a i n

testLines = [                   # Test output
    "0 []",                     # ['']
    "11 [abcdefghijk]",         # ['abcdefghijk']
    "6 [Easter]",               # ['Easter']
    "4 [[[]]]",                 # ['[[]]']
    "6 []]]]]]]",               # [']]]]]]']
    "6 [ abcdef]"               # Fails (leading whitespace not skipped)
    ]


def main():
    """Main: run every line of testLines through test().
    """
    for line in testLines:
        test(line)


def test(line):
    '''Test one line.

    Prints the line, then either the parse result or, when linePat
    does not match the entire line, a caret under the column
    reported by the ParseException followed by "No".
    '''
    print("\n", line, sep='')
    try:
        result = linePat.parseString(line, parseAll=True)
        print(result)
    except pp.ParseException as x:
        # x.column is 1-based, hence the -1 to place the caret.
        print("{}^".format(" "*(x.column-1)))
        print("No")

# - - - - -   E p i l o g u e

if __name__ == "__main__":
    main()