Re: [Pyparsing] more refinement but still lost
Brought to you by:
ptmcg
From: Paul M. <pt...@au...> - 2012-09-18 10:46:11
|
# Example: parsing nested, bracket-delimited lists with pyparsing.
#
# The script builds a small recursive grammar for "[...]" lists, runs it
# over a set of sample strings, and prints the nested-list result for each.

# Sample inputs, from plain text up to nested, escaped and indented lists.
# Inputs containing a backslash are raw strings so that "\[" and "\]" reach
# the parser unchanged without relying on invalid escape sequences.
all_tests = {
    "test_1": "some plain text",
    "test_2": "[simple ]",
    "test_3": "[simple_text some plain text]",
    "test_4": "[onearg [one ]]",
    "test_5": "[twoarg [one ] [two ]]",
    "test_6": "[onearg_text [one some plain text]]",
    "test_7": "[twoarg_text [one ] [two some plain text arg]]",
    "test_8": "[nested_text some [not plain] text]",
    "test_9": "[nested_text [one text] some [not [very ] plain] text]",
    "test_10": r"[nested_text_escaped [one text] some [not [very ] plain] bracketed \[text\]]",
    "test_11": r"""[nested_text_escaped_indented [one text] some [not [very ] plain ] bracked \[text\] ]""",
}

# a simple BNF:
#
#   listExpr     ::= '[' listContent ']'
#   listContent  ::= (contentsWord | escapedChar | listExpr)*
#   contentsWord ::= printableCharacter+
#   escapedChar  ::= '\' printableCharacter
#
# Some notes:
# 1. listContent could be empty, "[]" is a valid listExpr
# 2. contentsWord cannot contain '\', '[' or ']' characters, or
#    else we couldn't distinguish delimiters from contents, or
#    detect escapes
#
from pyparsing import (
    Combine,
    Forward,
    Group,
    Suppress,
    Word,
    ZeroOrMore,
    nestedExpr,
    oneOf,
    printables,
)

# start with the basics
LBRACK, RBRACK = map(Suppress, "[]")
# a backslash followed by any single printable character, kept as one token
escapedChar = Combine('\\' + oneOf(list(printables)))
contentsWord = Word(printables, excludeChars=r"\[]")

# define a placeholder for a nested list, since we need to
# reference it before it is fully defined
listExpr = Forward()

# the contents of a list is zero or more contents words, escaped
# characters, or nested lists
listContent = ZeroOrMore(contentsWord | escapedChar | listExpr)

# a list is a listContent enclosed in []'s - enclose
# in a Group so that pyparsing will maintain the nested structure
#
# since listExpr was already defined as a Forward, we use '<<' to
# "inject" the definition into the already defined Forward
listExpr << Group(LBRACK + listContent + RBRACK)

# parse the test strings - note that the results no longer contain
# the parsed '[' and ']' characters, but they do retain the
# nesting of the original string in nested lists
for name, testStr in all_tests.items():
    # a single pre-formatted argument prints identically on Python 2 and 3
    print("%s %s" % (name, listContent.parseString(testStr).asList()))

# pyparsing includes a short-cut to simplify defining nested
# structures like this
print(nestedExpr('[', ']').parseString(all_tests['test_9']).asList())