[Pyparsing] grammar with bad performance (with examples :))
Brought to you by:
ptmcg
From: david <asd...@gn...> - 2007-12-10 14:50:18
|
Hi all, I use pyparsing to parse a simple, self-made, query language. Here is the grammar: ----8<------------------------------------------------------------------- import pyparsing as psg EQUAL = psg.Keyword('=') NOT_EQUAL = psg.Keyword('!=') CONTAINS = psg.Keyword('~') NOT_CONTAINS = psg.Keyword('!~') STARTWITH = psg.Keyword('~>') OPERATOR = psg.Or([EQUAL, NOT_EQUAL, CONTAINS, NOT_CONTAINS, STARTWITH]) FIELD = psg.Word(psg.alphanums + '/').setParseAction(lambda s, loc, toks: Field(toks[0])) VALUE = psg.quotedString.setParseAction(psg.removeQuotes) ATOM = FIELD + OPERATOR + VALUE LOGIC_OP = psg.Or([ psg.Keyword('and').setParseAction(lambda *args: And()), psg.Keyword('or').setParseAction(lambda *args: Or()) ]) BIN_EXPR = psg.Forward() UNABIN_EXPR = psg.Or([ATOM, BIN_EXPR]) BIN_EXPR << (LOGIC_OP + '(' + UNABIN_EXPR + ',' + UNABIN_EXPR + ')') GRAMMAR = psg.Or([BIN_EXPR, ATOM]) ---->8------------------------------------------------------------------- This grammar works like a charme for small input string, but hang with CPU 100% for minutes with a large input string. For large input string I mean something like (sorry this will break your newsreader): ----8<------------------------------------------------------------------- data = """or(/BI/BIB/BIBH = '00000509', or(/BI/BIB/BIBH = '00000769', or(/ BI/BIB/BIBH = '00001673', or(/BI/BIB/BIBH = '00000058', or(/BI/BIB/BIBH = '00000764', or(/BI/BIB/BIBH = '00001238', or(/BI/BIB/BIBH = '00001592', or (/BI/BIB/BIBH = '00001017', or(/BI/BIB/BIBH = '00002676', or(/BI/BIB/BIBH = '00001554', or(/BI/BIB/BIBH = '00002193', or(/BI/BIB/BIBH = '00001907', or(/BI/BIB/BIBH = '00000366', or(/BI/BIB/BIBH = '00001161', or(/BI/BIB/ BIBH = '00002991', or(/BI/BIB/BIBH = '00002820', or(/BI/BIB/BIBH = '00000610', or(/BI/BIB/BIBH = '00000521', or(/BI/BIB/BIBH = '00003143', or (/BI/BIB/BIBH = '00002869', or(/BI/BIB/BIBH = '00000410', or(/BI/BIB/BIBH = '00001926', or(/BI/BIB/BIBH = '00000061', or(/BI/BIB/BIBH = '00000165', or(/BI/BIB/BIBH = '00000669', or(/BI/BIB/BIBH = '00002675', or(/BI/BIB/ BIBH = '00000770', or(/BI/BIB/BIBH = '00000981', or(/BI/BIB/BIBH = '00001841', or(/BI/BIB/BIBH = '00002668', or(/BI/BIB/BIBH = '00001949', or (/BI/BIB/BIBH = '00002819', or(/BI/BIB/BIBH = '00000623', or(/BI/BIB/BIBH = '00002365', or(/BI/BIB/BIBH = '00000865', or(/BI/BIB/BIBH = '00000176', or(/BI/BIB/BIBH = '00002036', or(/BI/BIB/BIBH = '00002640', or(/BI/BIB/ BIBH = '00001297', or(/BI/BIB/BIBH = '00000811', or(/BI/BIB/BIBH = '00001594', or(/BI/BIB/BIBH = '00001160', or(/BI/BIB/BIBH = '00000709', or (/BI/BIB/BIBH = '00001908', or(/BI/BIB/BIBH = '00002836', or(/BI/BIB/BIBH = '00001283', or(/BI/BIB/BIBH = '00002946', or(/BI/BIB/BIBH = '00001877', or(/BI/BIB/BIBH = '00002378', or(/BI/BIB/BIBH = '00000474', or(/BI/BIB/ BIBH = '00002710', or(/BI/BIB/BIBH = '00001884', or(/BI/BIB/BIBH = '00000968', or(/BI/BIB/BIBH = '00000926', or(/BI/BIB/BIBH = '00001902', or (/BI/BIB/BIBH = '00001018', or(/BI/BIB/BIBH = '00002989', or(/BI/BIB/BIBH = '00001590', or(/BI/BIB/BIBH = '00001300', or(/BI/BIB/BIBH = '00002754', or(/BI/BIB/BIBH = '00001923', or(/BI/BIB/BIBH = '00002771', or(/BI/BIB/ BIBH = '00000768', or(/BI/BIB/BIBH = '00002034', or(/BI/BIB/BIBH = '00001851', or(/BI/BIB/BIBH = '00001303', or(/BI/BIB/BIBH = '00002642', or (/BI/BIB/BIBH = '00002949', or(/BI/BIB/BIBH = '00000821', or(/BI/BIB/BIBH = '00000989', or(/BI/BIB/BIBH = '00001377', or(/BI/BIB/BIBH = '00001514', or(/BI/BIB/BIBH = '00000510', or(/BI/BIB/BIBH = '00000054', or(/BI/BIB/ BIBH = '00001280', or(/BI/BIB/BIBH = '00002370', or(/BI/BIB/BIBH = '00001924', or(/BI/BIB/BIBH = '00000063', or(/BI/BIB/BIBH = '00001632', or (/BI/BIB/BIBH = '00000434', or(/BI/BIB/BIBH = '00003035', or(/BI/BIB/BIBH = '00001657', or(/BI/BIB/BIBH = '00002053', or(/BI/BIB/BIBH = '00000966', or(/BI/BIB/BIBH = '00002699', or(/BI/BIB/BIBH = '00003178', or(/BI/BIB/ BIBH = '00002602', or(/BI/BIB/BIBH = '00002194', or(/BI/BIB/BIBH = '00001591', or(/BI/BIB/BIBH = '00001910', or(/BI/BIB/BIBH = '00000174', or (/BI/BIB/BIBH = '00002132', /BI/BIB/BIBH = '00000665'))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))""" import time start = time.time() tokens = GRAMMAR.parseString(data) print time.time() - start ---->8------------------------------------------------------------------- I tried with last pyparsing version (1.4.8) with no luck, have you any ideas? The grammar is self-made, I can change it if you think the bad performance is fault of it. |