Here's a (very standard) agenda-based implementation of the generic chart parsing algorithm. Parsing becomes about 4 times faster (tested on the ATIS grammar). Tracing works as before.
There's a new optional argument to ChartParser.__init__, which defaults to using the agenda-based algorithm.
Furthermore, there's a new method ChartParser.chart_parse, which returns the final Chart. nbest_parse is reimplemented to call chart_parse.
SteppingChartParser is not changed.
Attached is the modified file, parse/chart.py. Below is a diff:
$ diff -w -u chart*.py
--- chart-old.py 2008-06-19 23:15:53.000000000 +0200
+++ chart-new.py 2008-06-21 21:41:42.000000000 +0200
@@ -1278,7 +1278,7 @@
- Apply I{rule} to any applicable edges in the chart.
- Return any complete parses in the chart
"""
- def __init__(self, grammar, strategy, trace=0):
+ def __init__(self, grammar, strategy, trace=0, use_agenda=True):
"""
Create a new chart parser, that uses C{grammar} to parse
texts.
@@ -1293,38 +1293,83 @@
parsing a text. C{0} will generate no tracing output;
and higher numbers will produce more verbose tracing
output.
+ @type use_agenda: C{bool}
+ @param use_agenda: Use an optimized agenda-based algorithm,
+ if possible.
"""
self._grammar = grammar
self._strategy = strategy
self._trace = trace
+ # If the strategy only consists of axioms (NUM_EDGES==0) and
+ # inference rules (NUM_EDGES==1), we can use an agenda-based algorithm:
+ self._use_agenda = use_agenda
+ self._axioms = []
+ self._inference_rules = []
+ for rule in strategy:
+ if rule.NUM_EDGES == 0:
+ self._axioms.append(rule)
+ elif rule.NUM_EDGES == 1:
+ self._inference_rules.append(rule)
+ else:
+ self._use_agenda = False
def grammar(self):
return self._grammar
- def nbest_parse(self, tokens, n=None, tree_class=Tree):
- tokens = list(tokens)
+ def chart_parse(self, sent):
+ """
+ @return: The final parse L{Chart},
+ from which all possible parse trees can be extracted.
+
+ @param sent: The sentence to be parsed
+ @type sent: L{list} of L{string}
+ @rtype: L{Chart}
+ """
+ tokens = list(sent)
self._grammar.check_coverage(tokens)
chart = Chart(list(tokens))
+ agenda = []
grammar = self._grammar
# Width, for printing trace edges.
w = 50/(chart.num_leaves()+1)
if self._trace > 0: print chart.pp_leaves(w)
- edges_added = 1
- while edges_added > 0:
- edges_added = 0
- for rule in self._strategy:
- edges_added_by_rule = 0
- for e in rule.apply_everywhere(chart, grammar):
- if self._trace > 0 and edges_added_by_rule == 0:
+ def _trace_new_edges(rule, new_edges):
+ if self._trace > 0 and new_edges != []:
print '%s:' % rule
- edges_added_by_rule += 1
- if self._trace > 1: print chart.pp_edge(e,w)
- if self._trace == 1 and edges_added_by_rule > 0:
- print ' - Added %d edges' % edges_added_by_rule
- edges_added += edges_added_by_rule
+ if self._trace > 1:
+ for e in new_edges: print chart.pp_edge(e,w)
+ else:
+ print ' - Added %d edges' % len(new_edges)
+
+ if self._use_agenda:
+ for axiom in self._axioms:
+ new_edges = axiom.apply(chart, grammar)
+ agenda += new_edges
+ _trace_new_edges(axiom, new_edges)
+ while agenda != []:
+ edge = agenda.pop()
+ for rule in self._inference_rules:
+ new_edges = rule.apply(chart, grammar, edge)
+ agenda += new_edges
+ _trace_new_edges(rule, new_edges)
+ else:
+ edges_added = True
+ while edges_added:
+ edges_added = False
+ for rule in self._strategy:
+ new_edges = rule.apply_everywhere(chart, grammar)
+ if new_edges != []: edges_added = True
+ _trace_new_edges(rule, new_edges)
+
+ # Return the final chart.
+ return chart
+
+ def nbest_parse(self, tokens, n=None, tree_class=Tree):
+ chart = self.chart_parse(tokens)
+ grammar = self._grammar
# Return a list of complete parses.
return chart.parses(grammar.start(), tree_class=tree_class)[:n]
peter ljunglöf
2008-06-21
New version of parse/chart.py
Steven Bird
2008-12-09