Pander - 2013-12-28

The Python script below generates a Graphviz file to create an overview of all the rules and groups. It is also possible to generate SVG, which can be embedded in XHTML. Contact me for a trick to rotate the labels in the SVG so that they do not overlap.

The code to run it all:

1
2
3
4
#!/bin/sh
# Generate the Graphviz sources and render the rules overview as PDF.
# Abort on the first failing command, so dot never renders a missing or
# stale .gv file after a failed generation run.
set -e
./generate-graphs.py
# Rendering of the phrases graph is disabled; uncomment to produce it too.
#dot -Tpdf phrases.gv -ophrases.pdf
dot -Tpdf rules.gv -orules.pdf

The script itself:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import libxml2
def _dot_escape(text):
    """Escape *text* for use inside a double-quoted DOT string.

    Backslashes are doubled first so that a trailing backslash cannot
    swallow the closing quote; double quotes are then backslash-escaped.
    """
    return text.replace('\\', '\\\\').replace('"', '\\"')


def _quote(text):
    """Return *text* as a complete double-quoted DOT string."""
    return '"' + _dot_escape(text) + '"'


def _label(ident, name):
    """Return 'ident: name', or just *ident* when *name* is None."""
    if name is None:
        return ident
    return ident + ': ' + name


def _write_header(graph, title, ranksep):
    """Write the digraph opening and the settings shared by both graphs."""
    graph.write('digraph ' + _quote(title) + ' {\n')
    graph.write('labelloc=t\n')
    graph.write('layout=twopi\n')
    graph.write('fontname="FreeSerif"\n')
    graph.write('fontsize="128"\n')
    graph.write('label=' + _quote(title) + '\n')
    graph.write('edge [arrowhead="none" ]\n')
    graph.write('node [shape="box" ]\n')
    graph.write('ranksep="' + ranksep + '"\n')


def _write_phrases(graph, phrases):
    """Write one node per phrase plus its postag nodes and phrase references."""
    for phrase in phrases:
        p_id = phrase.prop('id')
        tokens = phrase.xpathEval('token')

        # The phrase is named after the first token that has text content.
        p_name = ''
        for token in tokens:
            if token.content:
                p_name = token.content.strip()
                break

        if p_name == '':
            # No usable token text: show the bare id, in italics.
            graph.write('%s [label=%s fontname="FreeSerif Italic" fontsize="64" ]\n'
                        % (_quote(p_id), _quote(p_id)))
        else:
            graph.write('%s [label=%s fontsize="64" ]\n'
                        % (_quote(p_id), _quote(p_id + ': ' + p_name)))

        # One child node per token that carries a postag attribute.
        for token in tokens:
            postag = token.prop('postag')
            if postag:
                tag_id = p_id + '-' + postag
                graph.write('%s [label=%s fontsize="32" ]\n'
                            % (_quote(tag_id), _quote(postag)))
                graph.write('%s -> %s\n' % (_quote(p_id), _quote(tag_id)))

        # Dashed edges to every phrase this one includes.
        for ref in phrase.xpathEval('includephrases/phraseref'):
            graph.write('%s -> %s [style="dashed" ]\n'
                        % (_quote(p_id), _quote(ref.prop('idref'))))


def _write_rules(graph, parent_id, rules, inherited_name=None):
    """Write an edge from *parent_id* to each rule, followed by the rule node.

    A rule without an id gets the synthetic id 'parent_id[n]' (1-based), and
    a rule without a name uses *inherited_name*; rules nested in a rulegroup
    may omit both attributes.
    """
    for index, rule in enumerate(rules, 1):
        r_id = rule.prop('id') or '%s[%d]' % (parent_id, index)
        r_name = rule.prop('name')
        if r_name is None:
            r_name = inherited_name
        graph.write('%s -> %s\n' % (_quote(parent_id), _quote(r_id)))
        graph.write('%s [fontsize="16" label=%s ]\n'
                    % (_quote(r_id), _quote(_label(r_id, r_name))))


def _write_categories(graph, categories):
    """Write the category, rulegroup and rule nodes below an invisible root."""
    # Unlabelled, shapeless root that all categories hang off.
    graph.write('"Categories" [label="" shape="none" ]\n')
    for category in categories:
        c_name = category.prop('name')
        graph.write('%s [fontsize="64" style="rounded" ]\n' % _quote(c_name))
        graph.write('"Categories" -> %s [penwidth="0" ]\n' % _quote(c_name))
        _write_rules(graph, c_name, category.xpathEval('rule'))
        for group in category.xpathEval('rulegroup'):
            rg_id = group.prop('id')
            rg_name = group.prop('name')
            graph.write('%s -> %s\n' % (_quote(c_name), _quote(rg_id)))
            graph.write('%s [fontsize="32" label=%s style="rounded" ]\n'
                        % (_quote(rg_id), _quote(_label(rg_id, rg_name))))
            _write_rules(graph, rg_id, group.xpathEval('rule'), rg_name)


def main():
    """Read grammar.xml and write phrases.gv and rules.gv.

    Each output file is only created when the grammar contains matching
    elements (phrases, respectively categories).
    """
    grammar = libxml2.parseFile('grammar.xml')
    try:
        phrases = grammar.xpathEval('/rules/phrases/phrase')
        if phrases:
            with open('phrases.gv', 'w') as graph:
                _write_header(
                    graph,
                    'Frases in Grammaticacontrole Nederlandse Taal - OpenTaal 2011-08-14',
                    '4:4:8')
                _write_phrases(graph, phrases)
                graph.write('}\n')

        categories = grammar.xpathEval('/rules/category')
        if categories:
            with open('rules.gv', 'w') as graph:
                _write_header(
                    graph,
                    'Regels in Grammaticacontrole Nederlandse Taal - OpenTaal 2011-08-13',
                    '10:10:20')
                _write_categories(graph, categories)
                graph.write('}\n')
    finally:
        # The libxml2 bindings do not release the document automatically.
        grammar.freeDoc()


if __name__ == '__main__':
    main()