In [16]:
import os
import pygraphviz
import networkx as nx
from discoursegraphs.readwrite import TigerDocumentGraph
from discoursegraphs.util import natural_sort_key
# ID of the document to visualise. Later in the notebook the input file
# name is built as DOC_ID + '.xml' and handed to TigerDocumentGraph.
# Alternative document: DOC_ID = 'maz-19295'
DOC_ID = 'maz-00002'
# Directory containing the syntax files. The leading '~' is resolved with
# os.path.expanduser() at the point where the file path is assembled.
TIGER_DIR = '~/repos/pcc-annis-merged/maz176/syntax/'
In [17]:
def add_precedence_relations(dot_graph, token_ids, constraint='false', style='invis', weight=1):
    """
    Link neighbouring tokens into a precedence chain.

    For each pair of consecutive entries in ``token_ids`` one edge is added
    to ``dot_graph``, pointing from the earlier token to the later one
    (first -> second, second -> third, ...). With the default
    ``style='invis'`` the chain is requested as invisible edges.

    Parameters
    ----------
    dot_graph : graph object offering ``add_edge(source, target, **attrs)``
        the graph that receives the edges (a pygraphviz ``AGraph`` in this
        notebook)
    token_ids : sequence
        token node IDs in the order they should precede each other; must
        support slicing
    constraint : str
        value passed on as the ``constraint`` edge attribute
    style : str
        value passed on as the ``style`` edge attribute
    weight : int
        value passed on as the ``weight`` edge attribute
    """
    edge_attrs = {'style': style, 'weight': weight, 'constraint': constraint}
    # Pair every token with its immediate successor; sequences with fewer
    # than two tokens produce no pairs and therefore no edges.
    for earlier, later in zip(token_ids[:-1], token_ids[1:]):
        dot_graph.add_edge(earlier, later, **edge_attrs)
def add_token_subgraph(dot_graph, token_ids, cluster=True, rankdir='TB'):
    """
    Collect all token nodes in one subgraph that requests ``rank='same'``.

    The token IDs are put into natural sort order before they are handed
    to ``dot_graph.add_subgraph``.

    Parameters
    ----------
    dot_graph : pygraphviz.agraph.AGraph
        the graph to which the subgraph is added
    token_ids : iterable
        IDs of the token nodes that belong into the subgraph
    cluster : bool
        if True the subgraph is named 'cluster_tokens' (Graphviz treats
        subgraphs whose name starts with 'cluster' as clusters),
        otherwise it is named 'tokens'
    rankdir : str
        value passed on as the ``rankdir`` attribute of the subgraph

    Returns
    -------
    token_subgraph : pygraphviz.agraph.AGraph
        the subgraph that contains all the token nodes
    """
    if cluster:
        subgraph_name = 'cluster_tokens'
    else:
        subgraph_name = 'tokens'

    ordered_tokens = list(token_ids)
    ordered_tokens.sort(key=natural_sort_key)

    return dot_graph.add_subgraph(ordered_tokens, name=subgraph_name,
                                  rank='same', rankdir=rankdir)
In [18]:
def nx2dot(nxgraph):
    """
    Convert a networkx graph into a directed pygraphviz graph.

    The input is first turned into a plain ``nx.DiGraph`` (so a
    multidigraph loses its parallel edges). Every node and edge is then
    copied into a new ``pygraphviz.AGraph``; of all attributes only
    ``label`` is carried over, and only where it is present.

    Iteration uses ``nodes(data=True)`` / ``edges(data=True)``, which exist
    in networkx 1.x as well as 2.x, instead of the ``*_iter`` methods and
    the ``.node`` / ``.edge`` lookups that were removed in networkx 2.0.

    Parameters
    ----------
    nxgraph : networkx graph
        the graph to convert (e.g. a multidigraph)

    Returns
    -------
    dotgraph : pygraphviz.AGraph
        directed graph containing the same nodes and edges
    """
    digraph = nx.DiGraph(nxgraph)  # convert multidigraph to digraph
    dotgraph = pygraphviz.AGraph(directed=True)

    for node_id, node_attrs in digraph.nodes(data=True):
        if 'label' in node_attrs:
            dotgraph.add_node(node_id, label=node_attrs['label'])
        else:
            dotgraph.add_node(node_id)

    for source, target, edge_attrs in digraph.edges(data=True):
        if 'label' in edge_attrs:
            dotgraph.add_edge(source, target, label=edge_attrs['label'])
        else:
            dotgraph.add_edge(source, target)

    return dotgraph
We need `constraint=true` on the precedence edges and a non-cluster subgraph
to draw the tokens in the right order and horizontally aligned!
In [19]:
# Build the path of the input file: <TIGER_DIR with '~' expanded>/<DOC_ID>.xml
tiger_filepath = os.path.join(os.path.expanduser(TIGER_DIR), DOC_ID+'.xml')
# Parse the file into a document graph and convert it to a pygraphviz graph.
tdg = TigerDocumentGraph(tiger_filepath)
dotgraph = nx2dot(tdg)
# Chain the tokens with dotted precedence edges that take part in ranking
# (constraint='true' overrides the function's default of 'false').
add_precedence_relations(dotgraph, tdg.tokens, constraint='true', style='dotted')
# NOTE(review): the markdown cell above says a non-cluster subgraph is needed
# for correct token order/alignment, yet cluster=True is passed here --
# confirm whether cluster=False was intended.
add_token_subgraph(dotgraph, tdg.tokens, cluster=True)
# Write the resulting graph in dot format to the current working directory.
dotgraph.write('nx2dot.dot')
In [ ]:
# TODO: try one non-cluster subgraph for each RST token segment,
# then combine them in a cluster with rankdir=LR