In [37]:
from discoursegraphs.readwrite import RSTGraph
import networkx as nx
from collections import defaultdict
# ID of the document to visualize; it is inserted into the .rs3 file name
# when the RST graph is loaded. Swap the two lines to try the other document.
# DOC_ID = "maz-6046"
DOC_ID = "maz-00002"
In [38]:
def add_precedence_relations(dot_graph, token_ids, weight=1):
    """
    chains the given token nodes in their list order: every token gets
    an invisible edge to the token that directly follows it.

    Parameters
    ----------
    dot_graph : object with an ``add_edge(source, target, **attrs)`` method
        the graph that receives the edges (modified in place)
    token_ids : list
        token node IDs in the order in which they should be chained
    weight : int
        value of the ``weight`` attribute set on each added edge
    """
    # pair each token with its successor; fewer than two tokens yield no pairs
    for predecessor, successor in zip(token_ids, token_ids[1:]):
        dot_graph.add_edge(predecessor, successor, style='invis', weight=weight)
def add_token_rank(dot_graph, token_ids):
    """
    groups all token nodes in a subgraph named 'tokens' with
    rank='same' and then chains them with invisible edges
    (see add_precedence_relations).

    Parameters
    ----------
    dot_graph : object with ``add_subgraph`` and ``add_edge`` methods
        the graph to modify in place
    token_ids : list
        token node IDs in the order in which they should be chained
    """
    same_rank_attrs = {'name': 'tokens', 'rank': 'same'}
    dot_graph.add_subgraph(token_ids, **same_rank_attrs)
    add_precedence_relations(dot_graph, token_ids)
In [39]:
def rst_rank_levels(rst_docgraph):
    """
    generates RST rank levels (i.e. positions on the Y-axis for dot layout).
    The root node will have rank=0, all nodes one level below it get rank=1 etc.

    A node's rank is the number of edges on the shortest path from the
    root to that node which are either labelled 'span' or belong to a
    relation of type 'multinuc'. All other edges on the path don't
    increase the rank. Token nodes (layer 'rst:token') are left out.

    Parameters
    ----------
    rst_docgraph : RSTGraph
        the graph to analyse. Its ``root`` attribute (root node ID) and
        ``relations`` attribute (maps relation name to relation type)
        are read; the graph itself is not modified.

    Returns
    -------
    distance_from_root : defaultdict(list)
        maps from the rank (int) to the list of nodes (int/str node IDs)
        with that rank
    """
    # Read root and relation types from the graph that was passed in,
    # not from a notebook-global variable.
    root_id = rst_docgraph.root
    relations = rst_docgraph.relations

    # convert multi-digraph into undirected graph, so that every node can be
    # looked up from the root regardless of edge direction
    undirected_rg = nx.Graph(rst_docgraph.to_undirected())

    distance_from_root = defaultdict(list)
    for node_id, node_attrs in undirected_rg.nodes_iter(data=True):
        if 'rst:token' in node_attrs['layers']:
            continue  # tokens get their own rank, cf. add_token_rank()

        rank_distance = 0
        if nx.has_path(undirected_rg, root_id, node_id):
            shortest_path = nx.shortest_path(undirected_rg, root_id, node_id)
            for source, target in zip(shortest_path, shortest_path[1:]):
                relname = undirected_rg.edge[source][target].get('rst:relname')
                if relname == 'span' or relations.get(relname) == 'multinuc':
                    rank_distance += 1
        # NOTE(review): nodes without a path to the root keep rank 0, i.e.
        # they share a rank with the root -- confirm that this is intended.
        distance_from_root[rank_distance].append(node_id)
    return distance_from_root
In [40]:
# Load the RST annotation of document DOC_ID from its .rs3 file.
# NOTE(review): this is an absolute, machine-specific path; it has to be
# adapted before the notebook can run anywhere else.
rg = RSTGraph('/home/arne/repos/pcc-annis-merged/maz176/rst/{}.rs3'.format(DOC_ID))
# Baseline: dump the graph as it is, without any layout hints, to 'rst-nx.dot'.
nx.write_dot(rg, 'rst-nx.dot')
`write_dot` works fine, but the tokens within an RST span are unsorted and don't align horizontally.
In [41]:
# Convert the graph with nx.to_agraph so that subgraphs and invisible edges
# can be added as layout hints (the helpers above call add_subgraph/add_edge).
rdotgraph = nx.to_agraph(rg)
# Put all tokens on one rank and chain them in list order.
# presumably rg.tokens holds the token node IDs in text order -- TODO confirm
add_token_rank(rdotgraph, rg.tokens)
rdotgraph.write('rst-dot.dot')
This fixes the token order and the horizontal token alignment, but the RST segments still don't show a clear vertical hierarchy.
In [42]:
# Put all RST nodes with the same rank level into one rank='same' subgraph,
# named 'distance<level>'.
# .items() instead of .iteritems(): same result on Python 2, and it also
# runs on Python 3.
for distance, nodes in rst_rank_levels(rg).items():
    rdotgraph.add_subgraph(nodes, name='distance{}'.format(distance), rank='same')
# Optional tweak, disabled by default: set the graph's 'ranksep' attribute.
# rdotgraph.graph_attr['ranksep'] = '2.0'
rdotgraph.write('rst-distances.dot')