In [37]:
from discoursegraphs.readwrite import RSTGraph
import networkx as nx
from collections import defaultdict

# ID of the document to visualize; it is substituted into the path of the
# .rs3 file when the RST graph is loaded. The commented-out line is an
# alternative document ID.
# DOC_ID = "maz-6046"
DOC_ID = "maz-00002"

In [38]:
def add_precedence_relations(dot_graph, token_ids, weight=1):
    """
    chains the given tokens together in their list order by adding an
    invisible edge between every pair of neighbouring token nodes
    (first -> second, second -> third etc.).

    Parameters
    ----------
    dot_graph : object providing ``add_edge(u, v, **attrs)``
        the graph that receives the invisible edges
    token_ids : list
        token node IDs in the order in which they should be chained
    weight : int
        value of the ``weight`` attribute set on each added edge
    """
    # pair each token with its right-hand neighbour
    neighbour_pairs = zip(token_ids[:-1], token_ids[1:])
    for left_token, right_token in neighbour_pairs:
        dot_graph.add_edge(left_token, right_token, style='invis', weight=weight)
    
def add_token_rank(dot_graph, token_ids):
    """
    groups all given token nodes in a subgraph named 'tokens' with
    rank='same' and then links them, in list order, with invisible
    precedence edges.

    Parameters
    ----------
    dot_graph : object providing ``add_subgraph`` and ``add_edge``
        the graph that is modified in place
    token_ids : list
        token node IDs in the order in which they should appear
    """
    subgraph_attrs = {'name': 'tokens', 'rank': 'same'}
    dot_graph.add_subgraph(token_ids, **subgraph_attrs)
    add_precedence_relations(dot_graph, token_ids)

In [39]:
def rst_rank_levels(rst_docgraph):
    """
    generates RST rank levels (i.e. positions on the Y-axis for dot layout).
    The root node will have rank=0, all nodes one level below it get rank=1 etc.

    The rank of a node is the number of edges on a shortest (undirected)
    path from the root to that node whose ``rst:relname`` is either 'span'
    or a relation name that ``rst_docgraph.relations`` maps to 'multinuc'.
    All other edges on the path leave the rank unchanged. Nodes that have
    'rst:token' in their 'layers' attribute and nodes that cannot be reached
    from the root are left out of the result.

    Parameters
    ----------
    rst_docgraph : RSTGraph
        the graph to rank. It must provide ``root`` (the ID of the root
        node), ``relations`` (an object with a dict-like ``get`` that is
        looked up by relation name) and ``to_undirected()``.

    Returns
    -------
    distance_from_root : defaultdict(list)
        maps from the rank (int) to the list of nodes (int/str node IDs)
        with that rank
    """
    # Take root and relations from the graph that was passed in, not from a
    # notebook-level variable, so the function works for any RST graph.
    root = rst_docgraph.root
    relations = rst_docgraph.relations

    # convert multi-digraph into a simple undirected graph
    undirected_rg = nx.Graph(rst_docgraph.to_undirected())

    distance_from_root = defaultdict(list)
    for node_id, node_attrs in undirected_rg.nodes_iter(data=True):
        if 'rst:token' in node_attrs['layers']:
            continue  # tokens are ranked separately

        # One search per node: a missing path is signalled by an exception,
        # so a preceding has_path() call would only repeat the same search.
        try:
            shortest_path = nx.shortest_path(undirected_rg, root, node_id)
        except nx.NetworkXNoPath:
            continue  # node is not connected to the root

        rank_distance = 0
        for path_node, next_node in zip(shortest_path[:-1], shortest_path[1:]):
            relname = undirected_rg.edge[path_node][next_node].get('rst:relname')
            # only 'span' edges and multinuclear relations add a level
            if relname == 'span' or relations.get(relname) == 'multinuc':
                rank_distance += 1
        distance_from_root[rank_distance].append(node_id)
    return distance_from_root

In [40]:
# Build the path of the selected document's .rs3 file, load it as an RST
# graph and write that graph, without any layout hints, in dot format.
rs3_path = '/home/arne/repos/pcc-annis-merged/maz176/rst/{}.rs3'.format(DOC_ID)
rg = RSTGraph(rs3_path)
nx.write_dot(rg, 'rst-nx.dot')

`nx.write_dot` works fine, but the tokens within an RST span are unsorted and don't align horizontally.


In [41]:
# Convert the RST graph with nx.to_agraph (presumably into a pygraphviz
# AGraph) so that layout constraints can be added to it.
rdotgraph = nx.to_agraph(rg)
# Put all tokens on the same rank and chain them with invisible edges
# (see add_token_rank).
add_token_rank(rdotgraph, rg.tokens)
# Write the constrained graph to 'rst-dot.dot'.
rdotgraph.write('rst-dot.dot')

This fixes the token sorting and the token alignment, but the RST segments still don't show a clear vertical hierarchy.


In [42]:
# Group all RST nodes that share a rank level into one rank='same' subgraph
# per level. .items() is used instead of the Python-2-only .iteritems() so
# that the cell runs unchanged on Python 2 and Python 3.
for distance, nodes in rst_rank_levels(rg).items():
    rdotgraph.add_subgraph(nodes, name='distance{}'.format(distance), rank='same')

# Optional tweak, disabled by default: 'ranksep' is the Graphviz graph
# attribute for the separation between ranks.
# rdotgraph.graph_attr['ranksep'] = '2.0'
rdotgraph.write('rst-distances.dot')