notebook.community

Edit and run



In [16]:

    
import os
import pygraphviz
import networkx as nx
from discoursegraphs.readwrite import TigerDocumentGraph
from discoursegraphs.util import natural_sort_key

# ID of the document to visualize; the input file name is built as DOC_ID + '.xml'.
# Swap in the commented-out ID below to visualize a different document.
# DOC_ID = 'maz-19295'
DOC_ID = 'maz-00002'
# Directory holding the syntax files that are passed to TigerDocumentGraph;
# the leading '~' is expanded with os.path.expanduser when the path is built.
TIGER_DIR = '~/repos/pcc-annis-merged/maz176/syntax/'



In [17]:

    
def add_precedence_relations(dot_graph, token_ids, constraint='false', style='invis', weight=1):
    """
    links every token to its successor, forming a precedence chain:
    first token -> second token -> third token, and so on.

    The edges are added to ``dot_graph`` in place. ``style``, ``weight``
    and ``constraint`` are passed on unchanged as attributes of each edge;
    with the defaults the edges are drawn with ``style='invis'``.
    Nothing is added if there are fewer than two tokens.
    """
    edge_attrs = {'style': style, 'weight': weight, 'constraint': constraint}
    # pair each token with the one that directly follows it
    for predecessor, successor in zip(token_ids, token_ids[1:]):
        dot_graph.add_edge(predecessor, successor, **edge_attrs)
    
def add_token_subgraph(dot_graph, token_ids, cluster=True, rankdir='TB'):
    """
    puts all token nodes into one subgraph of the given dotgraph and
    requests the same rank for them (``rank='same'``).

    The tokens are added in natural sort order. The subgraph is named
    'cluster_tokens' if ``cluster`` is true and 'tokens' otherwise;
    ``rankdir`` is passed on as a subgraph attribute.

    Returns
    -------
    token_subgraph : pygraphviz.agraph.AGraph
        the newly created subgraph that contains all the token nodes
    """
    if cluster:
        subgraph_name = 'cluster_tokens'
    else:
        subgraph_name = 'tokens'

    ordered_tokens = list(token_ids)
    ordered_tokens.sort(key=natural_sort_key)
    return dot_graph.add_subgraph(ordered_tokens, name=subgraph_name,
                                  rank='same', rankdir=rankdir)



In [18]:

    
def nx2dot(nxgraph):
    """
    converts a networkx graph into a directed pygraphviz graph.

    Only the 'label' attribute of nodes and edges is carried over; all
    other attributes are dropped.

    Parameters
    ----------
    nxgraph : networkx graph
        the graph to convert (e.g. a MultiDiGraph). It is copied into a
        plain ``nx.DiGraph`` first, so the input is not modified.

    Returns
    -------
    dotgraph : pygraphviz.AGraph
        a directed dot graph with one node per input node and one edge
        per (source, target) pair of the intermediate DiGraph
    """
    digraph = nx.DiGraph(nxgraph)  # convert multidigraph to digraph
    dotgraph = pygraphviz.AGraph(directed=True)

    # nodes(data=True) / edges(data=True) exist in networkx 1.x and 2.x,
    # whereas nodes_iter(), edges_iter(), .node and .edge were removed in 2.x.
    for node_id, node_attrs in digraph.nodes(data=True):
        if 'label' in node_attrs:
            dotgraph.add_node(node_id, label=node_attrs['label'])
        else:
            dotgraph.add_node(node_id)

    for source, target, edge_attrs in digraph.edges(data=True):
        if 'label' in edge_attrs:
            dotgraph.add_edge(source, target, label=edge_attrs['label'])
        else:
            dotgraph.add_edge(source, target)
    return dotgraph

To draw the tokens in the right order and horizontally aligned, two things are needed: the precedence edges must use `constraint=true`, and the tokens must be placed in a non-cluster subgraph.



In [19]:

    
# build the path of the document's syntax file and parse it into a document graph
tiger_filepath = os.path.join(os.path.expanduser(TIGER_DIR), DOC_ID+'.xml')
tdg = TigerDocumentGraph(tiger_filepath)

dotgraph = nx2dot(tdg)
# visible (dotted) precedence edges that take part in the layout, so that
# the tokens are drawn in the right order
add_precedence_relations(dotgraph, tdg.tokens, constraint='true', style='dotted')
# use a NON-cluster subgraph: per this notebook's own finding, that is what
# is needed to get the tokens horizontally aligned (cluster=True was passed before)
add_token_subgraph(dotgraph, tdg.tokens, cluster=False)
# write the result in dot format to the current working directory
dotgraph.write('nx2dot.dot')



In [ ]:

    
# try one non-cluster subgraph for each rst-token-segment
# then, combine them in a cluster w/ rankdir=LR