In [1]:
import pandas as pd
from goatools import obo_parser

oboUrl = './data/go.obo'
treeSourceUrl = 'http://chianti.ucsd.edu/~kono/ci/data/collapsed_go.no_IGI.propagated.small_parent_tree'
oboUrl = './data/go.obo'
yeastAnnotationUrl = './data/gene_association.sgd.gz'
kegg2goUrl = 'http://geneontology.org/external2go/kegg2go'
reactome2go = 'http://geneontology.org/external2go/reactome2go'

phenotypeUrl='http://downloads.yeastgenome.org/curation/literature/phenotype_data.tab'

In [2]:
treeColNames = ['parent', 'child', 'type', 'in_tree']
tree = pd.read_csv(treeSourceUrl, delimiter='\t', names=treeColNames)
tree.tail(10)


Out[2]:
parent child type in_tree
441927 GO:0090150 YHR083W gene NOT_TREE
441928 GO:0005575 YHR083W gene NOT_TREE
441929 GO:0098796 YHR083W gene NOT_TREE
441930 GO:1902589 YHR083W gene NOT_TREE
441931 GO:0044085 YHR083W gene NOT_TREE
441932 GO:0015031 YHR083W gene NOT_TREE
441933 GO:1902582 YHR083W gene NOT_TREE
441934 GO:1902580 YHR083W gene NOT_TREE
441935 GO:0098799 YHR083W gene NOT_TREE
441936 GO:0098798 YHR083W gene NOT_TREE

In [3]:
obo = obo_parser.GODag(oboUrl)


load obo file ./data/go.obo
./data/go.obo: fmt(1.2) rel(2016-12-23) 47,905 GO Terms

In [148]:
import networkx as nx

G=nx.DiGraph()

node_set = set()
edges = []

for row in tree.itertuples():
    node_set.add(row[1])
    node_set.add(row[2])
    if "GO:" in row[1] and "GO:" in row[2]:
        edges.append((row[2], row[1]))

In [149]:
for node in node_set:
     if "GO:" in node:
        G.add_node(node)
    
len(edges)


Out[149]:
14528

In [150]:
for e in edges:
    G.add_edge(e[0], e[1])

In [151]:
print(nx.info(G))
for n in G.nodes():
    if n == 'GO:00SUPER':
        print(n)
        
root = G.node['GO:00SUPER']


Name: 
Type: DiGraph
Number of nodes: 6618
Number of edges: 14528
Average in degree:   2.1952
Average out degree:   2.1952
GO:00SUPER

In [153]:
paths = nx.all_simple_paths(G, source='GO:0098799', target='GO:00SUPER')

sg = nx.DiGraph()
ns = set()

for p in paths:
    p_len = len(p)
    
    for i, v in enumerate(p):
        
        if i < p_len-1:
            s = v
            t = p[i+1]
            
            if s not in ns:
                sg.add_node(s)
                ns.add(s)
            if t not in ns:
                sg.add_node(t)
                ns.add(t)
            sg.add_edge(s, t)

print(ns)
print(nx.info(sg))

nx.write_graphml(sg, "test.graphml")


{'GO:0098798', 'GO:0044455', 'GO:0044425', 'GO:0019867', 'GO:0098588', 'GO:0044422', 'GO:0098805', 'GO:0044464', 'GO:0032991', 'GO:0044444', 'GO:0005740', 'GO:0044446', 'GO:0044424', 'GO:0043231', 'GO:0043234', 'GO:0043226', 'GO:0043227', 'GO:0005737', 'GO:0098799', 'GO:0016020', 'GO:0044429', 'GO:0031966', 'GO:00SUPER', 'GO:0043229', 'GO:0031090', 'GO:0031967', 'GO:0005741', 'GO:0005739', 'GO:0005622', 'GO:0005575', 'GO:0098796', 'GO:0031968'}
Name: 
Type: DiGraph
Number of nodes: 32
Number of edges: 63
Average in degree:   1.9688
Average out degree:   1.9688

In [102]:
import igraph as ig

g = ig.Graph(directed=True)
for node in node_set:
    if "GO:" in node:
        g.add_vertex(name=node)

g.summary()


Out[102]:
'IGRAPH DN-- 6618 0 -- \n+ attr: name (v)'

In [103]:
g.add_edges(edges)

In [104]:
g.summary()
for e in edges:
    if e[0] == 'GO:0098798' or e[1]=='GO:0098798':
        print(e)


('GO:0098798', 'GO:0005739')
('GO:0098798', 'GO:0044429')
('GO:0098798', 'GO:0043234')
('GO:0017087', 'GO:0098798')
('GO:0098800', 'GO:0098798')
('GO:0098799', 'GO:0098798')
('GO:0030062', 'GO:0098798')

In [117]:
paths1 = g.vertex_disjoint_paths(g.vs.find('GO:0098798').index, target=g.vs.find('GO:00SUPER').index)

In [118]:
print(g.vs[2787]['name'])
print(paths1)


GO:0003857
1

In [108]:
subg = ig.Graph()
n_set = set()

for p in paths1:
    p_len = len(p)
    
    
    for i, v in enumerate(p):
        if i < p_len-1:
#             print(str(v) + ' --> ' + str(p[i+1]))
            s = g.vs[v]['name']
            t = g.vs[p[i+1]]['name']
            print(s + ' --> ' + t)
            if s not in n_set: 
                subg.add_vertex(s)
                n_set.add(s)
            if t not in n_set:
                subg.add_vertex(t)
                n_set.add(t)
            subg.add_edge(source=s, target=t)
    print('-----')

subg.summary()
print(n_set)


GO:0098798 --> GO:0043234
GO:0043234 --> GO:0032991
GO:0032991 --> GO:0005575
GO:0005575 --> GO:00SUPER
-----
{'GO:0032991', 'GO:0043234', 'GO:0098798', 'GO:00SUPER', 'GO:0005575'}

In [101]:
subg.save("sub.gml", format="gml")