In [1]:
import pandas as pd
from goatools import obo_parser
# --- Data source locations -------------------------------------------------
# Local Gene Ontology OBO file (parsed with goatools further down).
# NOTE: this constant used to be assigned twice with the same value; the
# redundant second assignment has been dropped.
oboUrl = './data/go.obo'

# Tab-separated parent/child table used to build the term graphs below.
treeSourceUrl = 'http://chianti.ucsd.edu/~kono/ci/data/collapsed_go.no_IGI.propagated.small_parent_tree'

# Local gzipped gene-association file (not read anywhere in the visible cells).
yeastAnnotationUrl = './data/gene_association.sgd.gz'

# External-to-GO mapping files published by geneontology.org
# (not read anywhere in the visible cells).
kegg2goUrl = 'http://geneontology.org/external2go/kegg2go'
reactome2go = 'http://geneontology.org/external2go/reactome2go'

# Phenotype table hosted on yeastgenome.org (not read anywhere in the visible cells).
phenotypeUrl = 'http://downloads.yeastgenome.org/curation/literature/phenotype_data.tab'
In [2]:
# Column labels for the header-less, tab-separated tree table.
treeColNames = ['parent', 'child', 'type', 'in_tree']

# Load the table; the file carries no header row, so the labels above are
# supplied explicitly.
tree = pd.read_csv(
    treeSourceUrl,
    sep='\t',
    header=None,
    names=treeColNames,
)

# Show the last ten rows as a quick sanity check of the parse.
tree.tail(10)
Out[2]:
In [3]:
# Parse the OBO file at `oboUrl` into a goatools GODag object.
obo = obo_parser.GODag(oboUrl)
In [148]:
import networkx as nx
# Empty directed graph; it is populated with nodes and edges in later cells.
G = nx.DiGraph()

# Every identifier seen in the first two columns of the table, and the
# (child, parent) pairs where both ends are GO identifiers.
node_set = set()
edges = []

for record in tree.itertuples(index=False):
    # With index=False the first two fields are the 'parent' and 'child' columns.
    parent_id, child_id = record[0], record[1]
    node_set.update((parent_id, child_id))
    # Keep only relations between two "GO:" terms; the edge points child -> parent.
    if "GO:" in parent_id and "GO:" in child_id:
        edges.append((child_id, parent_id))
In [149]:
# Register every identifier containing "GO:" as a node of the networkx graph.
G.add_nodes_from(term for term in node_set if "GO:" in term)

# Number of child -> parent edges collected from the table.
len(edges)
Out[149]:
In [150]:
# Insert all collected (child, parent) pairs as directed edges in one call.
G.add_edges_from(edges)
In [151]:
# Summarise the graph. `nx.info` was removed in NetworkX 3.0, so the counts
# are reported directly through methods that exist in every NetworkX release.
print("{} with {} nodes and {} edges".format(
    type(G).__name__, G.number_of_nodes(), G.number_of_edges()))

# Echo the 'GO:00SUPER' identifier if it is a node of the graph
# (a membership test replaces the previous linear scan over all nodes).
if 'GO:00SUPER' in G:
    print('GO:00SUPER')

# Attribute dict of the 'GO:00SUPER' node. `G.node` was removed in
# NetworkX 2.4; `G.nodes[...]` is the supported accessor (NetworkX >= 2.0).
# Raises KeyError when the node is absent, as the old lookup did.
root = G.nodes['GO:00SUPER']
In [153]:
# Enumerate every simple path from 'GO:0098799' to 'GO:00SUPER' in G and merge
# them into a single directed subgraph `sg`.
paths = nx.all_simple_paths(G, source='GO:0098799', target='GO:00SUPER')
sg = nx.DiGraph()
ns = set()  # names of all nodes that appear on at least one path

for p in paths:
    # Walk the consecutive (source, target) pairs along the path.
    for s, t in zip(p, p[1:]):
        ns.update((s, t))
        # add_edge creates missing endpoints and ignores an edge that already
        # exists, so no separate add_node bookkeeping is required.
        sg.add_edge(s, t)

print(ns)

# `nx.info` was removed in NetworkX 3.0; report the counts directly instead.
print("{} with {} nodes and {} edges".format(
    type(sg).__name__, sg.number_of_nodes(), sg.number_of_edges()))

# Persist the subgraph as GraphML in the working directory.
nx.write_graphml(sg, "test.graphml")
In [102]:
import igraph as ig
# Directed igraph graph with one vertex per identifier containing "GO:".
g = ig.Graph(directed=True)

# Add all vertices in a single bulk call instead of one add_vertex() call per
# node; a sequence of strings is used as the vertices' `name` attribute, and
# the vertices are created in the same iteration order as before.
g.add_vertices([term for term in node_set if "GO:" in term])

g.summary()
Out[102]:
In [103]:
# Bulk-add the (child, parent) pairs collected in `edges` to the igraph graph.
# The endpoints are strings — presumably matched by igraph against the
# vertices' `name` attribute; confirm against the igraph docs.
g.add_edges(edges)
In [104]:
# summary() only returns a string; because it is not the last expression of the
# cell its value used to be discarded, so print it explicitly.
print(g.summary())

# List every collected edge that has 'GO:0098798' as either endpoint.
for e in edges:
    if e[0] == 'GO:0098798' or e[1] == 'GO:0098798':
        print(e)
In [117]:
# Resolve the two terms to igraph vertex indices by their `name` attribute.
source_idx = g.vs.find(name='GO:0098798').index
target_idx = g.vs.find(name='GO:00SUPER').index

# All simple paths (lists of vertex indices) from the source term to
# 'GO:00SUPER', following edge direction. The previous call,
# vertex_disjoint_paths(), is igraph's vertex-connectivity measure and returns
# a single number, which the path-walking loop further down cannot iterate.
paths1 = g.get_all_simple_paths(source_idx, to=target_idx)
In [118]:
# Debug output: name of the vertex stored at index 2787, then the value of paths1.
# NOTE(review): 2787 is a hard-coded vertex index. Vertices are created while
# iterating over `node_set` (a set), so which term sits at this index is not
# guaranteed to be stable between kernel sessions — confirm which vertex was
# meant and look it up by name instead.
print(g.vs[2787]['name'])
print(paths1)
In [108]:
# Merge the paths in `paths1` (each a sequence of vertex indices of `g`) into
# one subgraph. The subgraph is directed, like `g`, so the child -> parent
# orientation printed below is preserved (a bare ig.Graph() is undirected).
subg = ig.Graph(directed=True)
n_set = set()       # vertex names already present in subg
seen_edges = set()  # (source, target) name pairs already present in subg

for p in paths1:
    # Walk the consecutive vertex-index pairs along the path.
    for v, w in zip(p, p[1:]):
        s = g.vs[v]['name']
        t = g.vs[w]['name']
        print(s + ' --> ' + t)
        # Create each endpoint once; igraph does not de-duplicate vertex names.
        for term in (s, t):
            if term not in n_set:
                subg.add_vertex(term)
                n_set.add(term)
        # Paths may share edges; add each one once so the saved graph has no
        # multi-edges (igraph would otherwise store every repetition).
        if (s, t) not in seen_edges:
            subg.add_edge(source=s, target=t)
            seen_edges.add((s, t))
    # Visual separator between consecutive paths in the trace output.
    print('-----')

# summary() only returns a string; print it, otherwise it is lost mid-cell.
print(subg.summary())
print(n_set)
In [101]:
# Write the path subgraph to "sub.gml" in GML format (working directory).
subg.write_gml("sub.gml")