In [1]:
import platform

# Report the version of the interpreter this kernel is actually running.
# A shell call such as `!python --version` asks whichever `python` is first on
# the shell PATH, which is not guaranteed to be the kernel's interpreter and
# may not exist at all on systems that only ship `python3`.
print('Python ' + platform.python_version())
In [51]:
# Load data sets
import pandas as pd

# Input files, all addressed relative to the notebook's working directory.
geneAssignment = './data/preds_yeastnet_no_gi_0.04_0.5.txt.propagate.mapping'
alignmentFile = './data/alignments_FDR_0.1_t_0.1'
geneCountFile = './data/preds_yeastnet_no_gi_0.04_0.5.txt.propagate.term_sizes'
treeSourceUrl = './data/preds_yeastnet_no_gi_0.04_0.5.txt.propagate.small_parent_tree'

# Load the tree data.
# The file is tab-separated; because explicit column names are supplied and no
# header is requested, the first line of the file is read as data.
treeColNames = ['parent', 'child', 'type', 'in_tree']
tree = pd.read_csv(treeSourceUrl, sep='\t', names=treeColNames)

# Show the last rows as a quick sanity check of the parse.
tree.tail()
Out[51]:
In [55]:
# Tab-separated two-column table, read without a header row.
# NOTE(review): presumably each row assigns one gene to one CLIXO term - confirm.
assignment = pd.read_csv(geneAssignment, delimiter='\t', names=['gene', 'clixo'])

# Shape of the array of distinct values in the 'clixo' column, i.e. (n_distinct,).
distinct_clixo = assignment['clixo'].unique()
print(distinct_clixo.shape)

assignment.head()
Out[55]:
In [56]:
# Tab-separated alignment table, read without a header row.
# NOTE(review): presumably pairs each CLIXO term with a GO term plus a
# similarity score and FDR - confirm against the file's producer.
alignment_columns = ['clixo', 'go', 'similarity', 'fdr', 'genes']
al = pd.read_csv(alignmentFile, delimiter='\t', names=alignment_columns)
al.head()
Out[56]:
In [57]:
# Index the alignment table by its 'clixo' value (as a string). Each entry keeps
# the row's 'go', 'similarity' (stored as 'score') and 'fdr' values; the 'genes'
# column is not carried over. If a 'clixo' value repeats, the last row wins.
# NOTE(review): the key 'dfr' holds the 'fdr' column and looks like a typo for
# 'fdr'; it is left unchanged here in case other code reads 'dfr' - confirm.
mapping = {
    str(clixo): {'go': go, 'score': similarity, 'dfr': fdr}
    for clixo, go, similarity, fdr, _genes in al.itertuples(index=False, name=None)
}
In [58]:
# Tab-separated two-column table (read without a header row): term id and count.
geneCounts = pd.read_csv(geneCountFile, names=['clixo', 'count'], sep='\t')

# Map str(term id) -> count as a native Python number, so the values can be
# passed to json.dump later (NumPy scalars are not JSON serializable).
# Series.tolist() performs the NumPy -> Python conversion itself. The previous
# per-row `.item()` call only works when row iteration yields NumPy scalars;
# where iteration yields plain Python ints it raises AttributeError.
term2count = dict(zip(map(str, geneCounts['clixo']), geneCounts['count'].tolist()))
In [59]:
# Get unique terms
# Every row whose 'type' does not start with 'gene' contributes both of its
# endpoints (as strings) to the set of term ids.
clixo_terms = set()
for parent, child, etype, _in_tree in tree.itertuples(index=False, name=None):
    if etype.startswith('gene'):
        continue
    clixo_terms.update((str(parent), str(child)))

print(len(clixo_terms))
In [60]:
import json

# Skeleton of the document to export: graph-level 'data' plus empty 'nodes' and
# 'edges' lists under 'elements', which are filled in further down.
clixoTree = {
    'data': {'name': 'CLIXO Tree'},
    'elements': {'nodes': [], 'edges': []},
}

# Pretty-print the empty skeleton for inspection.
print(json.dumps(clixoTree, indent=4))
In [63]:
def get_node(id, count):
    """Build a node record for the exported graph.

    Args:
        id: Identifier stored under ``data.id``.
        count: Value stored under ``data.geneCount``.

    Returns:
        ``{'data': {'id': id, 'geneCount': count}}``
    """
    # `id` shadows the builtin inside this function; the name is kept because
    # it is part of the function's signature.
    return {'data': {'id': id, 'geneCount': count}}
def get_edge(source, target):
    """Build an edge record for the exported graph.

    The two arguments are stored crosswise: the first argument (``source``)
    ends up under ``data.target`` and the second (``target``) under
    ``data.source``.

    NOTE(review): the notebook calls this with (parent, child), so the swap
    makes exported edges point child -> parent. That looks intentional, but the
    parameter names suggest the opposite - confirm before changing.

    Returns:
        ``{'data': {'source': <second argument>, 'target': <first argument>}}``
    """
    return {'data': {'source': target, 'target': source}}
In [65]:
PREFIX = 'CLIXO:'

# One edge per tree row, skipping rows whose 'type' starts with 'gene' and rows
# flagged 'NOT_TREE' in the 'in_tree' column. Both endpoints get the PREFIX.
edges = [
    get_edge(PREFIX + str(parent), PREFIX + str(child))
    for parent, child, etype, in_tree in tree.itertuples(index=False, name=None)
    if not (etype.startswith('gene') or in_tree == 'NOT_TREE')
]

print(len(edges))
In [66]:
# One node per collected term id, labelled with PREFIX and carrying its count
# from term2count (a term missing from term2count raises KeyError).
# The terms are visited in sorted order because iterating a set of strings
# directly gives an order that can differ between kernel runs, which would make
# the exported file differ from run to run. The loop variable is deliberately
# not called `id`, so the builtin `id` is not overwritten at notebook scope.
nodes = [
    get_node(PREFIX + term_id, term2count[term_id])
    for term_id in sorted(clixo_terms)
]

print(len(nodes))
In [67]:
# Attach the collected nodes and edges to the document skeleton.
clixoTree['elements'].update({'nodes': nodes, 'edges': edges})

# Serialize the whole document as compact JSON into the .cyjs output file.
with open('./data/clixo-tree.cyjs', 'w') as outfile:
    outfile.write(json.dumps(clixoTree))
In [29]:
import networkx as nx

# Rebuild the exported graph as a NetworkX directed graph: node ids come from
# each node record's data.id, edges run from data.source to data.target.
DG = nx.DiGraph()
DG.add_nodes_from(node['data']['id'] for node in nodes)
DG.add_edges_from(
    (edge['data']['source'], edge['data']['target']) for edge in edges
)
In [30]:
import matplotlib.pyplot as plt
In [31]:
# Draw DG using NetworkX's circular layout helper.
nx.draw_circular(DG)
In [33]:
# pos = nx.nx_pydot.pydot_layout(DG)