In [30]:
import json
import pandas as pd
# Load the CLIXO mapping from JSON and report how many entries it holds.
with open('./data/clixo-mapping.json', 'r') as mapping_file:
    clixo_map = json.load(mapping_file)

print(len(clixo_map))
In [32]:
# Parse the CLIXO tree layout file as JSON and pull out its node list.
with open('./data/clixo-tree-layout.cyjs', 'r') as layout_file:
    clixo = json.load(layout_file)

nodes = clixo['elements']['nodes']

# Last expression of the cell: displays the node count.
len(nodes)
Out[32]:
In [34]:
# Peek at the second node record (index 1) to see which fields the raw layout carries.
print(nodes[1])
In [33]:
from goatools import obo_parser
# Annotation file for the CLIXO terms.
# NOTE: despite its name, oboUrl holds a local file path, not a URL.
oboUrl = './data/go.obo'
# optional_attrs=['def'] presumably asks goatools to also load each term's
# definition text — TODO confirm against the goatools version in use.
obo = obo_parser.GODag(oboUrl, optional_attrs=['def'])
In [37]:
# Rebuild every CLIXO node keeping only id, display name, gene count and position.
#   new_nodes : compacted node records
#   id2clixo  : layout node id -> original CLIXO id (used when rewriting edges)
#   gcounts   : gene count of every node, in input order
new_nodes = []
id2clixo = {}
gcounts = []

for node in nodes:
    attrs = node['data']
    original_id = attrs['id_original']
    id2clixo[attrs['id']] = original_id

    # The mapping is keyed by the part of the original id after the first ':'.
    term_key = original_id.split(':')[1]

    gene_count = int(attrs['geneCount'])
    gcounts.append(gene_count)

    # Use the mapped GO term's name when a mapping exists; otherwise keep
    # the original CLIXO id as the display name.
    if term_key in clixo_map:
        label = obo[clixo_map[term_key]['go']].name
    else:
        label = original_id

    new_nodes.append({
        'data': {
            'id': original_id,
            'name': label,
            'geneCount': gene_count,
        },
        'position': node['position'],
    })
In [38]:
# Display the number of raw CLIXO nodes (one compact node was appended per entry).
len(nodes)
Out[38]:
In [39]:
# Largest gene count among the CLIXO nodes (gcounts holds int(geneCount) per node).
max(gcounts)
Out[39]:
In [40]:
# Clean up edges: keep only source/target, rewritten from layout node ids
# to the original CLIXO ids via id2clixo.
edges = clixo['elements']['edges']
new_edges = [
    {
        'data': {
            'source': id2clixo[edge['data']['source']],
            'target': id2clixo[edge['data']['target']],
        }
    }
    for edge in edges
]
In [41]:
# Assemble the compact CLIXO network and write it out as JSON.
clixo_compact = {
    'data': {'name': 'CLIXO Tree'},
    'elements': {'nodes': new_nodes, 'edges': new_edges},
}

with open('./data/clixo-compact.json', 'w') as compact_file:
    json.dump(clixo_compact, compact_file)
In [1]:
# Download the original GO layout file; wget's -O flag saves it as ./data/go-original.cyjs.
!wget 'http://chianti.ucsd.edu/~kono/ci/data/deep-cell/go-sparse_original.cyjs' -O ./data/go-original.cyjs
In [6]:
import json
import pandas as pd
import math
# Parse the downloaded GO layout file as JSON.
with open('./data/go-original.cyjs', 'r') as go_file:
    gotree = json.load(go_file)
In [4]:
import pandas as pd
# Term-size table: tab-separated; names= supplies the column labels, so the
# first line of the file is read as data rather than as a header.
df = pd.read_csv(
    './data/collapsed_go.no_IGI.propagated.term_sizes',
    sep='\t',
    names=['id', 'genes'],
)

# Preview the first ten rows.
df.head(10)
Out[4]:
In [7]:
# Evaluate exp(x / 1000) at the largest gene count in the table.
# NOTE(review): presumably exploring a scaling for term sizes — TODO confirm intent.
math.exp(max(df['genes'])/1000)
Out[7]:
In [8]:
# Map each term id to its gene count.
#
# Series.tolist() returns native Python scalars, so the values can be passed
# straight to json.dump later. The previous per-row `row[2].item()` call raises
# AttributeError on pandas versions where itertuples() already yields plain
# Python ints (which have no .item() method).
# If an id appears more than once, the last row wins (same as the loop it replaces).
go2genes = dict(zip(df['id'], df['genes'].tolist()))
In [9]:
# Rebind `nodes` to the GO tree's node list (the name previously held the CLIXO nodes).
nodes = gotree['elements']['nodes']
# Show the second node record to inspect the available fields.
nodes[1]
Out[9]:
In [10]:
# Rebuild every GO node keeping only id, name, gene count, namespace and position.
#   id2go     : layout node id -> original GO id (used when rewriting edges)
#   new_nodes : compacted node records (rebinds the name used for the CLIXO nodes)
id2go = {}
new_nodes = []

for node in nodes:
    attrs = node['data']
    original_id = attrs['id_original']
    id2go[attrs['id']] = original_id

    label = attrs['name']
    # Raises KeyError when a term has no entry in the term-size table.
    gene_count = go2genes[original_id]

    new_nodes.append({
        'data': {
            'id': original_id,
            'name': label,
            'geneCount': gene_count,
            'namespace': attrs['namespace'],
        },
        'position': node['position'],
    })
In [11]:
# Spot-check the second compacted node record.
new_nodes[1]
Out[11]:
In [12]:
# Evaluates e**6 (6000 / 1000 == 6.0).
# NOTE(review): 6000 is presumably a rounded stand-in for the largest term size — TODO confirm.
math.exp(6000/1000)
Out[12]:
In [13]:
# Compact the GO edges: rewrite source/target from layout node ids to the
# original GO ids via id2go, and carry the 'branch' attribute through unchanged.
edges = gotree['elements']['edges']
new_edges = [
    {
        'data': {
            'source': id2go[edge['data']['source']],
            'target': id2go[edge['data']['target']],
            'branch': edge['data']['branch'],
        }
    }
    for edge in edges
]
In [14]:
# Spot-check the second compacted edge record.
new_edges[1]
Out[14]:
In [15]:
# Assemble the compact GO network and write it out as JSON.
go_compact = {
    'data': {'name': 'GO Tree'},
    'elements': {'nodes': new_nodes, 'edges': new_edges},
}

with open('./data/go-sparse-compact.json', 'w') as compact_file:
    json.dump(go_compact, compact_file)