In [30]:
    
import json
import pandas as pd
# Read the CLIXO mapping JSON from the data directory
with open('./data/clixo-mapping.json', 'r') as mapping_file:
    clixo_map = json.load(mapping_file)

# Report how many keys the mapping contains
mapping_size = len(clixo_map.keys())
print(mapping_size)
    
    
In [32]:
    
# Parse the CLIXO tree layout file and pull out its node records
with open('./data/clixo-tree-layout.cyjs', 'r') as layout_file:
    clixo = json.load(layout_file)

clixo_elements = clixo['elements']
nodes = clixo_elements['nodes']

# Cell output: number of node records
len(nodes)
    
    Out[32]:
In [34]:
    
# Print the node record at index 1 as a sample of the node structure.
print(nodes[1])
    
    
In [33]:
    
from goatools import obo_parser
# Annotation file for the CLIXO terms
# (a local path, despite the variable being named "Url").
oboUrl = './data/go.obo'
# Build a goatools GODag from the OBO file. optional_attrs=['def'] is passed
# presumably so term definitions are loaded as well -- TODO confirm against
# the goatools documentation.
obo = obo_parser.GODag(oboUrl, optional_attrs=['def'])
    
    
In [37]:
    
# Build a slimmed-down node list for the CLIXO tree. Each output node keeps
# only the original term id, a display name and the gene count, together with
# the layout position copied unchanged from the source node.
new_nodes = []
id2clixo = {}   # node 'id' -> 'id_original'; used later to rewrite edge endpoints
gcounts = []    # gene count of every node, in node order
for node in nodes:
    attrs = node['data']
    original_id = attrs['id_original']

    id2clixo[attrs['id']] = original_id

    # The mapping is looked up by the second ':'-separated field of the id.
    clixo_key = original_id.split(':')[1]
    gene_count = int(attrs['geneCount'])
    gcounts.append(gene_count)

    # Default label is the raw id; when the term appears in the mapping, use
    # the name of the GO term it maps to instead.
    label = original_id
    if clixo_key in clixo_map:  # direct dict membership, no .keys() needed
        label = obo[clixo_map[clixo_key]['go']].name

    new_nodes.append({
        'data': {
            'id': original_id,
            'name': label,
            'geneCount': gene_count,
        },
        'position': node['position'],
    })
    
In [38]:
    
# Number of source node records (note: this measures `nodes`, not `new_nodes`).
len(nodes)
    
    Out[38]:
In [39]:
    
# Largest gene count collected in `gcounts`.
max(gcounts)
    
    Out[39]:
In [40]:
    
# Rewrite every edge so that its endpoints use the original CLIXO ids;
# no other edge attributes are carried over.
edges = clixo['elements']['edges']
new_edges = [
    {
        'data': {
            'source': id2clixo[edge['data']['source']],
            'target': id2clixo[edge['data']['target']],
        }
    }
    for edge in edges
]
    
In [41]:
    
# Assemble the trimmed CLIXO network and save it as JSON.
clixo_header = {'name': 'CLIXO Tree'}
clixo_body = {'nodes': new_nodes, 'edges': new_edges}
clixo_compact = {'data': clixo_header, 'elements': clixo_body}

with open('./data/clixo-compact.json', 'w') as compact_file:
    json.dump(clixo_compact, compact_file)
    
In [1]:
    
# Shell escape: download the original sparse GO network file and store it as
# ./data/go-original.cyjs (the ./data directory must already exist).
!wget 'http://chianti.ucsd.edu/~kono/ci/data/deep-cell/go-sparse_original.cyjs' -O ./data/go-original.cyjs
    
    
In [6]:
    
import json
import pandas as pd
import math
# Parse the downloaded GO network file
with open('./data/go-original.cyjs', 'r') as go_file:
    gotree = json.load(go_file)
    
In [4]:
    
import pandas as pd
# Read the tab-separated term-size table; the two columns are labelled
# 'id' and 'genes' here rather than taken from the file.
df = pd.read_csv(
    './data/collapsed_go.no_IGI.propagated.term_sizes',
    sep='\t',
    names=['id', 'genes'],
)

# Preview the first ten rows
df.head(10)
    
    Out[4]:
In [7]:
    
# e raised to (largest value in the 'genes' column / 1000).
# NOTE(review): the purpose of this scaling is not evident from the notebook -- TODO confirm.
math.exp(max(df['genes'])/1000)
    
    Out[7]:
In [8]:
    
# Map each term id to its gene count.
# `.tolist()` converts the column values to built-in Python scalars, which is
# what the per-row `.item()` call was doing. That call breaks on pandas
# versions where itertuples() already yields built-in ints (they have no
# `.item` method), so the conversion is done once per column instead.
# If an id occurs more than once, the last row wins, as with the row loop.
go2genes = dict(zip(df['id'].tolist(), df['genes'].tolist()))
    
In [9]:
    
# Node records of the GO network. This rebinds `nodes`, which earlier in the
# notebook held the CLIXO node records.
nodes = gotree['elements']['nodes']
# Display the record at index 1 as a sample.
nodes[1]
    
    Out[9]:
In [10]:
    
# Reduce every GO node to id, name, gene count and namespace while keeping
# its layout position. Also record node 'id' -> 'id_original' so edges can be
# rewritten afterwards.
id2go = {}
new_nodes = []
for node in nodes:
    attrs = node['data']
    go_id = attrs['id_original']
    id2go[attrs['id']] = go_id

    new_nodes.append({
        'data': {
            'id': go_id,
            'name': attrs['name'],
            'geneCount': go2genes[go_id],
            'namespace': attrs['namespace'],
        },
        'position': node['position'],
    })
    
In [11]:
    
# Display the rebuilt node at index 1 to check the trimmed structure.
new_nodes[1]
    
    Out[11]:
In [12]:
    
# e raised to (6000 / 1000).
# NOTE(review): 6000 is a hard-coded value whose origin is not shown in this notebook -- TODO confirm.
math.exp(6000/1000)
    
    Out[12]:
In [13]:
    
# Translate edge endpoints through id2go and carry over only the 'branch'
# attribute from each source edge.
edges = gotree['elements']['edges']
new_edges = [
    {
        'data': {
            'source': id2go[edge['data']['source']],
            'target': id2go[edge['data']['target']],
            'branch': edge['data']['branch'],
        }
    }
    for edge in edges
]
    
In [14]:
    
# Display the rebuilt edge at index 1 to check the trimmed structure.
new_edges[1]
    
    Out[14]:
In [15]:
    
# Assemble the trimmed GO network and save it as JSON.
go_header = {'name': 'GO Tree'}
go_body = {'nodes': new_nodes, 'edges': new_edges}
go_compact = {'data': go_header, 'elements': go_body}

with open('./data/go-sparse-compact.json', 'w') as compact_file:
    json.dump(go_compact, compact_file)