In [6]:
import json
import csv
In [22]:
# Build a lookup from the first column of the tab-separated export to its second
# column. Going by the variable name, column 0 is presumably the Ensembl gene ID
# and column 1 the Entrez gene ID -- confirm against the export's column order.
# Rows whose second column is empty are left out of the map.
# NOTE(review): no header row is skipped; if the export begins with column
# titles they are stored as an ordinary entry -- confirm whether that matters.
hum_ensembl_entrez_map = {}
with open('mart_export_hum.txt', 'r') as mar_export_hum_file:
    mar_export_hum = csv.reader(mar_export_hum_file, delimiter='\t')
    for row in mar_export_hum:
        # csv.reader yields [] for a blank line, and a row may lack the second
        # column altogether; both mean "no value to map", so skip the row
        # rather than letting row[1] raise IndexError.
        if len(row) < 2:
            continue
        if row[1]:
            hum_ensembl_entrez_map[row[0]] = row[1]
In [34]:
# Build two lookups keyed by the accessions found in columns 1 and 2 of the
# tab-separated mouse export:
#   mus_refseq_entrez_map  : accession -> column 0 value
#   mus_refseq_ensembl_map : accession -> column 3 value
# Going by the map names, columns 1/2 presumably hold RefSeq accessions (NM_ and
# NR_ respectively), column 0 the Entrez ID and column 3 the Ensembl ID --
# confirm against the export's column order.
# A row may carry an accession in both columns; each is indexed independently.
# Empty accessions are ignored, and an empty column 0 / column 3 value is not stored.
mus_refseq_entrez_map = {}
mus_refseq_ensembl_map = {}
with open('mart_export_mus.txt', 'r') as mar_export_mus_file:
    mar_export_mus = csv.reader(mar_export_mus_file, delimiter='\t')
    for row in mar_export_mus:
        # Pad short rows (blank lines come back as [], and trailing empty
        # columns may have been stripped) so missing fields read as empty
        # strings instead of raising IndexError.
        row = row + [''] * (4 - len(row))
        entrez_id = row[0]
        ensembl_id = row[3]
        # Same handling for both accession columns, in the original order
        # (column 1 first, then column 2) so later writes win identically.
        for refseq_id in (row[1], row[2]):
            if not refseq_id:
                continue
            if entrez_id:
                mus_refseq_entrez_map[refseq_id] = entrez_id
            if ensembl_id:
                mus_refseq_ensembl_map[refseq_id] = ensembl_id
In [80]:
# Load the human and mouse gene lists. Both files are parsed as JSON, including
# the mouse one despite its .txt extension.
with open('genes_list.json') as hum_genes_file:
    hum_data = json.load(hum_genes_file)
with open('genes_list_GRCm38.txt') as mus_genes_file:
    mus_data = json.load(mus_genes_file)

# Peek at the first record of each list. For the human record, show the
# 'ensembl_id' with everything from the first '.' onwards removed.
# Single-argument print(...) prints the same on Python 2 and also parses on
# Python 3, unlike the bare `print x` statement.
print(hum_data[0]['ensembl_id'].split('.')[0])
print(mus_data[0])
In [77]:
# Scratch check: str.upper() returns the upper-cased copy of a string.
'hello'.upper()
Out[77]:
In [78]:
# Scratch check: split('.')[0] keeps only the text before the first '.'.
'3333.333'.split('.')[0]
Out[78]:
In [82]:
# Annotate every human gene record in place:
#   - 'name' is upper-cased;
#   - 'entrez_id' is looked up in hum_ensembl_entrez_map using the record's
#     'ensembl_id' truncated at its first '.', falling back to "" when the
#     truncated ID has no entry.
for hum_gene in hum_data:
    hum_gene['name'] = hum_gene['name'].upper()
    lookup_key = hum_gene['ensembl_id'].split('.')[0]
    hum_gene['entrez_id'] = hum_ensembl_entrez_map.get(lookup_key, "")
In [83]:
# Serialise the annotated human records as JSON into 'genes_list.json2',
# a separate file from the 'genes_list.json' input.
with open('genes_list.json2', 'w') as hum_out_file:
    json.dump(hum_data, hum_out_file)
In [84]:
# Annotate every mouse gene record in place. The record's 'ensembl_id' value is
# used as the key into both RefSeq-keyed maps (so it appears to hold a RefSeq
# accession rather than an Ensembl ID -- confirm against the input file):
#   - 'entrez_id'       <- mus_refseq_entrez_map, or "" when absent;
#   - 'ensembl_id_real' <- mus_refseq_ensembl_map, or "" when absent.
for mus_gene in mus_data:
    accession = mus_gene['ensembl_id']
    mus_gene['entrez_id'] = mus_refseq_entrez_map.get(accession, "")
    mus_gene['ensembl_id_real'] = mus_refseq_ensembl_map.get(accession, "")
In [85]:
# Serialise the annotated mouse records as JSON into 'genes_list_GRCm38.txt2',
# a separate file from the 'genes_list_GRCm38.txt' input.
with open('genes_list_GRCm38.txt2', 'w') as mus_out_file:
    json.dump(mus_data, mus_out_file)
In [86]:
# Spot-check the first annotated record of each list.
# Single-argument print(...) prints the same on Python 2 and also parses on
# Python 3, unlike the bare `print x` statement.
print(mus_data[0])
print(hum_data[0])
In [ ]: