In [1]:
# Table whose rows are filtered by probe ID further down in this cell.
filename_top = '../../data/03_ShawnJe/Je201608_MouseArray/Je201608_MouseArray_v3.top.txt'

# Lookup from probe ID to the value of column 1 of the annotation table
# (presumably the gene symbol -- confirm against the file header).
p2gene = dict()

# Gene-name lists. In the visible part of this cell they are referenced only by
# filters that are commented out inside the disabled block below.
transporters = "Slc6a4 Slc18a2 Slc18a1 Slc6a2 Chrm5 Drd1 Slc22a1 Slc22a2 Slc22a3 Slc6a3".split()
synthases = "Tph1 Tph2".split()

filename_p = '../../data/03_ShawnJe/Je201608_MouseArray/MOUSE_ens90_MOE403AB_SerotonineGO.txt'

# `with` guarantees the handle is released even if a row fails to parse.
with open(filename_p, 'r') as f_p:
    next(f_p, None)  # skip the header line (tolerates an empty file)
    for line in f_p:
        tokens = line.split("\t")
        if len(tokens) < 3:
            # Blank or truncated row: no probe column to register.
            continue
        tmp_gene = tokens[1]
        # Columns 2 and 3 both hold probe IDs; the slice also copes with rows
        # that lack the last column.
        for tmp_p in (field.strip() for field in tokens[2:4]):
            # Skip empty cells so that '' never becomes a key: otherwise a row
            # with a blank probe ID in the filtered table would falsely match.
            if tmp_p:
                p2gene[tmp_p] = tmp_gene

# Disabled alternative, kept as an inert string literal (none of it executes).
# It would build p2gene plus a gene -> probe-list map (gene2p) from the full
# MOUSE_ens90_MOE403AB.txt annotation, with optional gene-name filters.
"""
gene2p = dict()
tmp_count = 0
filename_p = '../../data/03_ShawnJe/Je201608_MouseArray/MOUSE_ens90_MOE403AB.txt'
f_p = open(filename_p,'r')
f_p.readline()
for line in f_p:
tokens = line.split("\t")
#if not tokens[1].startswith('Htr') or tokens[1].startswith('Htra'):
#if not tokens[1] in transporters:
#if not tokens[1] in synthases:
# continue
tmp_gene = tokens[1]
tmp_p1 = tokens[3].strip()
tmp_p2 = tokens[2].strip()
if tmp_p1 == '' and tmp_p2 == '':
continue
p2gene[tmp_p1] = tmp_gene
p2gene[tmp_p2] = tmp_gene
if not tmp_gene in gene2p:
gene2p[tmp_gene] = []
gene2p[tmp_gene].append(tmp_p1)
gene2p[tmp_gene].append(tmp_p2)
print(tmp_count, tokens)
tmp_count += 1
f_p.close()
print(len(gene2p))
print("\n".join(sorted(gene2p.keys())))
"""
# Other output paths, currently disabled:
#filename_out = '../../data/03_ShawnJe/Je201608_MouseArray/Je201608_MouseArray_v3.HTR_top.txt'
#filename_out = '../../data/03_ShawnJe/Je201608_MouseArray/Je201608_MouseArray_v3.transporter_top.txt'
#filename_out = '../../data/03_ShawnJe/Je201608_MouseArray/Je201608_MouseArray_v3.synthase_top.txt'
filename_out = '../../data/03_ShawnJe/Je201608_MouseArray/Je201608_MouseArray_v3.serotonineGO_top.txt'

# Copy the header line plus every row whose probe ID (first column, double
# quotes removed) is a key of p2gene. The input is opened first so that a
# missing input file does not truncate an existing output file, and `with`
# closes both handles even when a row raises.
with open(filename_top, 'r') as f_top, open(filename_out, 'w') as f_out:
    f_out.write(f_top.readline())
    for line in f_top:
        # Drop only the line terminator: a full strip() would also eat leading
        # or trailing tabs and so lose empty edge columns in the written row.
        row = line.rstrip("\n")
        tokens = row.split("\t")
        tmp_p = tokens[0].strip().replace('"', '')
        # A blank probe ID can never be a legitimate match.
        if not tmp_p or tmp_p not in p2gene:
            continue
        f_out.write("%s\n" % row)
        # Progress echo: probe ID, mapped gene, and the row's third column
        # (empty when the row has fewer than three columns).
        print(tmp_p, p2gene[tmp_p], tokens[2] if len(tokens) > 2 else '')
In [ ]: