In [1]:
# Input table that the filtering step later in this cell reads row by row.
filename_top = '../../data/03_ShawnJe/Je201608_MouseArray/Je201608_MouseArray_v3.top.txt'

# Maps a probe-set ID (e.g. "1418262_at") to the value in column 1 of the
# annotation table (printed as the gene symbol by the filtering step).
p2gene = dict()

# Gene lists; within this cell they are referenced only by the commented-out
# filters inside the disabled loader string below.
transporters = "Slc6a4 Slc18a2 Slc18a1 Slc6a2 Chrm5 Drd1 Slc22a1 Slc22a2 Slc22a3 Slc6a3".split()
synthases = "Tph1 Tph2".split()

filename_p = '../../data/03_ShawnJe/Je201608_MouseArray/MOUSE_ens90_MOE403AB_SerotonineGO.txt'
# `with` closes the handle even when a row fails to parse.
with open(filename_p,'r') as f_p:
    f_p.readline()  # discard the first line (presumably a header row -- TODO confirm)
    for line in f_p:
        if not line.strip():
            # Blank lines (e.g. a trailing newline at EOF) carry no columns.
            continue
        tokens = line.split("\t")
        # Columns 3 and 2 can each hold a probe ID for the same column-1 value.
        for tmp_p in (tokens[3].strip(), tokens[2].strip()):
            # A row may have a probe ID in only one of the two columns. Storing
            # the empty string as a key would let blank / ID-less rows of the
            # input table match during filtering, so skip empty IDs.
            if tmp_p:
                p2gene[tmp_p] = tokens[1]

# NOTE: the triple-quoted string below is a bare expression statement, so the
# code inside it never runs. It holds an alternative loader that reads the
# MOUSE_ens90_MOE403AB.txt table, fills p2gene and a gene -> probe-list map
# (gene2p), and has optional commented-out filters for Htr*, `transporters`
# and `synthases` genes.
"""
gene2p = dict()
tmp_count = 0
filename_p = '../../data/03_ShawnJe/Je201608_MouseArray/MOUSE_ens90_MOE403AB.txt'
f_p = open(filename_p,'r')
f_p.readline()
for line in f_p:
    tokens = line.split("\t")
    #if not tokens[1].startswith('Htr') or tokens[1].startswith('Htra'):
    #if not tokens[1] in transporters:
    #if not tokens[1] in synthases:
    #    continue
        
    tmp_gene = tokens[1]
    tmp_p1 = tokens[3].strip()
    tmp_p2 = tokens[2].strip()
    if tmp_p1 == '' and tmp_p2 == '':
        continue
    p2gene[tmp_p1] = tmp_gene
    p2gene[tmp_p2] = tmp_gene
    if not tmp_gene in gene2p:
        gene2p[tmp_gene] = []
    gene2p[tmp_gene].append(tmp_p1)
    gene2p[tmp_gene].append(tmp_p2)
    print(tmp_count, tokens)
    tmp_count += 1
f_p.close()

print(len(gene2p))
print("\n".join(sorted(gene2p.keys())))
"""

# Alternative output targets, kept for reference; only the last assignment is active.
#filename_out = '../../data/03_ShawnJe/Je201608_MouseArray/Je201608_MouseArray_v3.HTR_top.txt'
#filename_out = '../../data/03_ShawnJe/Je201608_MouseArray/Je201608_MouseArray_v3.transporter_top.txt'
#filename_out = '../../data/03_ShawnJe/Je201608_MouseArray/Je201608_MouseArray_v3.synthase_top.txt'
filename_out = '../../data/03_ShawnJe/Je201608_MouseArray/Je201608_MouseArray_v3.serotonineGO_top.txt'

# Copy the first line of the input table, then keep only rows whose first
# column (a probe ID, with any double quotes removed) is a key of p2gene.
# The input is opened first so that a missing input file does not leave a
# truncated, empty output file behind; `with` closes and flushes both handles
# even if a row raises part-way through.
with open(filename_top,'r') as f_top, open(filename_out,'w') as f_out:
    f_out.write(f_top.readline())
    for line in f_top:
        tokens = line.strip().split("\t")
        tmp_p = tokens[0].replace('"','')
        if not tmp_p:
            # Blank row or empty first column: never a real probe ID, and it
            # must not match an empty-string key that may exist in p2gene.
            continue
        if tmp_p in p2gene:
            f_out.write("%s\n"%line.strip())
            # Echo probe ID, its mapped p2gene value, and column 2 of the row.
            print(tmp_p, p2gene[tmp_p], tokens[2])


1418262_at Syk 5.16226226733051
1445292_at Cd300a 3.28969937703505
1422949_at Nos1 5.9797173369766
1438483_at Nos1 7.96033929405608
1422288_at Htr1b 8.0255193166074
1417150_at Slc6a4 5.75719953680082
1425797_a_at Syk 3.34022845569421
1425886_at Fev 4.70844387986161
1429794_a_at P2rx1 5.40650098911271
1418340_at Fcer1g 9.1782522241611
1429887_at Nos1 8.51306438246219
1442077_at Htr1b 8.13353916284828
1435903_at Cd300a 5.99277093011761
1418261_at Syk 5.57711599863286
1460719_a_at P2rx1 5.18781521511437
1418493_a_at Snca 13.0622709368567
1438710_at Htr1a 7.91559307535386
1423091_a_at Gpm6b 13.5705530306315
1436853_a_at Snca 12.9823341994084
1457984_at Crh 8.68996141765134
1434354_at Maob 9.70589861984421
1425942_a_at Gpm6b 12.8994959671943
1448807_at Hrh3 10.1613782949949
1450219_at Htr1a 6.80254760242425
1421775_at Fcer1a 2.41574857228516

In [ ]: