In [1]:
import numpy as np
import pylab as pb
import GPy
import Bio.Affy.CelFile as ss
from xlrd import open_workbook
import pickle
%pylab inline
In [2]:
num_genes=10000
from xlrd import open_workbook
wb = open_workbook('ann1.xlsx')
ann=np.repeat("ccccccccccccc",num_genes*2).flatten()
ann=ann.reshape(num_genes,2)
for ss in wb.sheets():
for row in range(1,num_genes+1):
ann[row-1,0] = ss.cell(row,0).value
ann[row-1,1] = ss.cell(row,1).value
#print ann[row,1]
#print ann[36,0], ann[36,1]
In [3]:
def maxx(odds):
if odds[0]<0.5 :
if odds[3]<0.5:
if odds[5]<0.5:
return 4
else :
return 3
elif odds[4]<0.5:
return 4
else : return 2
elif odds[1]<0.5:
if odds[5]<0.5:
return 4
else :
return 3
elif odds[2]<0.5:
return 4
else : return 1
max1=np.zeros(num_genes).flatten()
num_genes=1000
import pickle
kk=np.repeat('cccccccccccccc',(num_genes+1)*25).reshape(num_genes+1,25)
odds=np.zeros(6)
kk[0,:]=['Prob_ID','Gene symbol', 'likelihood value C1','likelihood value C2','likelihood value C3','likelihood value C4','propab. C1','propab.C2','propab. C3'
,'propab. C4','ODDS C1/c2','ODDS C1/c3','ODDS C1/c4' ,'ODDS . C2/c3',' ODDS . C2/c4','ODDS . C3/c4', ' logit C1/c2',' logit C1/c3',' logit C1/c4'
,' logit C2/c3',' logit C2/c4',' logit C3/c4','best model','Gene No.','model 1 or 2' ]
new=[]
m=[0,1,2,3]
for j in range(0,4):
new.append(numpy.load("Case{}_models.npy".format(j+1)))
norm_case=np.zeros(4*num_genes).reshape(4,num_genes)
for i in range(0,num_genes):
for j in range(0,4):
f= open('Case{}_models{}/model_{}.pickle'.format(j+1,int (new[j][i,0]),int (new[j][i,1])),'r' )
m[j]=pickle.load(f).log_likelihood()
m1=np.exp(m)
prop=m1/np.sum(m1)
l=0
for k in range(0,3):
for j in range(k+1,4):
odds[l]=(prop[k]/prop[j])/(1+(prop[k]/prop[j]))
l=l+1
logit=np.log(odds)
max1[i]=int(maxx(odds)) # find the best model by it's Odds value
kk[i+1,:]=[ann[i,0],ann[i,1],m1[0],m1[1],m1[2],m1[3],prop[0],prop[1],prop[2],prop[3],
odds[0],odds[1],odds[2],odds[3],odds[4],odds[5],logit[0],logit[1],logit[2],logit[3]
,logit[4],logit[5] ,int(max1[i]),i ,int(new[int(max1[i])-1][i,0])]
In [4]:
for i in range(0,25):
print kk[0,i], '\t:\t', kk[1,i]
In [10]:
numpy.save("All_Prop_models.npy", kk)
In [11]:
import csv
with open('All_Prop_models.csv', 'wb') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=' ')
for rowx in kk:
spamwriter.writerow(rowx)
In [12]:
kk=numpy.load("All_Prop_models.npy")
In [13]:
s=np.repeat('ccccccccc',25)
for i in range(1,500):
for j in range(i+1,num_genes+1):
if kk[i,int(kk[i,22])+5]<kk[j,int(kk[j,22])+5]:
#print i , j
s[:]=kk[i,:]
kk[i,:]=kk[j,:]
kk[j,:]=s[:]
#print kk[i,:],kk[j,:]
In [17]:
import csv
with open('After_Ranking_Prop_models.csv', 'wb') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=' ')
for rowx in kk:
spamwriter.writerow(rowx)
# another way to save file
numpy.save("After_Ranking_Prop_models.npy", kk)
In [18]:
kk=numpy.load("After_Ranking_Prop_models.npy")
In [19]:
list_Ntg_Ntg_down=[22,71,0,89,74,39,6,52,56,75,18,50,8,29,57,63,4,36,12,190,697,513,264,600,105,808,836,412,11,259,357,925,245,830,427,162,617,339,792,900,612,665,608,881,463,129,338,787,968,649]
list_Ntg_Ntg_up=[269,402,396,651,519,712,222,761,959,639,140,701,236,466]
list_Ntg_SOD1=[484,994,920,858,737,390]
list_same_behaver =[430,318,242,352,967,195]
In [44]:
x=0
for i in range(0,1000):
if int(kk[i+1,23]) in list_Ntg_Ntg_down :
f=open('Case{}_models{}/model_{}.pickle'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])),'r')
mm= pickle.load(f)
mm.plot(fixed_inputs=[(1, 0), (2, 0)],which_data_rows=range(16),linecol='red')#,fillcol='red')
mm.plot(fixed_inputs=[(1, 0), (2, 1)],which_data_rows=range(16,32), linecol='blue',ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 0)], which_data_rows=range(32,48),linecol='green',resolution=8, ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 1)], which_data_rows=range(48,64),linecol='yellow',ax=pb.gca())
pb.text(140, -.1,'Case{}_models{}/model_{}'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])), fontsize = 12, color = 'b')
pb.xlabel('The time series ')
pb.ylabel('Gene expression {} \n for each mutation for each strain '.format(kk[i+1,23]))
pb.title( ' The gene "{}" and the probe_ID "{}"'.format( kk[i+1,1] ,kk[i+1,0]))
#savefig('only_g93A/fig{}.jpg'.format(i))
In [41]:
for i in range(0,1000):
if int(kk[i+1,23]) in list_Ntg_Ntg_up :
f=open('Case{}_models{}/model_{}.pickle'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])),'r')
mm= pickle.load(f)
mm.plot(fixed_inputs=[(1, 0), (2, 0)],which_data_rows=range(16),linecol='red')#,fillcol='red')
mm.plot(fixed_inputs=[(1, 0), (2, 1)],which_data_rows=range(16,32), linecol='blue',ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 0)], which_data_rows=range(32,48),linecol='green',resolution=8, ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 1)], which_data_rows=range(48,64),linecol='yellow',ax=pb.gca())
pb.text(140, -.1,'Case{}_models{}/model_{}'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])), fontsize = 12, color = 'b')
pb.xlabel('The time series ')
pb.ylabel('Gene expression {} \n for each mutation for each strain '.format(kk[i+1,23]))
pb.title( ' The gene "{}" and the probe_ID "{}"'.format( kk[i+1,1] ,kk[i+1,0]))
#savefig('only_g93A/fig{}.jpg'.format(i))
In [42]:
for i in range(0,1000):
if int(kk[i+1,23]) in list_Ntg_SOD1 :
f=open('Case{}_models{}/model_{}.pickle'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])),'r')
mm= pickle.load(f)
mm.plot(fixed_inputs=[(1, 0), (2, 0)],which_data_rows=range(16),linecol='red')#,fillcol='red')
mm.plot(fixed_inputs=[(1, 0), (2, 1)],which_data_rows=range(16,32), linecol='blue',ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 0)], which_data_rows=range(32,48),linecol='green',resolution=8, ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 1)], which_data_rows=range(48,64),linecol='yellow',ax=pb.gca())
pb.text(140, -.1,'Case{}_models{}/model_{}'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])), fontsize = 12, color = 'b')
pb.xlabel('The time series ')
pb.ylabel('Gene expression {} \n for each mutation for each strain '.format(kk[i+1,23]))
pb.title( ' The gene "{}" and the probe_ID "{}"'.format( kk[i+1,1] ,kk[i+1,0]))
#savefig('only_g93A/fig{}.jpg'.format(i))
In [43]:
for i in range(0,1000):
if int(kk[i+1,23]) in list_same_behaver :
f=open('Case{}_models{}/model_{}.pickle'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])),'r')
mm= pickle.load(f)
mm.plot(fixed_inputs=[(1, 0), (2, 0)],which_data_rows=range(16),linecol='red')#,fillcol='red')
mm.plot(fixed_inputs=[(1, 0), (2, 1)],which_data_rows=range(16,32), linecol='blue',ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 0)], which_data_rows=range(32,48),linecol='green',resolution=8, ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 1)], which_data_rows=range(48,64),linecol='yellow',ax=pb.gca())
pb.text(140, -.1,'Case{}_models{}/model_{}'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])), fontsize = 12, color = 'b')
pb.xlabel('The time series ')
pb.ylabel('Gene expression {} \n for each mutation for each strain '.format(kk[i+1,23]))
pb.title( ' The gene "{}" and the probe_ID "{}"'.format( kk[i+1,1] ,kk[i+1,0]))
#savefig('only_g93A/fig{}.jpg'.format(i))
In [ ]: