In [26]:
%pylab
In [27]:
%matplotlib inline
In [28]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
In [29]:
data = pd.read_csv("~/datamodified.csv")
data.head()
Out[29]:
In [30]:
data['dtror'] = data.strain == 'tror'
data['didr1'] = data.strain == 'idr1'
data['didr2'] = data.strain == 'idr2'
data['dsirr'] = data.strain == 'sirr'
data.head()
Out[30]:
In [31]:
sum(data.gene=='tror')
Out[31]:
In [32]:
data['normexpression'] = np.log2(data.expression)
data['normexpression'] = (data['normexpression']-data['normexpression'].mean())/data['normexpression'].std()
data.head()
Out[32]:
In [33]:
g = data.groupby(['gene', 'time'])
def scale(x):
x.normexpression = (x.normexpression - x.normexpression.mean())/x.normexpression.std()
return x
data = g.apply(scale)
In [34]:
def plotGene(g,time=None):
temp = data[data.gene==g]
if not time is None:
temp = temp[temp.time==time]
for i,d in enumerate(['dtror', 'didr1', 'didr2', 'dsirr']):
if d[1:] == g:
continue
plt.subplot(1,4,i+1)
plt.title(d)
plt.boxplot([temp[temp[d]].normexpression, temp[~temp[d]].normexpression], labels=[d,'dura3'])
plt.tight_layout()
In [35]:
plotGene('idr1',0)
In [36]:
plotGene('idr2',0)
In [37]:
plotGene('sirr',0)
In [38]:
plotGene('tror',0)
In [39]:
# eq = 'expression ~ C(condition, Sum)*C(dsirr, Sum)' + \
# '+ C(condition, Sum)*C(dtror, Sum) + ' + \
# '+ C(condition, Sum)*C(didr1, Sum) + ' + \
# '+ C(condition, Sum)*C(didr2, Sum) + '
def eq(s):
e = 'normexpression ~'
for d in ['dtror', 'didr1', 'didr2', 'dsirr']:
if d[1:] == s:
continue
e += 'C(%s, Sum) + '%d
return e[:-2]
# eq = 'normexpression ~ C(dsirr, Sum)' + \
# '+ C(dtror, Sum) + ' + \
# '+ C(didr1, Sum) + ' + \
# '+ C(didr2, Sum)'
lm_idr1 = ols(eq('idr1'), data=data[(data.gene=='idr1') & (data.time==0)]).fit()
lm_idr2 = ols(eq('idr2'), data=data[(data.gene=='idr2') & (data.time==0)]).fit()
lm_sirr = ols(eq('sirr'), data=data[(data.gene=='sirr') & (data.time==0)]).fit()
lm_tror = ols(eq('tror'), data=data[(data.gene=='tror') & (data.time==0)]).fit()
In [40]:
sm.stats.anova_lm(lm_idr1, typ=2)
Out[40]:
In [41]:
lm_idr1.summary()
Out[41]:
In [42]:
sm.stats.anova_lm(lm_idr2, typ=2)
Out[42]:
In [43]:
lm_idr2.summary()
Out[43]:
In [44]:
sm.stats.anova_lm(lm_sirr, typ=2)
Out[44]:
In [45]:
sm.stats.anova_lm(lm_tror, typ=2)
Out[45]:
In [46]:
lm_idr1.params
Out[46]:
In [47]:
lm_idr2.params
Out[47]:
In [48]:
lm_sirr.params
Out[48]:
In [49]:
lm_sirr.conf_int().loc["C(%s, Sum)[S.False]"%d,:]
In [50]:
lm_tror.params
Out[50]:
In [51]:
lm_tror.pvalues
Out[51]:
In [52]:
coeff = []
sig = []
conf = []
for t in [0, 5, 20, 60]:
lm_idr1 = ols(eq('idr1'), data=data[(data.gene=='idr1') & (data.time==t)]).fit()
lm_idr2 = ols(eq('idr2'), data=data[(data.gene=='idr2') & (data.time==t)]).fit()
lm_sirr = ols(eq('sirr'), data=data[(data.gene=='sirr') & (data.time==t)]).fit()
lm_tror = ols(eq('tror'), data=data[(data.gene=='tror') & (data.time==t)]).fit()
coeff.append([])
sig.append([])
conf.append([])
for lm in [lm_idr1, lm_idr2, lm_sirr, lm_tror]:
coeff[-1].append([])
sig[-1].append([])
conf[-1].append([])
for d in ['didr1', 'didr2', 'dsirr', 'dtror']:
try:
coeff[-1][-1].append(lm.params["C(%s, Sum)[S.False]"%d])
sig[-1][-1].append(lm.pvalues["C(%s, Sum)[S.False]"%d])
conf[-1][-1].append(lm.conf_int().loc["C(%s, Sum)[S.False]"%d,:])
except:
coeff[-1][-1].append(np.nan)
sig[-1][-1].append(np.nan)
conf[-1][-1].append([np.nan]*2)
coeff = np.array(coeff)
sig = np.array(sig)
conf = np.array(conf)
In [53]:
conf[0,0,1:,0]
Out[53]:
In [54]:
conf[0,0,1:,0]
Out[54]:
In [65]:
i = 0
j = 0
ind = range(4)
ind.remove(i)
plt.errorbar(range(3), coeff[0,0,ind], yerr=coeff[0,0,ind]-conf[0,0,ind,0])
plt.plot([0,2], [0,0], lw=2, c='k')
plt.xticks(range(3), [order[z] for z in ind])
plt.title('%s, %s min' % (order[i], [0,5,20,60][j]))
Out[65]:
In [66]:
order = ['idr1', 'idr2', 'sirr', 'tror']
In [67]:
##Preliminary visualization of node activities.
for i in range(4):
plt.figure()
for j in range(4):
ind = range(4)
ind.remove(i)
plt.subplot(2,2,j+1)
plt.errorbar(range(3), coeff[j,i,ind], yerr=coeff[j,i,ind]-conf[j,i,ind,0])
plt.plot([0,2], [0,0], lw=2, c='k')
plt.xticks(range(3), [order[z] for z in ind])
plt.title('%s, %s min' % (order[i], [0,5,20,60][j]))
plt.tight_layout()
plt.savefig('%s.pdf'% (order[i]),bbox_inches='tight')
In [68]:
#final networks used to generate topologies for paper Figure 4.
net0 = pd.DataFrame(coeff[0,:,:], columns=order, index=order)
net0.values[sig[0,:,:] > 5e-2] = 0
net0.to_csv("net0.csv")
net0
Out[68]:
In [69]:
net5 = pd.DataFrame(coeff[1,:,:], columns=order, index=order)
net5.values[sig[1,:,:] > 5e-2] = 0
net5.to_csv("net5.csv")
net5
Out[69]:
In [60]:
net20 = pd.DataFrame(coeff[2,:,:], columns=order, index=order)
net20.values[sig[2,:,:] > 5e-2] = 0
net20.to_csv("net20.csv")
net20
Out[60]:
In [61]:
net60 = pd.DataFrame(coeff[3,:,:], columns=order, index=order)
net60.values[sig[3,:,:] > 5e-2] = 0
net60.to_csv("net60.csv")
net60
Out[61]:
In [62]:
pd.DataFrame(sig[0,:,:], columns=order, index=order).to_csv("sig0.csv")
pd.DataFrame(sig[1,:,:], columns=order, index=order).to_csv("sig5.csv")
pd.DataFrame(sig[2,:,:], columns=order, index=order).to_csv("sig20.csv")
pd.DataFrame(sig[3,:,:], columns=order, index=order).to_csv("sig60.csv")
In [63]:
import networkx as nx
In [64]:
##Preliminary visulization of resultant network graphs at each time point.
##Cytoscape used in final paper Figure 5.
for t in range(4):
G=nx.DiGraph()
colors = []
for i, g in enumerate(['idr1', 'idr2', 'sirr', 'tror']):
for j, d in enumerate(['didr1', 'didr2', 'dsirr', 'dtror']):
if i==j:
continue
if sig[t,i,j] < 5e-2:
G.add_edge(d[1:],g,{"value":coeff[t,i,j]})
colors.append(coeff[t,i,j])
pos=nx.spring_layout(G)
plt.figure()
nx.draw(G,pos,node_color='#A0CBE2',edge_color=colors,width=5,
edge_cmap=plt.cm.RdBu,with_labels=True,arrows=True,
edge_vmin=-1,edge_vmax=1)
plt.title('time=%d'%[0,5,20,60][t])
In [ ]:
In [ ]: