In [1]:
import NotebookImport
from Imports import *
In [2]:
import seaborn as sns
sns.set_context('paper',font_scale=1.5)
sns.set_style('white')
In [3]:
matched_rna = pd.read_hdf('/data_ssd/RNASeq_2015_04_02.h5', 'matched_tn')
In [4]:
dx_rna = binomial_test_screen(matched_rna, fc=1.)
dx_rna = dx_rna[dx_rna.num_dx > 300]
In [261]:
standardize = lambda s: s.sub(s.mean(1),0).div(s.std(1), 0)
In [134]:
df = matched_rna.sub(matched_rna.mean(1),0).div(matched_rna.std(1),0)
dx = df.xs('01',1,1) - df.xs('11',1,1)
In [275]:
df_s = pd.concat([standardize(matched_rna.ix[:, ti(codes==c)])
for c in codes.unique()], axis=1)
dx = df_s.xs('01',1,1) - df_s.xs('11',1,1)
In [276]:
dx.stack().clip(-10,10).hist(bins=100)
Out[276]:
In [135]:
mr = dx.rank(pct=True).mean(1)
In [136]:
from scipy.special import expit
In [297]:
fig, ax = subplots()
for i in range(1,10):
pd.Series(expit(np.arange(-5,5,.1)*i), np.arange(-5,5,.1)).plot(ax=ax)
ax.set_ylabel('Patient Level Score')
ax.set_xlabel('Differential Expression Level')
Out[297]:
In [279]:
sig = (dx*2).apply(expit)
sig_weight = sig.mean(1).order().dropna()
In [280]:
pearson_pandas(sig_weight, dx_rna.frac)['rho']
Out[280]:
In [281]:
plot_regression(sig_weight, dx_rna.frac, density=True, rad=.1)
In [282]:
draw_dist(sig_weight)
draw_dist(dx_rna.frac)
In [283]:
gs2 = gene_sets.ix[dx_rna.index].fillna(0)
In [284]:
rr = screen_feature(sig_weight, rev_kruskal, gs2.T,
align=False)
fp = (1.*gene_sets.T * sig_weight).T.dropna().replace(0, np.nan).mean().order()
fp.name = 'mean frac'
In [285]:
rr.ix[ti(fp > .5)].join(fp).sort('p').head()
Out[285]:
In [286]:
rr.ix[ti(fp < .5)].join(fp).sort('p').head(10)
Out[286]:
In [287]:
def fig_1f(vec, ax):
v = pd.concat([vec,
vec.ix[ti(gs2['REACTOME_CELL_CYCLE']>0)],
vec.ix[ti(gs2['KEGG_FATTY_ACID_METABOLISM']>0)]])
v1 = pd.concat([pd.Series('All Genes', vec.index),
pd.Series('Cell Cycle',
ti(gs2['REACTOME_CELL_CYCLE']>0)),
pd.Series('Fatty Acid\nMetabolism',
ti(gs2['KEGG_FATTY_ACID_METABOLISM']>0))])
v1.name = ''
v.name = 'Fraction Overexpressed'
violin_plot_pandas(v1, v, ann=None, ax=ax)
prettify_ax(ax)
ax.spines['bottom'].set_visible(False)
ax.axhline(.5, color='grey', lw=2, ls='--')
return ax
In [288]:
fig, ax = subplots(1,1, figsize=(5,3))
fig_1f(sig_weight, ax)
Out[288]:
In [291]:
d2 = matched_rna.ix[:, ti(codes=='PRAD')].dropna()
df = d2.sub(d2.mean(1),0).div(d2.std(1),0)
dx = df.xs('01',1,1) - df.xs('11',1,1)
sig = (dx*2).apply(expit)
sig_weight = sig.mean(1).order().dropna()
dx_brca = binomial_test_screen(d2, fc=1.)
In [289]:
d2 = df_s.ix[:, ti(codes!='PRAD')].dropna()
df = d2.sub(d2.mean(1),0).div(d2.std(1),0)
dx = df.xs('01',1,1) - df.xs('11',1,1)
sig = (dx*2).apply(expit)
sig_rest = sig.mean(1).order().dropna()
dx_rest = binomial_test_screen(d2, fc=1.)
In [248]:
gg = ti(dx_brca.num_dx > 30)
In [249]:
plot_regression(dx_brca.frac.ix[gg], dx_rest.frac, density=True, rad=.1)
In [292]:
plot_regression(sig_weight.ix[gg], sig_rest, density=True, rad=.1)
In [259]:
(sig_weight.ix[gg] - sig_rest).dropna().order().tail()
Out[259]:
In [257]:
paired_bp_tn_split(matched_rna.ix['WASF3'], codes)
In [255]:
paired_bp_tn_split(matched_rna.ix['SGEF'], codes)