In [1]:
import NotebookImport
from Imports import *
In [2]:
# Run the project helper binomial_test_screen on matched_tn with fc=1.
# The resulting table `tab` is reused by later cells (histogram, sorting,
# gene-set enrichment).
# NOTE(review): fc is presumably a fold-change threshold -- confirm against
# the helper's definition, which is not visible in this notebook.
tab = binomial_test_screen(matched_tn, fc=1.)
In [3]:
# Histogram of the `frac` column of `tab`; the axis labels describe it as the
# number of genes per fraction of patients induced.
fig, ax = subplots(figsize=(6, 4))
tab['frac'].hist(ax=ax)
ax.set_ylabel('# of Genes')
ax.set_xlabel('Fraction of Patients Induced')
# prettify_ax is a project helper (not shown here) applied to the finished axes.
prettify_ax(ax)
In [4]:
# Display the first ten rows of `tab` after sorting by column 'p'
# (legacy DataFrame.sort, ascending by default).
tab.sort('p').head(n=10)
Out[4]:
In [4]:
# First-level column labels of matched_tn
# (presumably patient identifiers -- TODO confirm).
pts = matched_tn.columns.get_level_values(0)
# Distinct entries of `codes` for those labels.
cancers = codes.ix[pts].unique()
# Keep only the code values that occur more than 15 times among those labels.
# `ti` is a project helper not shown here; presumably it returns the index
# labels where the boolean Series is True -- confirm.
cancers_to_use = ti(codes.ix[pts].value_counts() > 15)
In [5]:
# For every code in cancers_to_use, call infer_normal_knn on the rna_df
# columns selected by ti(codes == c), keeping one result per code.
# NOTE(review): `r_curtoff` looks like a misspelling of `r_cutoff`; it is left
# untouched because infer_normal_knn's signature is not visible here -- confirm.
df_inf = dict(
    (c, infer_normal_knn(rna_df.ix[:, ti(codes == c)], r_curtoff=.3, k=5))
    for c in cancers_to_use
)
# Join the per-code results side by side (column-wise).
tn_inf_c = pd.concat(list(df_inf.values()), axis=1)
In [6]:
# Run binomial_test_screen on the concatenated table tn_inf_c with fc=1.5;
# `tab_inf` feeds the filtering and enrichment cells that follow.
# NOTE(review): fc is presumably a fold-change threshold -- confirm against
# the helper's definition.
tab_inf = binomial_test_screen(tn_inf_c, fc=1.5)
In [7]:
# Restrict tab_inf to rows with num_dx above 1000, sort ascending by 'frac'
# and display the last ten rows of that ordering.
tab_inf[tab_inf['num_dx'] > 1000].sort('frac').tail(n=10)
Out[7]:
In [5]:
# Boolean vector over gene_sets.index: True where the `frac` value taken from
# tab_inf (rows with num_dx > 1000 only) exceeds 0.85.
# NOTE(review): labels of gene_sets.index missing from the filtered table
# presumably compare as False after the .ix lookup -- confirm this is intended.
vec = tab_inf[tab_inf['num_dx'] > 1000]['frac'].ix[gene_sets.index] > .85
print(vec.value_counts())
# run_mgsa is a project helper (not shown); its result is ranked here by the
# 'estimate' column, largest first, and the first ten rows are displayed.
mm = run_mgsa(vec)
mm.sort('estimate', ascending=False).head(n=10)
In [28]:
# Same flagging as for tab_inf, but on `tab`: True where `frac` (rows with
# num_dx > 100 only) exceeds 0.85 for the labels in gene_sets.index.
# NOTE(review): this cell filters on num_dx > 100 while the tab_inf cells use
# 1000 -- confirm the different cutoff is intentional.
vec = tab[tab['num_dx'] > 100]['frac'].ix[gene_sets.index] > .85
print(vec.value_counts())
# Result of the project helper run_mgsa on that vector, kept as `mm2`.
mm2 = run_mgsa(vec)
In [9]:
# Boolean vector over gene_sets.index: True where the `frac` value taken from
# tab_inf (rows with num_dx > 1000 only) is below 0.15.
vec = tab_inf[tab_inf['num_dx'] > 1000]['frac'].ix[gene_sets.index] < .15
print(vec.value_counts())
# Rebinds `mm` (also assigned by an earlier cell) to the run_mgsa result for
# this vector, then shows the ten rows with the largest 'estimate'.
mm = run_mgsa(vec)
mm.sort('estimate', ascending=False).head(n=10)
Out[9]:
In [85]:
# Run binomial_test_screen (fc=1.5) on the columns of matched_tn selected by
# ti(codes=='BRCA').
# NOTE(review): the result is named `tab_lusc`, yet the filter is 'BRCA' and
# not 'LUSC' -- confirm which cohort is intended; later cells (mm_lusc) inherit
# this name.
tab_lusc = binomial_test_screen(matched_tn.ix[:, ti(codes=='BRCA')], fc=1.5)
In [86]:
# Display the first five rows of tab_lusc after sorting by column 'p'
# (legacy DataFrame.sort, ascending by default).
tab_lusc.sort('p').head(n=5)
Out[86]:
In [87]:
# Boolean vector over gene_sets.index: True where `frac` in tab_lusc exceeds
# 0.85 (no num_dx filter is applied in this cell).
vec = tab_lusc['frac'].ix[gene_sets.index] > .85
print(vec.value_counts())
# run_mgsa result for this vector; the ten rows with the largest 'estimate'
# are displayed.
mm_lusc = run_mgsa(vec)
mm_lusc.sort('estimate', ascending=False).head(n=10)
Out[87]:
In [77]:
# Run binomial_test_screen (fc=1.5) on the columns of matched_tn selected by
# ti(codes!='BRCA'), i.e. the complement of the 'BRCA' filter used for
# tab_lusc.
tab_rest = binomial_test_screen(matched_tn.ix[:, ti(codes!='BRCA')], fc=1.5)
In [81]:
# Boolean vector over gene_sets.index: True where `frac` in tab_rest exceeds
# 0.85 (no num_dx filter is applied in this cell).
vec = tab_rest['frac'].ix[gene_sets.index] > .85
print(vec.value_counts())
# run_mgsa result for this vector; the ten rows with the largest 'estimate'
# are displayed.
mm_rest = run_mgsa(vec)
mm_rest.sort('estimate', ascending=False).head(n=10)
Out[81]:
In [88]:
# Compare the 'estimate' values of mm_rest and mm_lusc with the project helper
# series_scatter (which argument lands on which axis depends on that helper,
# not visible here).
series_scatter(mm_rest.estimate, mm_lusc.estimate)
In [90]:
# Element-wise difference of the 'estimate' columns (mm_lusc minus mm_rest),
# displayed in sorted order via the legacy Series.order() method.
(mm_lusc['estimate'] - mm_rest['estimate']).order()
Out[90]: