In [1]:
    
import NotebookImport
from Imports import *
    
    
In [2]:
    
# Run binomial_test_screen over matched_tn with fc=1.; the resulting table is
# read in later cells through its `frac`, `p` and `num_dx` columns.
# NOTE(review): fc is presumably a fold-change threshold -- confirm in the helper.
tab = binomial_test_screen(matched_tn, fc=1.)
    
In [3]:
    
# Histogram of the `frac` column of the screen table (one entry per row of tab),
# drawn on an explicit 6x4 axes and then passed through prettify_ax.
fig, ax = subplots(figsize=(6, 4))
tab['frac'].hist(ax=ax)
ax.set_ylabel('# of Genes')
ax.set_xlabel('Fraction of Patients Induced')
prettify_ax(ax)
    
    
In [4]:
    
# Display the ten rows of tab with the smallest `p` (ascending sort, then head).
# NOTE: DataFrame.sort is the legacy pandas spelling; newer pandas releases
# only provide sort_values, so this line needs updating if pandas is upgraded.
tab.sort('p').head(10)
    
    Out[4]:
In [4]:
    
# Level-0 labels of matched_tn's columns (named `pts`; presumably patient
# identifiers -- confirm against how matched_tn is built).
pts = matched_tn.columns.get_level_values(0)
# Distinct entries of `codes` for those labels.
cancers = codes.ix[pts].unique()
# Keep only the codes that occur more than 15 times among those labels.
cancers_to_use = ti(codes.ix[pts].value_counts() > 15)
    
In [5]:
    
# For every code in cancers_to_use, call infer_normal_knn on the columns of
# rna_df selected by ti(codes == c), keyed by that code.
# NOTE(review): the keyword is spelled `r_curtoff`; it must match the helper's
# signature exactly, so check infer_normal_knn before "fixing" the spelling.
df_inf = dict(
    (c, infer_normal_knn(rna_df.ix[:, ti(codes == c)], r_curtoff=.3, k=5))
    for c in cancers_to_use
)
# Join the per-code frames side by side. Column-block order follows the dict's
# value order, which is not tied to the order of cancers_to_use.
tn_inf_c = pd.concat(df_inf.values(), axis=1)
    
In [6]:
    
# Repeat the screen on tn_inf_c (the concatenated infer_normal_knn output),
# this time with fc=1.5 rather than the fc=1. used for matched_tn.
tab_inf = binomial_test_screen(tn_inf_c, fc=1.5)
    
In [7]:
    
# Among rows whose num_dx exceeds 1000, show the ten with the largest `frac`
# (ascending sort on frac, then tail). Uses the legacy DataFrame.sort API.
tab_inf[tab_inf.num_dx > 1000].sort('frac').tail(10)
    
    Out[7]:
In [5]:
    
# Boolean vector over gene_sets.index: True where the row survives the
# num_dx > 1000 filter and its `frac` is above .85.
# NOTE(review): labels in gene_sets.index that the filter removed should come
# back as NaN from .ix and therefore compare False -- confirm that is intended.
vec = (
    tab_inf[tab_inf.num_dx > 1000]
    .frac
    .ix[gene_sets.index]
    > .85
)
print(vec.value_counts())
# Feed the vector to run_mgsa and show the ten rows with the highest `estimate`.
mm = run_mgsa(vec)
mm.sort('estimate', ascending=False).head(10)
    
    
In [28]:
    
# Same > .85 vector, but built from `tab` (the matched_tn screen) and with a
# num_dx cut of 100 instead of the 1000 used for tab_inf.
# NOTE(review): confirm the different threshold is deliberate; mm2 is assigned
# here but not displayed in this cell.
vec = (
    tab[tab.num_dx > 100]
    .frac
    .ix[gene_sets.index]
    > .85
)
print(vec.value_counts())
mm2 = run_mgsa(vec)
    
    
In [9]:
    
# Opposite tail of the tab_inf screen: True where `frac` is below .15 among
# rows with num_dx > 1000, aligned to gene_sets.index.
# NOTE: this rebinds both `vec` and `mm`, replacing the > .85 results that an
# earlier cell stored under the same names.
vec = (
    tab_inf[tab_inf.num_dx > 1000]
    .frac
    .ix[gene_sets.index]
    < .15
)
print(vec.value_counts())
mm = run_mgsa(vec)
mm.sort('estimate', ascending=False).head(10)
    
    
    Out[9]:
In [85]:
    
# Screen restricted to the matched_tn columns selected by ti(codes=='BRCA'), fc=1.5.
# NOTE(review): the variable is called tab_lusc but the filter is 'BRCA' --
# either the name or the code string looks stale; confirm which cohort is meant.
tab_lusc = binomial_test_screen(matched_tn.ix[:, ti(codes=='BRCA')], fc=1.5)
    
In [86]:
    
# Display the first rows of tab_lusc after an ascending sort on `p`
# (head() with its default row count). Uses the legacy DataFrame.sort API.
tab_lusc.sort('p').head()
    
    Out[86]:
In [87]:
    
# True where tab_lusc's `frac`, aligned to gene_sets.index, is above .85.
# No num_dx filter is applied in this cell, unlike the tab_inf cells.
vec = (
    tab_lusc
    .frac
    .ix[gene_sets.index]
    > .85
)
print(vec.value_counts())
# run_mgsa result for this subset; show the ten highest `estimate` rows.
mm_lusc = run_mgsa(vec)
mm_lusc.sort('estimate', ascending=False).head(10)
    
    
    Out[87]:
In [77]:
    
# Complementary screen: the matched_tn columns selected by ti(codes!='BRCA'),
# i.e. everything the codes=='BRCA' subset excludes, with the same fc=1.5.
tab_rest = binomial_test_screen(matched_tn.ix[:, ti(codes!='BRCA')], fc=1.5)
    
In [81]:
    
# True where tab_rest's `frac`, aligned to gene_sets.index, is above .85.
vec = (
    tab_rest
    .frac
    .ix[gene_sets.index]
    > .85
)
print(vec.value_counts())
# run_mgsa result for the non-BRCA subset; show the ten highest `estimate` rows.
mm_rest = run_mgsa(vec)
mm_rest.sort('estimate', ascending=False).head(10)
    
    
    Out[81]:
In [88]:
    
# Plot the two run_mgsa `estimate` columns against each other via series_scatter:
# mm_rest (codes!='BRCA' subset) first, mm_lusc (codes=='BRCA' subset) second.
series_scatter(mm_rest.estimate, mm_lusc.estimate)
    
    
In [90]:
    
# Per-label difference in `estimate` (mm_lusc minus mm_rest), displayed sorted.
# NOTE: Series.order is the legacy pandas spelling; newer pandas releases only
# provide sort_values, so this line needs updating if pandas is upgraded.
(mm_lusc.estimate - mm_rest.estimate).order()
    
    Out[90]: