In [1]:
cd ..
In [2]:
import NotebookImport
from DX_screen import *
In [3]:
from metaPCNA import *
In [4]:
# Restrict the gene-set membership table to the genes indexed by dx_rna;
# any NaN entries produced by that selection are filled with 0.
gs2 = gene_sets.ix[dx_rna.index].fillna(0)

# Screen dx_rna.frac against every gene set (one row of gs2.T per set).
# presumably rev_kruskal is the per-set test statistic -- verify in DX_screen.
rr = screen_feature(dx_rna.frac, rev_kruskal, gs2.T, align=False)

# Per-gene-set mean of dx_rna.frac: scale membership by frac, drop genes with
# any missing value, turn zeros into NaN so the mean skips them, then sort
# ascending.
# NOTE(review): replace(0, nan) also discards member genes whose frac is
# exactly 0 -- confirm that is intended.
fp = (
    (1. * gene_sets.T * dx_rna.frac)
    .T
    .dropna()
    .replace(0, np.nan)
    .mean()
    .order()
)
fp.name = 'mean frac'
In [6]:
# Save the full screen result, with the per-set 'mean frac' column joined on,
# as a CSV under FIGDIR.
rr.join(fp).to_csv(FIGDIR + 'f_up_gene_sets.csv')
In [7]:
# Split the screened sets by whether their mean fraction lies above or below
# one half, rank each half by ascending p-value, and pass it through
# filter_pathway_hits (presumably prunes redundant sets -- confirm in helper).
ff_u = filter_pathway_hits(rr.ix[ti(fp > .5)]['p'].order(), gs2)
ff_p = filter_pathway_hits(rr.ix[ti(fp < .5)]['p'].order(), gs2)
ff = pd.concat([ff_u, ff_p])

# Keep the surviving sets with p < 1e-5 and attach their mean fraction.
selected = rr.ix[ff[ff < 1e-5].index].join(fp)

# Display the selection ordered by p-value.
selected.sort('p')
Out[7]:
In [8]:
# Save the p-value based selection (current value of `selected`) under FIGDIR.
selected.to_csv(FIGDIR + 'f_up_gene_sets_selected.csv')
Looking for subsets of the cell-cycle pathway
In [9]:
# Build a frame with one column per gene set in gs2, every column being a
# copy of the REACTOME_CELL_CYCLE membership vector, so that each gene set
# can be compared against the cell-cycle set in a single call.
d = pd.DataFrame(
    dict((g, gs2['REACTOME_CELL_CYCLE']) for g in gs2.columns)
)

# odds_ratio_df returns two objects; presumably `a` holds the odds ratios
# and `b` the matching p-values -- verify against the helper.
a, b = odds_ratio_df(d.T > 0, gs2.T > 0)
In [10]:
# Gene sets with a > 100 (from the odds_ratio_df call) and a q-value below
# the cutoff, with the mean fraction joined on and sorted by it, highest
# first.
# NOTE(review): 10e-15 equals 1e-14, not 1e-15 -- confirm the intended cutoff.
dd = (
    rr.ix[ti((a > 100) & (rr.q < 10e-15))]
    .join(fp)
    .sort(fp.name, ascending=False)
)

# Display the result of filter_pathway_hits on that table.
filter_pathway_hits(dd, gs2)
Out[10]:
In [11]:
# Tabulate how genes are distributed across the categories that combine()
# assigns for membership in the M/G1-transition and CENPA-deposition sets.
combine(
    gs2['REACTOME_M_G1_TRANSITION'] > 0,
    gs2['REACTOME_DEPOSITION_OF_NEW_CENPA_CONTAINING_NUCLEOSOMES_AT_THE_CENTROMERE'] > 0
).value_counts()
Out[11]:
In [12]:
# Mean fractions of the gene sets whose q-value is below 1e-5.
f2 = fp.ix[ti(rr.q < 1e-5)]

# Rank by effect size instead of p-value: sets above one half go in from the
# largest mean fraction down, sets below one half from the smallest up.
# Each ranking is passed through filter_pathway_hits.
ff_u = filter_pathway_hits(fp.ix[ti(f2 > .5)].order()[::-1], gs2)
ff_p = filter_pathway_hits(fp.ix[ti(f2 < .5)].order(), gs2)
ff = pd.concat([ff_u, ff_p])

selected = rr.ix[ff.index].join(f2)

# Display with the sets farthest from 0.5 first; rows containing NaN are
# dropped from the display only (`selected` itself is unchanged).
selected.ix[
    (f2 - .5).abs().order().index[::-1]
].dropna()
Out[12]:
In [13]:
# Save the effect-size based selection (current value of `selected`) under FIGDIR.
selected.to_csv(FIGDIR + 'f_up_gene_sets_selected_fc.csv')
In [14]:
# Repeat the gene-set screen with pcna_corr as the per-gene score.
# Membership is restricted to genes with a non-missing pcna_corr value;
# NaN entries produced by the selection are filled with 0.
gs2 = gene_sets.ix[pcna_corr.dropna().index].fillna(0)

rr = screen_feature(pcna_corr, rev_kruskal, gs2.T, align=False)

# Per-gene-set mean of pcna_corr; zeros are turned into NaN before averaging
# so that they are skipped by the mean. Result is sorted ascending.
fp = (
    (1. * gene_sets.T * pcna_corr)
    .T
    .dropna()
    .replace(0, np.nan)
    .mean()
    .order()
)
fp.name = 'mean score'
In [15]:
# Save the pcna_corr screen result, with the per-set 'mean score' column
# joined on, as a CSV under FIGDIR.
rr.join(fp).to_csv(FIGDIR + 'pcna_gene_sets.csv')
In [16]:
# Split sets by the sign of their mean score, rank each side by ascending
# p-value, and pass it through filter_pathway_hits.
ff_u = filter_pathway_hits(rr.ix[ti(fp > 0)]['p'].order(), gs2)
ff_p = filter_pathway_hits(rr.ix[ti(fp < 0)]['p'].order(), gs2)
ff = pd.concat([ff_u, ff_p])

# Keep the surviving sets with p < 1e-5 and attach their mean score.
selected = rr.ix[ff[ff < 1e-5].index].join(fp)

# Display the selection ordered by p-value.
selected.sort('p')
Out[16]:
In [17]:
# Save the p-value based pcna_corr selection (current `selected`) under FIGDIR.
selected.to_csv(FIGDIR + 'pcna_gene_sets_selected.csv')
In [18]:
# Mean scores of the gene sets whose q-value is below 1e-4.
f2 = fp.ix[ti(rr.q < 1e-4)]

# Rank by effect size: positive sets from the largest mean score down,
# negative sets from the most negative up. Each ranking is passed through
# filter_pathway_hits.
ff_u = filter_pathway_hits(fp.ix[ti(f2 > 0)].order()[::-1], gs2)
ff_p = filter_pathway_hits(fp.ix[ti(f2 < 0)].order(), gs2)
ff = pd.concat([ff_u, ff_p])

selected = rr.ix[ff.index].join(f2)

# Display the selection ordered by p-value.
selected.sort('p')
Out[18]:
In [19]:
# Save the effect-size based pcna_corr selection (current `selected`) under FIGDIR.
selected.to_csv(FIGDIR + 'pcna_gene_sets_selected_fc.csv')
In [31]:
# Repeat the gene-set screen with f_win as the per-gene score.
# NOTE(review): this cell's execution count (31) is out of sequence with the
# cells that follow it -- re-run the notebook top to bottom before trusting
# the saved outputs.
gs2 = gene_sets.ix[f_win.dropna().index].fillna(0)

rr = screen_feature(f_win, rev_kruskal, gs2.T, align=False)

# Per-gene-set mean of f_win; zeros are turned into NaN before averaging so
# that they are skipped by the mean. Result is sorted ascending.
fp = (
    (1. * gene_sets.T * f_win)
    .T
    .dropna()
    .replace(0, np.nan)
    .mean()
    .order()
)
fp.name = 'mean score'
In [35]:
# Count how many gene sets fall below / at-or-above the q < 1e-5 cutoff.
(rr.q < .00001).value_counts()
Out[35]:
In [22]:
# Save the f_win screen result, with the per-set 'mean score' column joined
# on, as a CSV under FIGDIR.
rr.join(fp).to_csv(FIGDIR + 'detrended_fup_sets.csv')
In [23]:
# Mean scores of the gene sets whose q-value is below 1e-4.
f2 = fp.ix[ti(rr.q < 1e-4)]

# Among those sets, split by sign of the mean score, rank each side by
# ascending p-value, and pass it through filter_pathway_hits.
ff_u = filter_pathway_hits(rr.ix[ti(f2 > 0)]['p'].order(), gs2)
ff_p = filter_pathway_hits(rr.ix[ti(f2 < 0)]['p'].order(), gs2)
ff = pd.concat([ff_u, ff_p])

# Keep the surviving sets with p < 1e-5 and attach the mean score (from fp).
selected = rr.ix[ff[ff < 1e-5].index].join(fp)

# Display the selection ordered by p-value.
selected.sort('p')
Out[23]:
In [24]:
# Save the p-value based f_win selection (current `selected`) under FIGDIR.
selected.to_csv(FIGDIR + 'detrended_fup_sets_selected.csv')
In [25]:
# Mean scores of the gene sets whose q-value is below 1e-4 (recomputed here
# so the cell does not depend on the previous selection cell).
f2 = fp.ix[ti(rr.q < 1e-4)]

# Rank by effect size: positive sets from the largest mean score down,
# negative sets from the most negative up. Each ranking is passed through
# filter_pathway_hits.
ff_u = filter_pathway_hits(fp.ix[ti(f2 > 0)].order()[::-1], gs2)
ff_p = filter_pathway_hits(fp.ix[ti(f2 < 0)].order(), gs2)
ff = pd.concat([ff_u, ff_p])

selected = rr.ix[ff.index].join(f2)

# Display the selection ordered by p-value.
selected.sort('p')
Out[25]:
In [26]:
# Save the effect-size based f_win selection (current `selected`) under FIGDIR.
selected.to_csv(FIGDIR + 'detrended_fup_sets_selected_fc.csv')
In [27]:
# List the gene-set names (column labels of gs2) that contain 'TELO'.
[g for g in gs2 if 'TELO' in g]
Out[27]:
In [28]:
# Boolean membership vectors for the two telomere gene sets, named so the
# names can serve as category labels.
# NOTE: this rebinds `a` and `b`, which an earlier cell used for the
# odds_ratio_df output.
a = gs2['REACTOME_PACKAGING_OF_TELOMERE_ENDS'] > 0
a.name = 'end packaging'

b = gs2['REACTOME_EXTENSION_OF_TELOMERES'] > 0
b.name = 'extension'

# One label per gene from combine(); entries labelled 'neither' are removed.
cc = combine(a, b).replace('neither', np.nan).dropna()
In [29]:
# Supplementary figure: dx_rna.frac against pcna_corr for the genes labelled
# 'end packaging' and 'extension' in cc, one colour per group.
fig, ax = subplots(figsize=(4, 4))

series_scatter(
    dx_rna.frac, pcna_corr.ix[ti(cc == 'end packaging')],
    ax=ax, color=colors[0], ann=None, alpha=1, s=30
)
series_scatter(
    dx_rna.frac, pcna_corr.ix[ti(cc == 'extension')],
    ax=ax, color=colors[1], ann=None, alpha=1, s=30
)

ax.set_ylabel('Proliferation Score')
ax.set_xlabel('Fraction Upregulated')

# NOTE(review): legend labels are matched to artists by draw order; this
# assumes each series_scatter call adds exactly one artist -- confirm.
ax.legend(['end packaging', 'extension'], loc='upper left', frameon=True)

prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'S3_Fig.pdf')
In [30]:
# Violin plot of f_win grouped by the telomere gene-set labels in cc.
violin_plot_pandas(cc, f_win)