Import everything from the imports notebook. This reads in all of the expression data as well as the functions needed to analyse differential expression data.
In [1]:
import NotebookImport
from Imports import *
In [2]:
import seaborn as sns
sns.set_context('paper',font_scale=1.5)
sns.set_style('white')
matched_meth is our matched methylation data.
In [3]:
store = '/data_ssd/TCGA_methylation_2014_04_16.h5'
matched_meth = pd.read_hdf(store, 'matched_tn')
matched_meth = matched_meth.groupby(axis=1, level=[0,1]).first()
In [4]:
matched_meth.T.head(10).T.head()
Out[4]:
Read in matched Gene and miRNA expression data.
In [5]:
matched_rna = pd.read_hdf('/data_ssd/RNASeq_2014_07_15.h5', 'matched_tn')
matched_mir = pd.read_hdf('/data_ssd/miRNASeq_2014_07_15.h5', 'matched_tn')
In [6]:
dx_rna = binomial_test_screen(matched_rna, fc=1.)
dx_rna = dx_rna[dx_rna.num_dx > 300]
In [7]:
dx_rna.ix[['ADH1A','ADH1B','ADH1C']]
Out[7]:
In [8]:
dx_rna.shape
Out[8]:
In [9]:
dx_rna.p.rank().ix[['ADH1A','ADH1B','ADH1C']]
Out[9]:
In [10]:
dx_rna.sort('p').head(10)
Out[10]:
In [11]:
plt.rcParams['savefig.dpi'] = 150
In [12]:
fig, ax = subplots(figsize=(7.75,2.))
paired_bp_tn_split(matched_rna.ix['ADH1B'], codes, data_type='mRNA',
ax=ax)
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/ADH1B.pdf')
In [55]:
dx_mir = binomial_test_screen(matched_mir, fc=1.)
dx_mir = dx_mir[dx_mir.num_dx > 300]
In [56]:
dx_mir.sort('p').head()
Out[56]:
In [57]:
paired_bp_tn_split(matched_mir.ix['hsa-mir-139'], codes, data_type='')
In [64]:
fig, ax = subplots(figsize=(6.5,2.))
paired_bp_tn_split(matched_mir.ix['hsa-mir-21'], codes, data_type='',
ax=ax)
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/mir21.pdf')
In [59]:
dx_meth = binomial_test_screen(matched_meth, fc=1.)
dx_meth = dx_meth[dx_meth.num_dx > 300]
In [60]:
dx_meth.sort('p').head()
Out[60]:
In [61]:
paired_bp_tn_split(matched_meth.ix['cg10216717'], codes, data_type='Beta')
We are going to want to reuse this plot so here I'm wrapping it in a function.
In [62]:
def fig_1e(ax):
draw_dist(dx_meth.frac, ax=ax, lw=2.5)
draw_dist(dx_rna.frac, ax=ax, lw=2.5, bins=200)
draw_dist(dx_mir.frac, ax=ax, lw=2.5, bins=100)
ax.set_yticks([])
ax.set_xticks([0,.5,1])
ax.set_ylabel('Density')
ax.set_xlabel('Fraction')
ax.legend(('Methylation','mRNA','miRNA'), frameon=False)
prettify_ax(ax)
return ax
In [63]:
#Do not import
fig, ax = subplots(1,1, figsize=(5,3))
fig_1e(ax);
In [67]:
gs2 = gene_sets.ix[dx_rna.index].fillna(0)
In [68]:
rr = screen_feature(dx_rna.frac, rev_kruskal, gs2.T,
align=False)
fp = (1.*gene_sets.T * dx_rna.frac).T.dropna().replace(0, np.nan).mean().order()
fp.name = 'mean frac'
Overexpressed pathways
In [69]:
rr.ix[ti(fp > .5)].join(fp).sort('p').head()
Out[69]:
Underexpressed pathways
In [70]:
rr.ix[ti(fp < .5)].join(fp).sort('p').head()
Out[70]:
I am folling up on Fatty Acid Metabolism as opposed to biological oxidations, because it has a larger effect size, although the smaller gene-set size gives it a less extreme p-value.
In [71]:
def fig_1f(ax):
v = pd.concat([dx_rna.frac,
dx_rna.frac.ix[ti(gs2['REACTOME_CELL_CYCLE_MITOTIC']>0)],
dx_rna.frac.ix[ti(gs2['KEGG_FATTY_ACID_METABOLISM']>0)]])
v1 = pd.concat([pd.Series('All Genes', dx_rna.frac.index),
pd.Series('Cell Cycle\nMitotic',
ti(gs2['REACTOME_CELL_CYCLE_MITOTIC']>0)),
pd.Series('Fatty Acid\nMetabolism',
ti(gs2['KEGG_FATTY_ACID_METABOLISM']>0))])
v1.name = ''
v.name = 'Fraction Overexpressed'
violin_plot_pandas(v1, v, ann=None, ax=ax)
prettify_ax(ax)
return ax
In [72]:
#Do not import
fig, ax = subplots(1,1, figsize=(4,3))
fig_1f(ax)
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/gsea.pdf')
In [73]:
os.chdir('../Methlation/')
In [74]:
import Setup.DX_Imports as DX
In [75]:
#Do not import
fig, axs = subplots(2,4, figsize=(12,5))
axs = axs.flatten()
for i,p in enumerate(DX.probe_sets.keys()):
draw_dist(dx_meth.frac, DX.probe_sets[p], ax=axs[i])
axs[i].legend().set_visible(False)
axs[i].set_yticks([])
axs[i].set_title(p)
prettify_ax(axs[i])
f_odds = pd.DataFrame({f: fisher_exact_test((dx_meth.frac - .5).abs() > .25, v)
for f,v in DX.probe_sets.iteritems()}).T
np.log2(f_odds.odds_ratio).plot(kind='bar', ax=axs[-1])
prettify_ax(axs[-1])
fig.tight_layout()
In [76]:
def fig_1g(ax):
lw = 2.5
draw_dist(dx_meth.frac.ix[ti(DX.probe_sets['Promoter'])], ax=ax, lw=lw)
draw_dist(dx_meth.frac.ix[ti(DX.probe_sets['CpG Island'])], ax=ax, lw=lw)
draw_dist(dx_meth.frac.ix[ti(DX.probe_sets['PRC2'])], ax=ax, lw=lw)
draw_dist(dx_meth.frac, ax=ax, colors='grey', lw=lw)
ax.set_yticks([])
ax.set_xticks([0,.5,1])
ax.set_ylabel('Density')
ax.set_xlabel('Fraction with Increased Methylation')
ax.legend(('Promoter','CpG Island','PRC2','All Probes'))
prettify_ax(ax)
return ax
In [79]:
plt.rcParams['savefig.dpi'] = 150
In [81]:
#Do not import
fig, ax = subplots(1,1, figsize=(5,3))
fig_1g(ax);
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/fx_enrichment.pdf')
In [32]:
#Do not import
fig, axs = subplots(1,3, figsize=(15,3.5))
fig_1e(axs[0])
fig_1f(axs[1])
fig_1g(axs[2])
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/fig1_bottom.png', dpi=300)