Import everything from the imports notebook. This reads in all of the expression data as well as the functions needed to analyse differential expression data.
In [1]:
import NotebookImport
from Imports import *
In [2]:
import seaborn as sns
sns.set_context('paper',font_scale=1.5)
sns.set_style('white')
Read in matched Gene expression data.
In [3]:
matched_rna = pd.read_hdf('/data_ssd/RNASeq_2014_07_15.h5', 'matched_tn')
rna_microarray = pd.read_hdf('/data_ssd/GEO_microarray_dx.h5', 'data')
matched_rna = rna_microarray.join(matched_rna)
In [8]:
dx_rna = binomial_test_screen(matched_rna, fc=1.)
dx_rna = dx_rna[dx_rna.num_dx > 300]
In [21]:
dx_rna.frac.hist(bins=30)
Out[21]:
In [9]:
dx_rna.ix[['ADH1A','ADH1B','ADH1C']]
Out[9]:
In [10]:
dx_rna.shape
Out[10]:
In [11]:
dx_rna.p.rank().ix[['ADH1A','ADH1B','ADH1C']]
Out[11]:
In [12]:
dx_rna.sort('p').head(10)
Out[12]:
In [13]:
paired_bp_tn_split(matched_rna.ix['ADH1B'], codes, data_type='mRNA')
In [14]:
gs2 = gene_sets.ix[dx_rna.index].fillna(0)
In [15]:
rr = screen_feature(dx_rna.frac, rev_kruskal, gs2.T,
align=False)
fp = (1.*gene_sets.T * dx_rna.frac).T.dropna().replace(0, np.nan).mean().order()
fp.name = 'mean frac'
Overexpressed pathways
In [16]:
rr.ix[ti(fp > .5)].join(fp).sort('p').head()
Out[16]:
Underexpressed pathways
In [17]:
rr.ix[ti(fp < .5)].join(fp).sort('p').head()
Out[17]:
I am folling up on Fatty Acid Metabolism as opposed to biological oxidations, because it has a larger effect size, although the smaller gene-set size gives it a less extreme p-value.
In [18]:
def fig_1f(ax):
v = pd.concat([dx_rna.frac,
dx_rna.frac.ix[ti(gs2['REACTOME_CELL_CYCLE_MITOTIC']>0)],
dx_rna.frac.ix[ti(gs2['KEGG_FATTY_ACID_METABOLISM']>0)]])
v1 = pd.concat([pd.Series('All Genes', dx_rna.frac.index),
pd.Series('Cell Cycle\nMitotic',
ti(gs2['REACTOME_CELL_CYCLE_MITOTIC']>0)),
pd.Series('Fatty Acid\nMetabolism',
ti(gs2['KEGG_FATTY_ACID_METABOLISM']>0))])
v1.name = ''
v.name = 'Fraction Overexpressed'
violin_plot_pandas(v1, v, ann=None, ax=ax)
prettify_ax(ax)
return ax
In [19]:
#Do not import
fig, ax = subplots(1,1, figsize=(5,3))
fig_1f(ax);