In [1]:
import NotebookImport
from metaPCNA import *
GABRD is the top hit among tumor-associated, proliferation-independent genes.
In [2]:
# Sort f_win in ascending order and show the last five rows, i.e. its largest values.
f_win.order().tail(5)
Out[2]:
In [3]:
# Draw the switch_plot for GABRD. switch_plot is not defined in this notebook;
# presumably it is provided by the `from metaPCNA import *` star-import -- confirm.
switch_plot('GABRD')
GABRD is one of many GABA receptor subunits, many of which are down-regulated.
In [4]:
# Every label in rna_df's index that starts with 'GABR'.
gabr = [
    g
    for g in rna_df.index
    if g.startswith('GABR')
]

# dx_rna restricted to those genes, with entries containing missing values dropped.
f = dx_rna.ix[gabr].dropna()

# Show f joined with f_win, sorted by the f_win column.
f.join(f_win).sort(columns=f_win.name)
Out[4]:
In [5]:
# Paired tumor/normal boxplot of the GABR genes.
# Values are clipped to [-9, 10] and the genes are ordered by increasing f.frac.
gabr_expr = matched_rna.ix[gabr].clip(-9, 10).T
gabr_order = list(f.frac.order().index)
paired_boxplot_tumor_normal(gabr_expr, order=gabr_order)
prettify_ax(plt.gca())
In [6]:
# Paired tumor/normal boxplot of the GABR genes, restricted to the columns picked
# out by codes starting with 'KIRC'. ti(...) presumably turns the boolean mask
# into the labels where it is True -- confirm against its definition.
is_kirc = codes.str.startswith('KIRC')
kirc_expr = matched_rna.ix[gabr, ti(is_kirc)].clip(-9, 10).T
paired_boxplot_tumor_normal(
    kirc_expr,
    order=list(f.frac.order().index),
    sig=False
)
prettify_ax(plt.gca())
In [7]:
# Paired tumor/normal boxplot of the GABR genes, restricted to the columns picked
# out by codes starting with 'BRCA'. ti(...) presumably turns the boolean mask
# into the labels where it is True -- confirm against its definition.
is_brca = codes.str.startswith('BRCA')
brca_expr = matched_rna.ix[gabr, ti(is_brca)].clip(-9, 10).T
paired_boxplot_tumor_normal(
    brca_expr,
    order=list(f.frac.order().index),
    sig=False
)
prettify_ax(plt.gca())
GABRA2 and GABRA4 seem to be downregulated most of the time.
In [8]:
# Draw the switch_plot for GABRA2 (helper presumably comes from the metaPCNA
# star-import -- confirm).
switch_plot('GABRA2')
In [9]:
# Draw the switch_plot for GABRA4 (helper presumably comes from the metaPCNA
# star-import -- confirm).
switch_plot('GABRA4')
Looking for gene sets concordant with GABRD differential expression.
In [10]:
# Correlate each row of df_s with the GABRD row (axis=1) and drop undefined results.
gabrd_profile = df_s.ix['GABRD']
rr = df_s.corrwith(gabrd_profile, axis=1).dropna()

# Screen the gene sets against the sorted correlations using rev_kruskal,
# then show the first four rows of the result.
enr = screen_feature(
    rr.order(),
    rev_kruskal,
    gene_sets.T,
    align=False
)
enr.head(4)
Out[10]:
In [11]:
# Violin plot of rr split by NABA_CORE_MATRISOME membership.
# Values equal to 1 are replaced by NaN first (presumably this removes GABRD's
# correlation with itself -- confirm).
violin_plot_pandas(
    gene_sets['NABA_CORE_MATRISOME'],
    rr.replace(1, np.nan)
)
I'm storing intermediate results here because they can take a while to compute. If anything upstream changes, you will want to recompute them by setting the RERUN flag to True.
In [12]:
# Set RERUN to True to ignore the cached correlation matrices and recompute them.
RERUN = False

# Try the cache first unless a recompute was requested.
use_cache = not RERUN
if use_cache:
    try:
        corr_profile_t = pd.read_hdf(RNA_SUBREAD_STORE, 'dfs_correlations')
        corr_profile_m = pd.read_hdf(RNA_SUBREAD_STORE, 'dx_correlations')
    except Exception:
        # Best-effort cache: any read failure (missing store, missing key, ...)
        # falls back to recomputing. Unlike a bare `except:`, this lets
        # KeyboardInterrupt through, so interrupting a slow load does not
        # kick off the even slower recomputation.
        use_cache = False

if not use_cache:
    # Recompute both correlation matrices and write them back to the store.
    corr_profile_t = df_s.T.corr()
    corr_profile_t.to_hdf(RNA_SUBREAD_STORE, 'dfs_correlations')
    corr_profile_m = dx.T.corr()
    corr_profile_m.to_hdf(RNA_SUBREAD_STORE, 'dx_correlations')

# Computed on both paths: later cells need corr_change whether the matrices
# were loaded from the store or freshly recomputed.
corr_change = corr_profile_m - corr_profile_t
In [13]:
# Screen the gene sets against GABRD's row of corr_change using rev_kruskal.
# NOTE: this rebinds rr, which an earlier cell used for the correlation Series.
rr = screen_feature(
    corr_change.ix['GABRD'],
    rev_kruskal,
    gene_sets.T,
    align=False
)
rr.head(5)
Out[13]:
In [14]:
%%prun
tstats = {}
for gs in rr[rr.q < 10e-10].index:
mm = corr_change.groupby(gene_sets[gs]).mean()
vv = corr_change.groupby(gene_sets[gs]).var()
tt = ((mm.ix[1] - mm.ix[0]) /
np.sqrt(vv.div(gene_sets[gs].value_counts(), axis=0)).sum())
tt = tt.dropna().order()
tstats[gs] = tt
tstats = pd.DataFrame(tstats)
In [15]:
# Rank each tstats column in descending order, take the GABRD row,
# and show the ten gene sets where GABRD has the smallest rank values.
gabrd_ranks = tstats.rank(ascending=False).ix['GABRD']
gabrd_ranks.order().head(10)
Out[15]:
In [37]:
# The five largest t-statistics in the BIOCARTA_NKT_PATHWAY column.
tstats['BIOCARTA_NKT_PATHWAY'].order().tail(5)
Out[37]:
In [40]:
# GABRD's corr_change values for entries with BIOCARTA_NKT_PATHWAY > 0, sorted ascending.
# ti(...) presumably returns the labels where the mask is True -- confirm.
nkt_members = ti(gene_sets['BIOCARTA_NKT_PATHWAY'] > 0)
corr_change.ix['GABRD'].ix[nkt_members].order()
Out[40]:
In [273]:
# Contingency table of membership in the two gene sets.
pd.crosstab(
    gene_sets['BIOCARTA_NKT_PATHWAY'],
    gene_sets['KEGG_HEMATOPOIETIC_CELL_LINEAGE']
)
Out[273]:
In [271]:
# Violin plot of GABRD's non-missing corr_change values, split by
# KEGG_HEMATOPOIETIC_CELL_LINEAGE membership.
gabrd_change = corr_change.ix['GABRD'].dropna()
violin_plot_pandas(gene_sets['KEGG_HEMATOPOIETIC_CELL_LINEAGE'], gabrd_change)
In [272]:
# Violin plot of GABRD's non-missing corr_change values, split by
# BIOCARTA_NKT_PATHWAY membership.
gabrd_change = corr_change.ix['GABRD'].dropna()
violin_plot_pandas(gene_sets['BIOCARTA_NKT_PATHWAY'], gabrd_change)
In [275]:
gs = 'BIOCARTA_NKT_PATHWAY'
fig, ax = subplots()

# First layer: the GABRD columns of both correlation matrices, drawn faintly.
series_scatter(
    corr_profile_m['GABRD'],
    corr_profile_t['GABRD'],
    s=10, ax=ax, ann=None, alpha=.1
)

# Second layer on the same axes: only entries with gene_sets[gs] > 0, in opaque red.
series_scatter(
    corr_profile_m['GABRD'],
    corr_profile_t['GABRD'].ix[ti(gene_sets[gs] > 0)],
    s=10, ax=ax, color='red', alpha=1, ann=None
)
In [267]:
# Merge the two membership vectors with combine() and plot GABRD's non-missing
# corr_change values split by the combined grouping.
hematopoietic = gene_sets['KEGG_HEMATOPOIETIC_CELL_LINEAGE']
nkt_pathway = gene_sets['BIOCARTA_NKT_PATHWAY']
cc = combine(hematopoietic, nkt_pathway)
violin_plot_pandas(cc, corr_change.ix['GABRD'].dropna())