In [1]:
    
import NotebookImport
from metaPCNA import *
    
    
    
    
GABRD is the top hit among tumor-associated, proliferation-independent genes.
In [2]:
    
# Sort f_win in ascending order and show the last five entries,
# i.e. the entries with the largest values.
f_win_sorted = f_win.order()
f_win_sorted.tail()
    
    Out[2]:
In [3]:
    
# switch_plot is pulled in by the metaPCNA star-import; its definition is not
# visible here. Presumably it draws the tumor-vs-normal comparison for the
# named gene -- TODO confirm against metaPCNA.
switch_plot('GABRD')
    
    
GABRD is one of many GABA receptor subunits, many of which are down-regulated.
In [4]:
    
# Every entry of the rna_df index whose name begins with 'GABR'.
gabr = list(filter(lambda symbol: symbol.startswith('GABR'), rna_df.index))

# Rows of dx_rna for those genes, with rows containing missing values removed.
# `gabr` and `f` are reused by the plotting cells below.
f = dx_rna.ix[gabr].dropna()

# Attach f_win as an extra column and display the table sorted by it.
f_with_win = f.join(f_win)
f_with_win.sort(f_win.name)
    
    Out[4]:
In [5]:
    
# Expression of the GABR genes, clipped to [-9, 10] and transposed so that
# genes become columns.
gabr_expr = matched_rna.ix[gabr].clip(-9, 10).T

# Genes ordered by ascending f.frac.
gene_order = list(f.frac.order().index)

paired_boxplot_tumor_normal(gabr_expr, order=gene_order)
prettify_ax(plt.gca())
    
    
In [6]:
    
# Same plot as above, restricted to the columns whose entry in `codes`
# starts with 'KIRC'.
kirc_cols = ti(codes.str.startswith('KIRC'))
kirc_expr = matched_rna.ix[gabr, kirc_cols].clip(-9, 10).T

# Genes ordered by ascending f.frac.
gene_order = list(f.frac.order().index)

paired_boxplot_tumor_normal(kirc_expr, order=gene_order, sig=False)
prettify_ax(plt.gca())
    
    
In [7]:
    
# Same plot as above, restricted to the columns whose entry in `codes`
# starts with 'BRCA'.
brca_cols = ti(codes.str.startswith('BRCA'))
brca_expr = matched_rna.ix[gabr, brca_cols].clip(-9, 10).T

# Genes ordered by ascending f.frac.
gene_order = list(f.frac.order().index)

paired_boxplot_tumor_normal(brca_expr, order=gene_order, sig=False)
prettify_ax(plt.gca())
    
    
GABRA2 and GABRA4 seem to be downregulated most of the time.
In [8]:
    
# Same helper as used for GABRD, here for GABRA2 (helper comes from the
# metaPCNA star-import; behavior not visible in this notebook).
switch_plot('GABRA2')
    
    
In [9]:
    
# Same helper as used for GABRD, here for GABRA4 (helper comes from the
# metaPCNA star-import; behavior not visible in this notebook).
switch_plot('GABRA4')
    
    
Looking for gene sets concordant with GABRD differential expression.
In [10]:
    
# Correlate every row of df_s with the GABRD row (axis=1 -> row-wise),
# dropping rows for which no correlation could be computed.
gabrd_row = df_s.ix['GABRD']
rr = df_s.corrwith(gabrd_row, axis=1).dropna()

# Screen the sorted correlations against every gene set and show the first
# four rows of the result. `rr` is reused by the next cell.
rr_sorted = rr.order()
enr = screen_feature(rr_sorted, rev_kruskal, gene_sets.T, align=False)
enr.head(4)
    
    Out[10]:
In [11]:
    
# Correlations equal to exactly 1 are replaced by NaN before plotting
# (presumably to drop GABRD's correlation with itself -- confirm).
matrisome_membership = gene_sets['NABA_CORE_MATRISOME']
rr_no_ones = rr.replace(1, np.nan)
violin_plot_pandas(matrisome_membership, rr_no_ones)
    
    
I'm storing intermediate results here because they can take a while to compute. If anything upstream changes, you will want to recompute and re-store the results by setting the RERUN flag to True.
In [12]:
    
# Set RERUN = True to force both correlation matrices to be recomputed and
# written back to the HDF5 store (e.g. after df_s or dx changed upstream).
RERUN = False

corr_profile_t = None
corr_profile_m = None

if not RERUN:
    try:
        corr_profile_t = pd.read_hdf(RNA_SUBREAD_STORE, 'dfs_correlations')
        corr_profile_m = pd.read_hdf(RNA_SUBREAD_STORE, 'dx_correlations')
    except (IOError, OSError, KeyError):
        # Store file or one of the keys is missing: treat it as a cache miss
        # and rebuild both matrices below. Other errors are left to surface
        # rather than being silently swallowed.
        corr_profile_t = None
        corr_profile_m = None

if corr_profile_t is None or corr_profile_m is None:
    # Row-by-row correlation matrices (rows are transposed into columns
    # first, since DataFrame.corr correlates columns).
    corr_profile_t = df_s.T.corr()
    corr_profile_t.to_hdf(RNA_SUBREAD_STORE, 'dfs_correlations')
    corr_profile_m = dx.T.corr()
    corr_profile_m.to_hdf(RNA_SUBREAD_STORE, 'dx_correlations')

# Computed on both the cached and the recomputed path, so later cells always
# see a corr_change that matches the current corr_profile_* matrices.
corr_change = corr_profile_m - corr_profile_t
    
In [13]:
    
# Screen GABRD's row of the correlation-change matrix against every gene set.
# Note: this rebinds `rr`, which the In [10] cell used for the GABRD
# correlation vector; the cells below rely on this newer value.
gabrd_corr_change = corr_change.ix['GABRD']
rr = screen_feature(gabrd_corr_change, rev_kruskal, gene_sets.T, align=False)
rr.head()
    
    Out[13]:
In [14]:
    
%%prun
tstats = {}
for gs in rr[rr.q < 10e-10].index:
    mm = corr_change.groupby(gene_sets[gs]).mean()
    vv = corr_change.groupby(gene_sets[gs]).var()
    tt = ((mm.ix[1] - mm.ix[0]) / 
          np.sqrt(vv.div(gene_sets[gs].value_counts(), axis=0)).sum())
    tt = tt.dropna().order()
    tstats[gs] = tt
tstats = pd.DataFrame(tstats)
    
    
In [15]:
    
# Rank the entries within each gene-set column (largest t-statistic gets
# rank 1), then list the ten gene sets in which GABRD has the smallest rank.
ranks_by_gene_set = tstats.rank(ascending=False)
gabrd_ranks = ranks_by_gene_set.ix['GABRD']
gabrd_ranks.order().head(10)
    
    Out[15]:
In [37]:
    
# The five largest t-statistics for the NKT pathway gene set.
nkt_tstats = tstats['BIOCARTA_NKT_PATHWAY']
nkt_tstats.order().tail()
    
    Out[37]:
In [40]:
    
# GABRD's correlation change, restricted to the entries selected by
# gene_sets['BIOCARTA_NKT_PATHWAY'] > 0 and sorted ascending.
gabrd_change = corr_change.ix['GABRD']
nkt_selection = ti(gene_sets['BIOCARTA_NKT_PATHWAY'] > 0)
gabrd_change.ix[nkt_selection].order()
    
    Out[40]:
In [273]:
    
# Contingency table of membership in the two gene sets
# (rows: NKT pathway, columns: hematopoietic cell lineage).
nkt_membership = gene_sets['BIOCARTA_NKT_PATHWAY']
hema_membership = gene_sets['KEGG_HEMATOPOIETIC_CELL_LINEAGE']
pd.crosstab(nkt_membership, hema_membership)
    
    Out[273]:
In [271]:
    
# Distribution of GABRD's correlation change (missing values removed),
# split by membership in the hematopoietic cell lineage gene set.
hema_membership = gene_sets['KEGG_HEMATOPOIETIC_CELL_LINEAGE']
gabrd_change = corr_change.ix['GABRD'].dropna()
violin_plot_pandas(hema_membership, gabrd_change)
    
    
In [272]:
    
# Distribution of GABRD's correlation change (missing values removed),
# split by membership in the NKT pathway gene set.
nkt_membership = gene_sets['BIOCARTA_NKT_PATHWAY']
gabrd_change = corr_change.ix['GABRD'].dropna()
violin_plot_pandas(nkt_membership, gabrd_change)
    
    
In [275]:
    
# Scatter GABRD's column of corr_profile_m against its column of
# corr_profile_t, then overlay in red the entries selected by the gene set.
gs = 'BIOCARTA_NKT_PATHWAY'
fig, ax = subplots()

gabrd_m = corr_profile_m['GABRD']
gabrd_t = corr_profile_t['GABRD']

# Background: every entry, faint.
series_scatter(gabrd_m, gabrd_t, s=10, ax=ax, ann=None, alpha=.1)

# Foreground: only the entries where gene_sets[gs] > 0, fully opaque.
in_gene_set = ti(gene_sets[gs] > 0)
series_scatter(gabrd_m, gabrd_t.ix[in_gene_set],
               s=10, ax=ax, color='red', alpha=1, ann=None)
    
    
In [267]:
    
# Merge the two gene-set membership vectors with combine() and plot GABRD's
# correlation change (missing values removed) for each resulting category.
hema_membership = gene_sets['KEGG_HEMATOPOIETIC_CELL_LINEAGE']
nkt_membership = gene_sets['BIOCARTA_NKT_PATHWAY']
cc = combine(hema_membership, nkt_membership)

gabrd_change = corr_change.ix['GABRD'].dropna()
violin_plot_pandas(cc, gabrd_change)