In [11]:
from __future__ import division
import os
import math
%matplotlib inline
import numpy as np
import pylab
import matplotlib.pyplot as plt
from matplotlib import rc
import scipy.stats as stats
import pandas as pd
from clean_data import CANCER_TYPES
from IPython.html.widgets import interact
from IPython.html import widgets
from IPython.display import display
In [14]:
# Keep only the cancer types for which both pathway-score files (the 'NB'
# and the 'PT' variant) already exist under ../results/<cancer type>/.
can_types = []
for c in CANCER_TYPES:
    # os.path.join keeps the separators consistent instead of mixing a
    # hard-coded '/' with os.sep.
    f1 = os.path.join('../results', c, 'NB_rnaseq_pathway_score.txt')
    f2 = os.path.join('../results', c, 'PT_rnaseq_pathway_score.txt')
    if os.path.exists(f1) and os.path.exists(f2):
        can_types.append(c)

# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("There are %d cancer types ready to be analysed" % len(can_types))

# Dropdown whose current selection is read later through `can_type_wid.value`.
can_type_wid = widgets.DropdownWidget(description="Select Cancer Type", values=can_types)
display(can_type_wid)
In [19]:
# Reference table: http://en.wikipedia.org/wiki/DNA_codon_table
# `amino_acids` and `codons` are parallel 22-character strings; presumably
# codons[i] is the codon count for the one-letter code amino_acids[i]
# -- TODO confirm against the linked table.
amino_acids = (
    'arndcqeghi'
    'lkmfpstwyv'
    'bo'
)
codons = (
    '4622222423'
    '6212464124'
    '13'
)

# Divisors applied to the two score tables before the t-test; with both set
# to 1 the scores are compared unscaled.
spr = nspr = 1
# Alternative scaling derived from the codon table, currently disabled:
# spr = sum([int(x)-1 for x in codons])/(22*27)
# nspr = 1 - spr
# Cancer type currently selected in the dropdown widget.
can = can_type_wid.value

# Load the two pathway-score tables for that cancer type. Both are
# tab-separated, with the first row as header and the first column as index.
opt = 'NB'
input_fpath = os.path.join('../results', can, opt + '_rnaseq_pathway_score.txt')
sdf = pd.read_table(input_fpath, sep='\t', header=0, index_col=0)
opt = 'PT'
input_fpath = os.path.join('../results', can, opt + '_rnaseq_pathway_score.txt')
nsdf = pd.read_table(input_fpath, sep='\t', header=0, index_col=0)

# A p-value can underflow to exactly 0.0, for which math.log10 raises
# ValueError. Flooring it at the smallest positive normal float keeps the
# score finite (about 307.65) without touching any other value.
min_pvalue = np.finfo(float).tiny

# For every row label of `sdf`, compare that row with the same-labelled row
# of `nsdf` using an unequal-variance (Welch) t-test and keep -log10(p).
neg_log_pvalues = []
for p in sdf.index:
    pvalue = stats.ttest_ind(sdf.loc[p] / spr, nsdf.loc[p] / nspr, equal_var=False)[1]
    # max() returns its first argument when the comparison is False, so a NaN
    # p-value stays NaN exactly as before.
    neg_log_pvalues.append(-1 * math.log10(max(pvalue, min_pvalue)))

# Build the Series in one go with an explicit float dtype, rather than
# filling an untyped empty Series one label at a time.
res = pd.Series(neg_log_pvalues, index=sdf.index, dtype=float)
# The slider spans the range of values in `res`. It starts five units below
# the maximum, but never below the minimum.
@interact(
    pval=widgets.FloatSliderWidget(
        min=res.min(),
        max=res.max(),
        value=max(res.max() - 5, res.min()),
        step=1,
    )
)
def plot_entiched(pval):
    """Draw a horizontal bar chart of the `res` entries above a threshold.

    Registered with `interact`, which supplies `pval` from the slider.

    Parameters
    ----------
    pval : float
        Threshold on the -log10 p-value scale; only entries of `res`
        strictly greater than it are plotted.
    """
    pylab.rcParams['figure.figsize'] = (12.0, 8.0)
    above_threshold = res[res > pval]
    ranked = above_threshold.order()
    chart_title = can + " Enriched Pathways vs t-test P-values (-log10)"
    ranked.plot(title=chart_title, kind='barh', rot=0)
# Report the (rows, columns) shape of both score tables. Formatting into one
# string prints the same "(r, c) (r, c)" text on Python 2 and Python 3.
print("%s %s" % (sdf.shape, nsdf.shape))