notebook.community

Edit and run



In [11]:

    
from __future__ import division
import os
import math
%matplotlib inline
import numpy as np
import pylab
import matplotlib.pyplot as plt
from matplotlib import rc
import scipy.stats as stats
import pandas as pd
from clean_data import CANCER_TYPES

from IPython.html.widgets import interact
from IPython.html import widgets
from IPython.display import display



In [14]:

    
# Collect the cancer types whose 'NB' and 'PT' pathway-score tables are both
# present under ../results/<type>/, then offer them in a dropdown.
can_types = []
for c in CANCER_TYPES:
    result_dir = '../results/' + c + os.sep
    nb_path = result_dir + 'NB_rnaseq_pathway_score.txt'
    pt_path = result_dir + 'PT_rnaseq_pathway_score.txt'

    # Skip any type that is missing either of its two score files.
    if not os.path.exists(nb_path) or not os.path.exists(pt_path):
        continue
    can_types.append(c)

# Single-argument print(...) emits the same text on Python 2 and Python 3.
print("There are %d cancer types ready to be analysed" % len(can_types))

# The analysis cell reads the selected type back through can_type_wid.value.
can_type_wid = widgets.DropdownWidget(
    description="Select Cancer Type",
    values=can_types,
)
display(can_type_wid)









    



There are 2 cancer types ready to be analysed



In [19]:

    
# http://en.wikipedia.org/wiki/DNA_codon_table
# NOTE(review): each digit of `codons` looks like the number of codons for the
# letter at the same position in `amino_acids` -- confirm against the table.
# Neither string is read in this cell while the scaling factors below are 1.
amino_acids = 'arndcqeghi'
amino_acids += 'lkmfpstwyv'
amino_acids += 'bo'
codons = '4622222423'
codons += '6212464124'
codons += '13'

# Divisors applied to the two score tables before testing; 1 means no scaling.
# The commented-out lines are an alternative definition derived from `codons`.
spr = 1
nspr = 1
# spr =  sum([int(x)-1 for x in codons])/(22*27)
# nspr = 1 - spr

# Load the two pathway-score tables of the cancer type picked in the dropdown.
# The first column of each file becomes the row index.
# NOTE(review): `sdf` holds the 'NB' table and `nsdf` the 'PT' table; what the
# two group codes stand for is not visible here -- confirm with clean_data.
can = can_type_wid.value
opt = 'NB'
input_fpath = '../results/' + can + os.sep + opt +'_rnaseq_pathway_score.txt'
sdf = pd.read_table(input_fpath, sep='\t', header=0, index_col=0)
opt = 'PT'
input_fpath = '../results/' + can + os.sep + opt +'_rnaseq_pathway_score.txt'
nsdf = pd.read_table(input_fpath, sep='\t', header=0, index_col=0)


def _neg_log10_pvalue(sample_a, sample_b):
    """Return -log10 of the unequal-variance t-test p-value for two samples.

    A p-value that underflows to exactly 0.0 would make math.log10 raise
    ValueError, so it is clamped to the smallest positive normal float first
    (giving a finite score of roughly 307.65). NaN p-values pass through
    unchanged and yield NaN.
    """
    pvalue = stats.ttest_ind(sample_a, sample_b, equal_var=False)[1]
    if pvalue == 0:
        pvalue = np.finfo(float).tiny
    return -math.log10(pvalue)


# One score per row label of `sdf`; the matching row of `nsdf` is looked up by
# the same label. dtype is given explicitly so the Series is always numeric.
res = pd.Series(
    [_neg_log10_pvalue(sdf.loc[p] / spr, nsdf.loc[p] / nspr) for p in sdf.index],
    index=sdf.index,
    dtype=float,
)


@interact(pval=widgets.FloatSliderWidget(min=res.min(), max=res.max(), value=max(res.max()-5, res.min()), step=1))
def plot_entiched(pval):
    """Draw a horizontal bar chart of the entries of `res` strictly above `pval`.

    pval -- threshold on the -log10 p-value scale, supplied by the slider.

    Nothing is drawn when no entry exceeds the threshold (for example with the
    slider at its maximum), because plotting an empty Series raises.
    """
    pylab.rcParams['figure.figsize'] = (12.0, 8.0)
    enriched = res[res > pval]
    if len(enriched) == 0:
        print("No pathway has a -log10 p-value above %s" % pval)
        return
    # NOTE(review): Series.order() was renamed sort_values() in later pandas
    # releases; kept as-is to match the pandas version this notebook targets.
    enriched.order().plot(title=can + " Enriched Pathways vs t-test P-values (-log10)", kind='barh', rot=0)


# Shapes of the two tables; formatted so the text is identical on Python 2 and 3.
print("%s %s" % (sdf.shape, nsdf.shape))