notebook.community

Edit and run



In [19]:

    
from __future__ import division
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pylab
import itertools
import math
from matplotlib import rc
import scipy as sc
import pandas as pd
from scipy import stats

from cna_analysis import get_pancancer_cna_summary
from mutation_analysis import get_pancancer_mutation_summary



In [2]:

    
gene = 0; refresh = False
summary = get_pancancer_cna_summary(gene, refresh)
pylab.rcParams['figure.figsize'] = (18.0, 12.0)
summary.plot(kind='box', logy=True, rot=30); 
plt.ylabel('Number of Extreme CNA Events in Samples');



In [72]:

    
# Do pancancer aggregation 
gene = 0; refresh = False    
ns_summary, s_summary = get_pancancer_mutation_summary(gene, refresh)
cna_summary = get_pancancer_cna_summary(gene, refresh)
common_cancers = cna_summary.columns.intersection(ns_summary.columns)
cna_summary = cna_summary[common_cancers]
ns_summary = ns_summary[common_cancers]
cna_ns_summary = cna_summary.join(ns_summary, how='inner', rsuffix='_MUT')
cna_ns_summary.dropna(axis=1, how='all', inplace=True)



In [52]:

    
# Show common patients and summary stats for mutation and cna
cna_ns_summary.describe().transpose().head()



In [73]:

    
n = len(common_cancers[:3])
for i, can in enumerate(common_cancers[:3]):
    pylab.rcParams['figure.figsize'] = (10.0, 15.0)
    plt.subplot(n, 1, i+1)
    x = cna_ns_summary[can]
    y = cna_ns_summary[can+'_MUT']
    rho = stats.spearmanr(x,y)
    plt.scatter(x, y)
    plt.ylabel('Mutation {}'.format(can))
    
    plt.title("Spearman's rho: %.2f, p-val: %.2g" %(rho[0], rho[1]))
    plt.grid()

    plt.tight_layout(pad=0.2, w_pad=0.5, h_pad=0.2)
plt.xlabel('CNA Events');



In [74]:

    
# Do pancancer aggregation 
gene = 1; refresh = False    
ns_summary, s_summary = get_pancancer_mutation_summary(gene, refresh)
cna_summary = get_pancancer_cna_summary(gene, refresh)
common_cancers = cna_summary.columns.intersection(ns_summary.columns)
cna_summary = cna_summary[common_cancers]
ns_summary = ns_summary[common_cancers]
cna_ns_summary = cna_summary.join(ns_summary, how='inner', rsuffix='_MUT')
cna_ns_summary.dropna(axis=0, how='all', inplace=True)



In [75]:

    
n = len(common_cancers)
for i, can in enumerate(common_cancers):    
    x = cna_ns_summary[can]
    y = cna_ns_summary[can+'_MUT']
    rho = stats.spearmanr(x,y)
    print can, rho









    



ACC (-0.016859387896217395, 0.027574159706398396)
BLCA (-0.0098666271401155782, 0.19726829239665877)
BRCA (-1.1992478854616361e-05, 0.99874960198249918)
CESC (-0.0062994194186555968, 0.41039545112966513)
COAD (0.035548066662470811, 3.3714626920226116e-06)
COADREAD (0.04024969983029681, 1.426166036468846e-07)
GBM (-0.023074910185447328, 0.0025633314694865302)
HNSC (0.001231235284917619, 0.87217665456952098)
KICH (-0.020677645306561257, 0.0068844353345042754)
KIRC (-0.010025921208089181, 0.19013120821848839)
KIRP (-0.0056578406951789647, 0.45969083169100933)
LGG (-0.02252099409762405, 0.0032467998350591055)
LIHC (0.010275485290335878, 0.17933483566479988)
LUAD (0.052045045925146596, 1.0055412084428499e-11)
LUSC (0.0035213603580745898, 0.645399566776796)
OV (-0.028838815699143296, 0.00016366915435613179)
PAAD (0.0058333578300912899, 0.44588534775416799)
PRAD (-0.0069363419957385909, 0.36470732572727471)
READ (0.01419485190195221, 0.063591187314414377)
STAD (0.02148136286149922, 0.0049936089296128807)
THCA (-0.010184916239893711, 0.18319899368745346)
UCEC (-0.047424701351438094, 5.6173116539456433e-10)
UCS (0.015586503562389026, 0.041657804947505206)

	count	mean	std	25%	50%	75%	max
ACC	88	314.715909	511.283644	12.75	94.5	489.25	3306
BLCA	127	698.244094	724.908563	105.00	503.0	1042.00	3096
BRCA	960	648.186458	758.579459	44.75	434.0	1044.25	5440
CESC	191	397.413613	566.251991	28.50	204.0	580.50	4235
COAD	152	210.940789	305.911173	11.00	72.0	301.75	1773