In [19]:
from __future__ import division
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pylab
import itertools
import math
from matplotlib import rc
import scipy as sc
import pandas as pd
from scipy import stats

from cna_analysis import get_pancancer_cna_summary
from mutation_analysis import get_pancancer_mutation_summary

In [2]:
# Box plot of the pan-cancer CNA summary, one box per column of the summary.
# NOTE(review): gene = 0 presumably selects the per-sample aggregation (cf. the
# y-axis label below) -- confirm against cna_analysis.
gene = 0
refresh = False
summary = get_pancancer_cna_summary(gene, refresh)

# Enlarge the default figure size before plotting. This is a global matplotlib
# setting, so it stays in effect for later cells until it is changed again.
plt.rcParams['figure.figsize'] = (18.0, 12.0)

# Log-scaled y-axis; x tick labels rotated by 30 degrees.
summary.plot(kind='box', logy=True, rot=30)
plt.ylabel('Number of Extreme CNA Events in Samples');



In [72]:
# Pan-cancer aggregation: place the CNA and mutation summaries side by side
# for the cancers that appear in both.
gene = 0
refresh = False

# The mutation helper returns two summaries; only the first is used in this cell.
# NOTE(review): ns_/s_ presumably stand for non-synonymous / synonymous --
# confirm in mutation_analysis.
ns_summary, s_summary = get_pancancer_mutation_summary(gene, refresh)
cna_summary = get_pancancer_cna_summary(gene, refresh)

# Restrict both frames to the column labels they share.
common_cancers = cna_summary.columns.intersection(ns_summary.columns)
cna_summary = cna_summary.loc[:, common_cancers]
ns_summary = ns_summary.loc[:, common_cancers]

# Inner join on the index. Both frames now have identical column names, so the
# mutation columns receive the '_MUT' suffix. Columns holding no data at all
# are then discarded.
# NOTE(review): the gene = 1 aggregation cell drops all-NaN rows (axis=0)
# where this one drops all-NaN columns (axis=1) -- confirm which is intended.
cna_ns_summary = (
    cna_summary
    .join(ns_summary, how='inner', rsuffix='_MUT')
    .dropna(axis=1, how='all')
)

In [52]:
# Summary statistics of the joined CNA/mutation frame, one row per column of
# the frame; only the first five rows are displayed. The 'count' column is the
# number of non-missing values in each column.
cna_ns_summary.describe().T.head()


Out[52]:
count mean std min 25% 50% 75% max
ACC 88 314.715909 511.283644 0 12.75 94.5 489.25 3306
BLCA 127 698.244094 724.908563 0 105.00 503.0 1042.00 3096
BRCA 960 648.186458 758.579459 0 44.75 434.0 1044.25 5440
CESC 191 397.413613 566.251991 0 28.50 204.0 580.50 4235
COAD 152 210.940789 305.911173 0 11.00 72.0 301.75 1773

In [73]:
# Scatter CNA against mutation values for the first three shared cancers, one
# stacked panel per cancer, each titled with its Spearman rank correlation.
shown_cancers = common_cancers[:3]
n = len(shown_cancers)

# Global figure size; set once before the first subplot creates the figure.
pylab.rcParams['figure.figsize'] = (10.0, 15.0)

for i, can in enumerate(shown_cancers):
    plt.subplot(n, 1, i+1)

    # The joined frame holds NaN wherever a row has no value for this cancer
    # (the per-column counts reported by describe() differ). Correlate only
    # rows where both the CNA and the mutation value are present: NaNs passed
    # straight to spearmanr give a nan or meaningless coefficient, depending
    # on the SciPy version.
    pair = cna_ns_summary[[can, can+'_MUT']].dropna()
    x = pair[can]
    y = pair[can+'_MUT']
    rho = stats.spearmanr(x, y)

    plt.scatter(x, y)
    plt.ylabel('Mutation {}'.format(can))
    # rho[0] is the correlation coefficient, rho[1] the p-value.
    plt.title("Spearman's rho: %.2f, p-val: %.2g" %(rho[0], rho[1]))
    plt.grid()

# Only the bottom panel gets the shared x-axis label.
plt.xlabel('CNA Events')
# Lay out once, after every label exists, so the x-label is accounted for.
plt.tight_layout(pad=0.2, w_pad=0.5, h_pad=0.2);



In [74]:
# Pan-cancer aggregation with gene = 1: place the CNA and mutation summaries
# side by side for the cancers that appear in both.
# NOTE(review): gene = 1 presumably switches the helpers to a per-gene
# aggregation -- confirm against cna_analysis / mutation_analysis.
gene = 1
refresh = False

# The mutation helper returns two summaries; only the first is used in this cell.
ns_summary, s_summary = get_pancancer_mutation_summary(gene, refresh)
cna_summary = get_pancancer_cna_summary(gene, refresh)

# Restrict both frames to the column labels they share.
common_cancers = cna_summary.columns.intersection(ns_summary.columns)
cna_summary = cna_summary.loc[:, common_cancers]
ns_summary = ns_summary.loc[:, common_cancers]

# Inner join on the index. Both frames now have identical column names, so the
# mutation columns receive the '_MUT' suffix. Rows holding no data at all are
# then discarded.
# NOTE(review): the gene = 0 aggregation cell drops all-NaN columns (axis=1)
# where this one drops all-NaN rows (axis=0) -- confirm which is intended.
cna_ns_summary = (
    cna_summary
    .join(ns_summary, how='inner', rsuffix='_MUT')
    .dropna(axis=0, how='all')
)

In [75]:
# Print the Spearman rank correlation between the CNA and mutation columns of
# every shared cancer, one line per cancer.
n = len(common_cancers)  # not used in this cell; kept in case a later cell reads it
for can in common_cancers:
    # Correlate only rows where both values are present: NaNs passed straight
    # to spearmanr give a nan or meaningless coefficient, depending on the
    # SciPy version. When the columns have no NaN this is a no-op.
    pair = cna_ns_summary[[can, can+'_MUT']].dropna()
    x = pair[can]
    y = pair[can+'_MUT']
    rho = stats.spearmanr(x, y)
    # Single-argument form: prints "<cancer> <result>" exactly as the Python 2
    # print statement did, and is also valid Python 3.
    print('%s %s' % (can, rho))


ACC (-0.016859387896217395, 0.027574159706398396)
BLCA (-0.0098666271401155782, 0.19726829239665877)
BRCA (-1.1992478854616361e-05, 0.99874960198249918)
CESC (-0.0062994194186555968, 0.41039545112966513)
COAD (0.035548066662470811, 3.3714626920226116e-06)
COADREAD (0.04024969983029681, 1.426166036468846e-07)
GBM (-0.023074910185447328, 0.0025633314694865302)
HNSC (0.001231235284917619, 0.87217665456952098)
KICH (-0.020677645306561257, 0.0068844353345042754)
KIRC (-0.010025921208089181, 0.19013120821848839)
KIRP (-0.0056578406951789647, 0.45969083169100933)
LGG (-0.02252099409762405, 0.0032467998350591055)
LIHC (0.010275485290335878, 0.17933483566479988)
LUAD (0.052045045925146596, 1.0055412084428499e-11)
LUSC (0.0035213603580745898, 0.645399566776796)
OV (-0.028838815699143296, 0.00016366915435613179)
PAAD (0.0058333578300912899, 0.44588534775416799)
PRAD (-0.0069363419957385909, 0.36470732572727471)
READ (0.01419485190195221, 0.063591187314414377)
STAD (0.02148136286149922, 0.0049936089296128807)
THCA (-0.010184916239893711, 0.18319899368745346)
UCEC (-0.047424701351438094, 5.6173116539456433e-10)
UCS (0.015586503562389026, 0.041657804947505206)