In [19]:
from __future__ import division
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pylab
import itertools
import math
from matplotlib import rc
import scipy as sc
import pandas as pd
from scipy import stats
from cna_analysis import get_pancancer_cna_summary
from mutation_analysis import get_pancancer_mutation_summary
In [2]:
# Box plot of the pan-cancer CNA summary: one box per column of `summary`,
# drawn on a logarithmic y axis.
# NOTE(review): `gene` and `refresh` are handed straight to
# get_pancancer_cna_summary; their meaning is defined in cna_analysis -- confirm there.
gene = 0
refresh = False
summary = get_pancancer_cna_summary(gene, refresh)

# plt.rcParams is the same matplotlib-wide settings object as pylab.rcParams,
# so this still changes the default size of every figure created afterwards.
plt.rcParams['figure.figsize'] = (18.0, 12.0)

box_plot_options = dict(kind='box', logy=True, rot=30)
summary.plot(**box_plot_options);
plt.ylabel('Number of Extreme CNA Events in Samples');
In [72]:
# Pan-cancer aggregation for gene = 0: build one frame that holds, for every
# cancer type present in both summaries, its CNA column (`<cancer>`) next to
# the matching column from `ns_summary` (`<cancer>_MUT`).
# NOTE(review): the meaning of `gene` / `refresh` and of the frames' index is
# defined in cna_analysis / mutation_analysis -- confirm there.
gene = 0
refresh = False
ns_summary, s_summary = get_pancancer_mutation_summary(gene, refresh)
cna_summary = get_pancancer_cna_summary(gene, refresh)

# Keep only the cancer types reported by both summaries.
common_cancers = cna_summary.columns.intersection(ns_summary.columns)
cna_summary = cna_summary[common_cancers]
ns_summary = ns_summary[common_cancers]

# After the filtering above every column name collides, so all right-hand
# columns receive the '_MUT' suffix while the CNA columns keep their names.
cna_ns_summary = cna_summary.join(ns_summary, how='inner', rsuffix='_MUT')

# Drop columns that contain no data at all. Reassigning instead of using
# inplace=True keeps the cell idempotent when it is re-run.
cna_ns_summary = cna_ns_summary.dropna(axis=1, how='all')

# Dropping columns may remove `<cancer>` or `<cancer>_MUT` for some cancer
# type. Keep `common_cancers` in sync with the surviving columns so that
# indexing cna_ns_summary by these labels cannot raise KeyError.
surviving_columns = set(cna_ns_summary.columns)
common_cancers = pd.Index([
    cancer for cancer in common_cancers
    if cancer in surviving_columns and cancer + '_MUT' in surviving_columns
])
In [52]:
# Summary statistics for the joined CNA / mutation frame, transposed so that
# each column of `cna_ns_summary` becomes one row; only the first five rows
# are displayed.
column_stats = cna_ns_summary.describe()
column_stats.T.head()
Out[52]:
In [73]:
# Scatter each cancer type's CNA column against its '_MUT' column for the
# first three common cancer types, one stacked panel per cancer type, and
# title every panel with the Spearman rank correlation of the two columns.
shown_cancers = common_cancers[:3]
n = len(shown_cancers)

# Loop-invariant: set the default figure size once, before the first
# plt.subplot call creates the figure.
pylab.rcParams['figure.figsize'] = (10.0, 15.0)

for i, can in enumerate(shown_cancers):
    plt.subplot(n, 1, i + 1)

    # Correlate only rows where both values are present. scipy's spearmanr
    # does not skip NaNs by default, so a missing value on either side would
    # otherwise make rho and the p-value NaN. The scatter is unaffected:
    # matplotlib never draws points with a NaN coordinate.
    paired = cna_ns_summary[[can, can + '_MUT']].dropna()
    x = paired[can]
    y = paired[can + '_MUT']
    rho = stats.spearmanr(x, y)  # rho[0] = correlation, rho[1] = p-value

    plt.scatter(x, y)
    plt.ylabel('Mutation {}'.format(can))
    plt.title("Spearman's rho: %.2f, p-val: %.2g" % (rho[0], rho[1]))
    plt.grid()

# Only the bottom (last created) panel carries the x label.
plt.xlabel('CNA Events')
# Lay the figure out once, after every panel and label exists.
plt.tight_layout(pad=0.2, w_pad=0.5, h_pad=0.2);
In [74]:
# Pan-cancer aggregation for gene = 1.
# NOTE(review): the meaning of `gene` / `refresh` is defined in
# cna_analysis / mutation_analysis -- confirm there.
gene, refresh = 1, False
ns_summary, s_summary = get_pancancer_mutation_summary(gene, refresh)
cna_summary = get_pancancer_cna_summary(gene, refresh)

# Restrict both summaries to the cancer types they have in common.
common_cancers = cna_summary.columns.intersection(ns_summary.columns)
cna_summary, ns_summary = cna_summary[common_cancers], ns_summary[common_cancers]

# Join on the index; the right-hand columns get the '_MUT' suffix. Rows that
# are empty in every column are then removed.
cna_ns_summary = (
    cna_summary
    .join(ns_summary, how='inner', rsuffix='_MUT')
    .dropna(axis=0, how='all')
)
In [75]:
# Print, for every common cancer type, the Spearman rank correlation between
# its CNA column and its '_MUT' column.
n = len(common_cancers)  # not used below; kept in case another cell reads it
for can in common_cancers:
    # Use only rows where both values are present. scipy's spearmanr does not
    # skip NaNs by default, so partially missing rows would otherwise turn
    # the result into NaN.
    paired = cna_ns_summary[[can, can + '_MUT']].dropna()
    x = paired[can]
    y = paired[can + '_MUT']
    rho = stats.spearmanr(x, y)
    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('%s %s' % (can, rho))