In this notebook, we use PaSDqc to explore the properties of individual chromosomes within a sample and use those properties to classify chromosomes as either copy neutral and well amplified, possible copy gain, possible copy loss, or poorly amplified. We use micronucleated single-cells sequenced by Zhang et al, 2015.
We also demonstrate the convenience functions that PaSDqc includes to perform amplicon distribution estimation. These functions abstract away the technicalities demonstrated in 02_example-basic_PSD.
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
# import matplotlib.gridspec as gridspec
import seaborn as sns
import pathlib
import PaSDqc
%matplotlib inline
In [2]:
sns.set_context('poster')
sns.set_style("ticks", {'ytick.minor.size': 0.0, 'xtick.minor.size': 0.0})
In [3]:
p = pathlib.Path("../01_example_report/psd/")
f_psd_list = sorted(p.glob("*.spec"))
psd_list = [PaSDqc.PSDTools.SamplePSD.load_from_file(str(f), name=f.stem) for f in f_psd_list]
med_list = [PaSDqc.PSDTools.normalize_psd(psd.avg_PSD()) for psd in psd_list]
In [4]:
freq = psd_list[0].freq
period = 1 / freq
In [5]:
chroms = PaSDqc.extra_tools.chroms_from_build('grch37')
[psd.fit_chrom_curves(chroms) for psd in psd_list]
[psd.fit_sample_curves() for psd in psd_list];
In [6]:
[psd.calc_chrom_props(chroms) for psd in psd_list];
In [7]:
psd_list[0].chrom_props
Out[7]:
In [8]:
idx = [psd.name.split('.')[0] for psd in psd_list]
df_stat = PaSDqc.extra_tools.summarize_chrom_classif_by_sample(psd_list, idx)
In [9]:
df_stat.iloc[0:5, 0:12]
Out[9]:
In [10]:
[psd.infer_chrom_amplicon_dist(chroms) for psd in psd_list];
In [11]:
psd_list[0].chrom_dist.keys()
Out[11]:
In [12]:
[psd.calc_sample_props() for psd in psd_list];
[psd.infer_sample_amplicon_dist() for psd in psd_list];
In [13]:
PaSDqc.extra_tools.summarize_sample_props(psd_list, idx)
Out[13]:
In [14]:
df_cnvs = pd.read_table("CNVs_Zhang_2015_MN.txt", index_col=0)
columns = [c for c in df_cnvs.columns if c.startswith('MN')]
cols2 = [c.split('.')[0] for c in columns]
df_cnv2 = df_cnvs.loc[:, columns].iloc[:-2, :] - df_cnvs[columns].iloc[:-2, :].median() # removing sex chromosomes
df_cnv2.index = [c.replace('chr', '') for c in df_cnv2.index]
df_cnv2.columns = cols2
# Mask MN9_d since CNV caller failed
mask = (df_cnv2 == 0)
mask.loc[:, 'MN9_d'] = True
In [15]:
def plot_cnv_heatmap(df_cnv, ax=None, add_cbar=True, cbar_ax=None):
if not ax:
f = plt.figure()
ax = f.add_subplot(111)
nd = df_cnv.T.as_matrix()
np_mask = np.ma.array(nd, mask=mask.T)
dcp = sns.diverging_palette(255, 133, l=60, n=7)
dcp_list = [dcp[0], dcp[1], dcp[2], dcp[3], dcp[3], dcp[3], dcp[3], dcp[4], dcp[5], dcp[6]]
cmap = matplotlib.colors.LinearSegmentedColormap.from_list('mycmap', dcp_list)
cax = ax.imshow(np_mask, aspect='equal', cmap=cmap, interpolation=None, vmax=1, vmin=-1)
ax.set_yticks(np.arange(0, df_cnv2.shape[1]))
ax.set_xticks(np.arange(0, df_cnv2.shape[0]))
ax.set_xticks(np.arange(0.5, df_cnv2.shape[0]+0.5), minor=True)
ax.set_xlabel('chromosome')
for y in np.arange(0.5, df_cnv2.shape[1], 1):
ax.axhline(y, linestyle='--', color='black', linewidth=1)
ax.set_yticklabels([])
ax.set_yticklabels([s.replace('_', '') for s in df_cnv2.columns])
ax.set_xticklabels(df_cnv2.index);
ax.grid(which='minor', color='black', linewidth=1)
#colorbar
if add_cbar:
cbar = ax.figure.colorbar(cax, ticks=[-1, 0, 1], cax=cbar_ax, orientation='horizontal')
cbar.ax.set_xticklabels(['Loss', 'Neutral', 'Gain'])
cbar.ax.xaxis.tick_top()
cbar.ax.tick_params(axis='x', which='major', pad=0)
In [16]:
MN2b = psd_list[3]
MN2b_kl = MN2b.KL_div_by_chrom()
freq = MN2b.ampl.freq['erf']
# Sample curves
MN2b_fit = MN2b.sample_curves['avg']
MN2b_fit_lower = MN2b.sample_curves['lower']
MN2b_fit_upper = MN2b.sample_curves['upper']
MN2b_dist = MN2b.sample_dist
# Chrom curves
MN2b_chr2_fit = MN2b.chrom_curves['2']
MN2b_chr2_dist = MN2b.chrom_dist['2']
MN2b_chr12_fit = MN2b.chrom_curves['12']
MN2b_chr12_dist = MN2b.chrom_dist['12']
MN2b_chr21_fit = MN2b.chrom_curves['21']
MN2b_chr21_dist = MN2b.chrom_dist['21']
In [17]:
# MAKE A GIANT FIGURE
f = plt.figure(figsize=(12, 14))
ax0 = plt.subplot2grid((7, 6), (0, 0), colspan=6, rowspan=2)
ax1 = plt.subplot2grid((7, 6), (2, 0), colspan=3, rowspan=2)
ax2 = plt.subplot2grid((7, 6), (2, 3), colspan=3, rowspan=2)
ax3 = plt.subplot2grid((7, 6), (4, 0), colspan=3, rowspan=3)
ax3_cbar = f.add_axes([0.11, 0.39, 0.375, 0.01])
ax4 = plt.subplot2grid((7, 6), (4, 3), colspan=3, rowspan=3)
ax4_cbar = f.add_axes([0.11+0.48, 0.39, 0.375, 0.01])
cp = sns.color_palette()
## KL divergence plot
PaSDqc.extra_tools.plot_KL_div_by_chrom(MN2b_kl, ax=ax0)
## Smooth spectrograms
ax1.plot(freq, MN2b_fit, label='Sample\n Avg')
ax1.fill_between(freq, MN2b_fit_lower, MN2b_fit_upper, color=cp[0], alpha=0.25)
ax1.plot(freq, MN2b_chr2_fit, label='MN2b Chr2 \n (Loss)', color=cp[5])
ax1.plot(freq, MN2b_chr12_fit, label='MN2b Chr12 \n (Gain)', color=cp[1])
ax1.plot(freq, MN2b_chr21_fit, label='MN2b Chr21\n (Aberrant)', color=cp[2])
ax1.set_xscale('log')
ax1.set_xticklabels(["0", "100 bp", "1 kb", "10 kb", "100 kb", "1 mb"])
ax1.set_xlabel('Genomic scale')
ax1.set_ylabel('PSD (dB)')
ax1.legend(bbox_to_anchor=(0, 1, 2.25, .102), loc=(0, 0), ncol=5, mode="expand", borderaxespad=0.)
## Distribution Plots
ax2.plot(MN2b_dist['freq'], MN2b_dist['dist'], label='MN3 Sample Avg', color=cp[0])
ax2.plot(MN2b_chr2_dist['freq'], MN2b_chr2_dist['dist'], label='MN3 chr2 (loss)', color=cp[5])
ax2.plot(MN2b_chr12_dist['freq'], MN2b_chr12_dist['dist'], label='MN3 chr12 (gain)', color=cp[1])
ax2.plot(MN2b_chr21_dist['freq'], MN2b_chr21_dist['dist'], label='MN3 chr21 (aberrant)', color=cp[2])
ax2.set_xscale('log')
ax2.set_xticklabels(["0", "100 bp", "1 kb", "10 kb", "100 kb", "1 mb"])
ax2.set_xlabel('Genomic scale')
ax2.set_ylabel('Density')
PaSDqc.extra_tools.plot_chrom_classification(df_stat, ax=ax3, cbar_ax=ax3_cbar)
ax3.tick_params(axis='x', which='major', labelsize=14, pad=5)
plot_cnv_heatmap(df_cnv2, ax=ax4, cbar_ax=ax4_cbar)
ax4.tick_params(axis='x', which='major', labelsize=14, pad=5)
plt.tight_layout(h_pad=2.5)
sns.despine(ax=ax0)
sns.despine(ax=ax1)
sns.despine(ax=ax2)
f.text(0.02, 0.97, "A", weight="bold", horizontalalignment='left', verticalalignment='center', fontsize=20)
f.text(0.02, 0.70, "B", weight="bold", horizontalalignment='left', verticalalignment='center', fontsize=20)
#f.text(0.49, 0.66, "C", weight="bold", horizontalalignment='left', verticalalignment='center', fontsize=20)
f.text(0.02, 0.43, "C", weight="bold", horizontalalignment='left', verticalalignment='center', fontsize=20)
f.text(0.51, 0.43, "D", weight="bold", horizontalalignment='left', verticalalignment='center', fontsize=20)
Out[17]:
In [ ]: