"Clonal heterogeneity influences the fate of new adaptive mutations"
Ignacio Vázquez-García, Francisco Salinas, Jing Li, Andrej Fischer, Benjamin Barré, Johan Hallin, Anders Bergström, Elisa Alonso-Pérez, Jonas Warringer, Ville Mustonen, Gianni Liti
This IPython notebook is provided for reproduction of Figures 4 and S5 of the paper. It can be viewed by copying its URL to nbviewer and it can be run by opening it in binder.
In [1]:
# Load external dependencies
from setup import *
# Load internal dependencies
import config,plot,utils
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
# Chromosome coordinate table, without the entries whose 'chr_arabic' is 17 or 18.
# NOTE(review): 17 and 18 presumably denote non-nuclear sequences - confirm against utils.chr_coords().
chr_coords = utils.chr_coords()
keep_chr = ~chr_coords['chr_arabic'].isin([17, 18])
chr_coords = chr_coords[keep_chr]
chr_coords.head()
Out[2]:
In [3]:
# Load the de novo SNV/indel calls. keep_default_na=False stops pandas from turning
# empty fields into NaN, so a missing clone identifier is read as the empty string.
seq_mut_df = pd.read_csv(
    dir_data + 'seq/de-novo/seq_de_novo_snv_indel.csv',
    encoding='utf-8',
    keep_default_na=False,
)
# Discard records that are not assigned to a clone
has_clone = seq_mut_df['clone'] != ''
seq_mut_df = seq_mut_df.loc[has_clone].reset_index(drop=True)

# Remap genotype strings to integer codes (0, 1 or 2); values missing from the mapping become NaN
genotype_codes = {
    '0': 0, '1': 2,
    '0/0': 0, '0/1': 1, '1/0': 1, '1/1': 2,
}
seq_mut_df['genotype'] = seq_mut_df['genotype'].map(genotype_codes)

# Tag every record with its variant type
seq_mut_df['type'] = 'snv_indel'

seq_mut_df.head()
Out[3]:
In [4]:
# Load the de novo copy-number / LOH table; empty fields are kept as '' (keep_default_na=False)
seq_cn_loh_df = pd.read_csv(
    dir_data + 'seq/de-novo/seq_de_novo_cn_loh.csv.gz',
    encoding='utf-8',
    keep_default_na=False,
)
# Convert the numeric columns; entries that cannot be parsed (e.g. '') are coerced to NaN
for numeric_column in ('genotype', 'pos_cum'):
    seq_cn_loh_df[numeric_column] = pd.to_numeric(seq_cn_loh_df[numeric_column], errors='coerce')
Join all variants (SNVs, indels, LOH, copy number) into a single dataframe:
In [5]:
# Keys that identify a sequenced clone
columns = ['selection', 'population', 'clone']

# Annotate each SNV/indel record with the 'set' and 'lineage' of its clone,
# looked up from the copy-number/LOH table (one row per clone after de-duplication).
clone_annotation = seq_cn_loh_df[columns + ['set', 'lineage']].drop_duplicates(subset=columns)
seq_mut_fixed_df = seq_mut_df.merge(clone_annotation, on=columns, how='left')

# Left-join the copy-number/LOH table against the clones present in the SNV/indel table.
# The right-hand frame contains only the key columns, so this merge adds no new columns.
mutated_clones = seq_mut_df[columns].drop_duplicates(subset=columns)
seq_cn_loh_fixed_df = seq_cn_loh_df.merge(mutated_clones, on=columns, how='left')

# Stack both variant tables into a single frame with a fresh 0..n-1 index
seq_dn_df = pd.concat([seq_cn_loh_fixed_df, seq_mut_fixed_df], axis=0, ignore_index=True)

seq_dn_df.head()
Out[5]:
In [6]:
# Fig. S5: one panel per population showing, for every ancestral (time == 0) haploid clone,
# the background genotype as a heatmap and the SNVs/indels as annotated markers.

def _rows_of_type(df, types):
    """Return the rows of `df` whose 'type' index level is in `types`, without all-NaN columns."""
    indexer = [slice(None)] * len(df.index.names)
    indexer[df.index.names.index('type')] = types
    return df.loc[tuple(indexer), :].dropna(axis=1, how='all')


# Keep ancestral haploid isolates only. The explicit copy makes the selection an independent
# frame, so assigning the 'clone' column below does not trigger SettingWithCopyWarning.
is_ancestral_haploid = (
    seq_dn_df['type'].isin(['genotype', 'snv_indel'])
    & (seq_dn_df['time'] == 0)
    & (seq_dn_df['ploidy'] == 'haploid')
)
seq_dn_df_sliced = seq_dn_df[is_ancestral_haploid].copy()
seq_dn_df_sliced['clone'] = seq_dn_df_sliced['clone'].astype(int)
seq_dn_df_sliced = seq_dn_df_sliced.sort_values('clone')

# Rows: one per clone and variant type; columns: one per genomic position
seq_dn_df_sliced = pd.pivot_table(
    seq_dn_df_sliced,
    index=['selection', 'population', 'time', 'clone', 'lineage', 'type', 'ploidy'],
    columns=['chr_arabic', 'chr_roman', 'pos_cum', 'gene'],
    values='genotype'
)

fig = plt.figure(figsize=(8, 6))

# The outer grid has two side-by-side cells, i.e. room for two populations.
grid = gridspec.GridSpec(1, 2, wspace=0.2)

gs = {}

# Group on the scalar level name (not a one-element list) so that every key `p` is the
# population name itself. With a list-like grouper of length one, pandas >= 2.0 yields
# 1-tuples, which would break `p.replace(...)` and the string lookup into `gs` below.
for ii, (p, sdf) in enumerate(seq_dn_df_sliced.groupby(level='population')):

    gs[p] = gridspec.GridSpecFromSubplotSpec(1, 1,
                                             subplot_spec=grid[ii],
                                             hspace=0, wspace=0)

    # Retrieve axis
    ax = plt.subplot(gs[p][:])

    ### Background genotype ###
    gt = _rows_of_type(sdf, ['genotype'])

    x = gt.columns.get_level_values('pos_cum').values
    y = np.arange(len(gt.index))
    # Forward-fill along the genome so positions without a call inherit the previous call
    data = gt.ffill(axis=1).values

    # Make a color map of fixed colors
    cmap = plt.cm.colors.ListedColormap([config.background['color']['NA'],
                                         config.background['color']['WA']])

    # Title, axis labels and tick labels are left empty here and set explicitly below
    title = ''
    xlabel = ''
    ylabel = ''
    xticklabels = []
    yticklabels = []

    # One extra coordinate is appended to x and y before they are passed as the heatmap edges
    plot.heatmap(np.r_[x, x.max()+1], np.r_[y, y.max()+1], data,
                 ax, title, xlabel, ylabel, xticklabels, yticklabels, cmap=cmap,
                 vmin=np.nanmin(data), vmax=np.nanmax(data), speed='fast')

    # Set title
    ax.set_title(p.replace('_', ' ')+' T0', fontsize=6, weight='bold')

    # Set ticks: y ticks are colored by the lineage of the first clone in the panel
    lineage = gt.index.get_level_values('lineage').unique()[0]
    ax.tick_params(axis='y', colors=config.lineages[lineage]['fill'], width=5, which='both')

    ### De novo genotype ###
    gn = _rows_of_type(sdf, ['snv_indel'])

    if len(gn) > 0:

        for _, g in gn.groupby(level='clone'):

            # Keep only the positions at which this clone carries a call
            g = g.dropna(axis=1)

            x_mut = g.columns.get_level_values('pos_cum').values
            # NOTE(review): the y coordinate is derived from the clone number, whereas the
            # heatmap rows above are placed by row position (0..n-1). The two only line up
            # if clone numbers are 0-based and contiguous - confirm against the data.
            y_mut = np.repeat(g.index.get_level_values('clone').values+.5, len(x_mut))
            genotypes = g.values.flatten()

            ax.scatter(x_mut, y_mut,
                       facecolors=[config.genotype['color'][d] for d in genotypes],
                       edgecolors='lightgray', s=8, rasterized=False, zorder=3)

            # Label each marker with its gene name, slightly below the marker
            genes = g.columns.get_level_values('gene')
            for gene, xy in zip(genes, zip(x_mut, y_mut-.4)):
                ax.annotate(gene, xy=xy, xytext=(0, 0), textcoords='offset points',
                            bbox=dict(boxstyle="round,pad=0", fc="w", alpha=0.5),
                            fontsize=4.5, va='bottom', ha='center',
                            color='k', style=('italic' if gene != 'non-coding' else 'normal'),
                            path_effects=[path_effects.withStroke(linewidth=0.35, foreground='w')])

    # Set grid
    # x ticks at chromosome midpoints, vertical lines just after each chromosome start
    xlabels = chr_coords.chr_start + (chr_coords.chr_end - chr_coords.chr_start)/2.
    ax.set_xticks(xlabels, minor=False)
    ax.set_xticklabels(chr_coords.chr_roman, minor=False)
    ax.set_xlabel('Chromosome')
    for xstart in set(chr_coords.chr_start):
        ax.axvline(xstart+1., lw=0.5, ls="-", color="lightgray")

    # y ticks at row centers, labelled 'C<clone>'; horizontal lines at row boundaries
    ylabels = gt.index.get_level_values('clone').map(str)
    ax.set_yticks(np.arange(len(ylabels)) + 0.5, minor=False)
    ax.set_yticklabels('C' + ylabels, fontweight='bold', va='center', minor=False)
    for ystart in np.arange(len(ylabels)):
        ax.axhline(ystart, lw=0.5, ls="-", color="lightgray")

# Set legend (drawn below the panel of the population named here)
for k in ['WAxNA_F12_2_YPD']:

    ax = plt.subplot(gs[k][:])

    wt_artist = patches.Rectangle((0, 0), width=1, height=1, color=config.background['color']['WA'])
    hom_artist = patches.Rectangle((0, 0), width=1, height=1, color=config.background['color']['NA'])

    leg1 = ax.legend([wt_artist, hom_artist],
                     ['WA', 'NA'],
                     bbox_to_anchor=(0.7, -0.1), ncol=1,
                     frameon=False, loc='center',
                     borderaxespad=0, handlelength=0.75,
                     title='Background\ngenotype', prop={'size': 5})

    hom_artist = lines.Line2D((0, 1), (0, 0), mec='k', mfc=config.genotype['color'][2],
                              marker='o', markersize=3, linestyle='')

    leg2 = ax.legend([hom_artist],
                     ['hom.'],
                     bbox_to_anchor=(0.9, -0.1), ncol=1,
                     frameon=False, loc='center',
                     borderaxespad=0, handlelength=0.75,
                     title='De novo\ngenotype', prop={'size': 5})

    # The second ax.legend() call replaces the first legend, so add the first one back
    ax.add_artist(leg1)

    for leg in [leg1, leg2]:
        plt.setp(leg.get_title(), fontsize=6, multialignment='center')

# Uniform label and tick sizes on every axes of the figure
for ax in fig.get_axes():

    ax.xaxis.label.set_size(6)
    ax.yaxis.label.set_size(6)
    ax.tick_params(axis='x', which='major', size=2, labelsize=6)
    ax.tick_params(axis='y', which='major', size=2, labelsize=4.5)
    ax.tick_params(axis='both', which='minor', size=1, labelsize=4)

plot.save_figure(dir_supp+'figures/supp_figure_seq_clones/supp_figure_seq_ancestral_clones')
plt.show()
Fig. S5: Sequences of ancestral clones sampled from the WAxNA $\text{F}_\text{12}$ founder populations, which were obtained by bulk crossing between the WA and NA parents. Pre-existing and de novo SNVs and indels were detected by whole-genome sequencing in single-cell derived clones from ancestral populations at ${t\,{=}\,0}$ days. Chromosomes are shown on the $x$-axis; clone isolates are listed on the left. WA (in blue) and NA (in red) represent haploid genotypes. Individual cells with unique background genotypes carry private de novo SNVs and indels (circles).
In [7]:
# Fig. 4: one panel per (selection, population) for the evolved (time == 32) isolates,
# drawn by plot.genome_instability, with panel labels and shared legends added afterwards.

# Keep evolved isolates only
is_evolved = (
    seq_dn_df['type'].isin(['consensus', 'snv_indel', 'copy_number', 'loh'])
    & (seq_dn_df['time'] == 32)
)

# Rows: one per clone and variant type; columns: one per genomic position
seq_dn_df_sliced = pd.pivot_table(
    seq_dn_df[is_evolved],
    index=['selection', 'population', 'time', 'set', 'clone', 'lineage', 'type', 'ploidy'],
    columns=['chr_arabic', 'chr_roman', 'pos_cum', 'gene'],
    values='genotype'
)

fig = plt.figure(figsize=(4, 18))

# Count the 'consensus' and 'loh' rows of every (selection, population) group; these
# counts set the relative panel heights and the number of sub-rows in each panel.
indexer = [slice(None)]*len(seq_dn_df_sliced.index.names)
indexer[seq_dn_df_sliced.index.names.index('type')] = ['consensus', 'loh']
seq_dn_df_shape = seq_dn_df_sliced.loc[tuple(indexer), :].dropna(axis=1, how='all')

shape = pd.DataFrame({
    k: x.shape for k, x in seq_dn_df_shape.groupby(level=['selection', 'population'])
})

# The first row of `shape` holds the number of rows per group. Positional .iloc replaces
# the .ix indexer, which was removed in pandas 1.0.
ygrid = shape.iloc[0].values

grid = gridspec.GridSpec(len(ygrid), 1, height_ratios=ygrid, hspace=0.9)

gs = {}

# NOTE(review): `ygrid[ii]` assumes the groups of `seq_dn_df_sliced` come in the same order
# (and number) as those of `seq_dn_df_shape` above - confirm every group has consensus/loh rows.
for ii, ((e, p), data) in enumerate(seq_dn_df_sliced.groupby(level=['selection', 'population'])):

    gs[(e, p)] = gridspec.GridSpecFromSubplotSpec(ygrid[ii], 1,
                                                  subplot_spec=grid[ii],
                                                  hspace=0, wspace=0)

    print(e, p, data.shape)

    # The nested grid spec (not an Axes) is handed to the plotting helper via `ax`
    plot.genome_instability(data, ax=gs[(e, p)], title=p.replace('_', ' ')+' T32')

# Set panel labels: a bold letter and the selection name above the first panel of each selection
for panel_label, selection, population in [('A', 'HU', 'WAxNA_F12_1_HU_1'),
                                           ('B', 'RM', 'WAxNA_F12_1_RM_1')]:

    ax1 = plt.subplot(gs[selection, population][(0, 0)])

    ax1.text(-0.05, 4, panel_label, transform=ax1.transAxes,
             fontsize=9, fontweight='bold', va='center', ha='right')

    ax1.text(0, 4, 'Selection: %s' % config.selection['long_label'][selection],
             transform=ax1.transAxes, fontsize=8, va='center', ha='left')

# Set legend (attached to one panel per selection)
for k in [('HU', 'WAxNA_F12_2_HU_1'), ('RM', 'WAxNA_F12_1_RM_3')]:

    ax3 = plt.subplot(gs[k][(0, 0)])

    wt_artist = patches.Rectangle((0, 0), width=1, height=1, color=config.background['color']['WA/WA'])
    het_artist = patches.Rectangle((0, 0), width=1, height=1, color=config.background['color']['WA/NA'])
    hom_artist = patches.Rectangle((0, 0), width=1, height=1, color=config.background['color']['NA/NA'])

    leg1 = ax3.legend([wt_artist, hom_artist, het_artist],
                      ['WA/WA', 'NA/NA', 'WA/NA'],
                      bbox_to_anchor=(1.1, 2.5), ncol=1,
                      frameon=False, loc='center',
                      borderaxespad=0, handlelength=0.75,
                      title='Background', prop={'size': 5})

    ax4 = plt.subplot(gs[k][(2, 0)])

    snv_indel_artist = lines.Line2D((0, 1), (0, 0), mec='k', mfc='w', marker='o', markersize=3, linestyle='')
    loh_artist = patches.Rectangle((0, 0), width=1, height=1, color='k')
    cn_gain_artist = patches.Rectangle((0, 0), width=1, height=1, color='k', hatch='---', fill=False)
    cn_loss_artist = patches.Rectangle((0, 0), width=1, height=1, color='k', hatch='xxx', fill=False)

    leg2 = ax4.legend([snv_indel_artist, loh_artist, cn_gain_artist, cn_loss_artist],
                      ['SNV/indel', 'LOH', 'Copy gain', 'Copy loss'],
                      bbox_to_anchor=(1.1, 0.0), ncol=1,
                      frameon=False, loc='center',
                      borderaxespad=0, handlelength=0.75,
                      title='Variant type', prop={'size': 5})

    het_artist = lines.Line2D((0, 1), (0, 0), mec='k', mfc=config.genotype['color'][1],
                              marker='o', markersize=3, linestyle='')
    hom_artist = lines.Line2D((0, 1), (0, 0), mec='k', mfc=config.genotype['color'][2],
                              marker='o', markersize=3, linestyle='')

    leg3 = ax4.legend([het_artist, hom_artist],
                      ['het.', 'hom.'],
                      bbox_to_anchor=(1.1, -4), ncol=1,
                      frameon=False, loc='center',
                      borderaxespad=0, handlelength=0.75,
                      title='Genotype', prop={'size': 5})

    # The third ax4.legend() call replaces the second legend on ax4, so add it back
    ax4.add_artist(leg2)

    for leg in [leg1, leg2, leg3]:
        plt.setp(leg.get_title(), fontsize=6)

# Uniform label and tick sizes on every axes of the figure
for ax in fig.get_axes():

    ax.xaxis.label.set_size(6)
    ax.yaxis.label.set_size(6)
    ax.tick_params(axis='both', which='major', size=2, labelsize=6)
    ax.tick_params(axis='both', which='minor', size=1, labelsize=4)

plot.save_figure(dir_supp+'figures/supp_figure_seq_clones/supp_figure_seq_evolved_clones')
plt.show()
Fig. 4: Genome instability in sequences of evolved clones. SNVs, indels and chromosome-level aberrations were detected by whole-genome sequencing in single-cell derived clones from evolved populations, after ${t\,{=}\,32}$ days in (A) hydroxyurea or (B) rapamycin (see Table S1). Chromosomes are shown on the $x$-axis; clone isolates are listed on the left, colored by lineage (see Fig. S7). The consensus shows the majority genotype across population isolates with sequence identity greater than 80%. WA/WA (in blue) and NA/NA (in red) represent homozygous diploid genotypes and WA/NA (in purple) represents a heterozygous genotype. Individual cells with shared background genotype carry de novo SNVs and indels (circles), de novo mis-segregations with loss-of-heterozygosity (solid segments) and de novo gains or losses in copy number (hatched segments). Driver and passenger mutations are listed along the bottom (drivers are in boldface). Populations marked by $\otimes$ indicate cross-contamination during the selection phase, but any derived events are independent.