In [1]:
%run ../../shared_setup.ipynb
In [2]:
def impute_inheritance_nearest(inheritance, pos, pos_impute):
"""Impute inheritance at unknown positions, by copying from
nearest neighbouring position where inheritance is known.
Parameters
----------
inheritance : array_like, int, shape (n_variants, n_gametes)
An array of integers coding the allelic inheritance state at the
known positions.
pos : array_like, int, shape (n_variants,)
Array of genomic positions at which `inheritance` was determined.
pos_impute : array_like, int
Array of positions at which to impute inheritance.
Returns
-------
imputed_inheritance : ndarray, int
An array of integers coding the imputed allelic inheritance.
"""
# check inputs
inheritance = np.asarray(inheritance)
assert inheritance.ndim == 2
pos = np.asarray(pos)
assert pos.ndim == 1
pos_impute = np.asarray(pos_impute)
assert pos_impute.ndim == 1
n_variants = pos.size
assert inheritance.shape[0] == n_variants
# find indices of neighbouring variants
indices_left = np.clip(np.searchsorted(pos, pos_impute), 0, n_variants - 1)
indices_right = np.clip(indices_left + 1, 0, n_variants - 1)
inh_left = np.take(inheritance, indices_left, axis=0)
inh_right = np.take(inheritance, indices_right, axis=0)
# find positions of neighbouring variants
pos_left = np.take(pos, indices_left)
pos_right = np.take(pos, indices_right)
# compute distance to neighbours
dist_left = np.abs(pos_impute - pos_left)
dist_right = np.abs(pos_right - pos_impute)
# build output
out = np.zeros_like(inh_left)
out[(dist_left <= dist_right)] = inh_left[(dist_left <= dist_right)]
out[(dist_left > dist_right)] = inh_right[(dist_left > dist_right)]
return out
In [3]:
def impute_inheritance_neighbours(inheritance, pos, pos_impute):
"""Impute inheritance at unknown positions, by copying from
neighbouring position where inheritance is known.
Parameters
----------
inheritance : array_like, int, shape (n_variants, n_gametes)
An array of integers coding the allelic inheritance state at the
known positions.
pos : array_like, int, shape (n_variants,)
Array of genomic positions at which `inheritance` was determined.
pos_impute : array_like, int
Array of positions at which to impute inheritance.
Returns
-------
imputed_inheritance : ndarray, int
An array of integers coding the imputed allelic inheritance.
"""
# check inputs
inheritance = np.asarray(inheritance)
assert inheritance.ndim == 2
pos = np.asarray(pos)
assert pos.ndim == 1
pos_impute = np.asarray(pos_impute)
assert pos_impute.ndim == 1
n_variants = pos.size
assert inheritance.shape[0] == n_variants
# find indices of neighbouring variants
indices_left = np.clip(np.searchsorted(pos, pos_impute), 0, n_variants - 1)
indices_right = np.clip(indices_left + 1, 0, n_variants - 1)
inh_left = np.take(inheritance, indices_left, axis=0)
inh_right = np.take(inheritance, indices_right, axis=0)
# build output
pos_left = np.take(pos, indices_left)
out = np.zeros_like(inh_left)
out[inh_left == inh_right] = inh_left[inh_left == inh_right]
out[pos_left == pos_impute] = inh_left[pos_left == pos_impute]
return out
In [4]:
gatk = load_callsets(GATK_CALLSET_FN_TEMPLATE,
variant_filter='FILTER_PASS',
call_filter=gatk_conf_calls,
sample_exclusions=excessive_recomb_samples)
In [5]:
cortex = load_callsets(CORTEX_CALLSET_FN_TEMPLATE,
variant_filter='FILTER_PASS',
call_filter=cortex_conf_calls,
sample_exclusions=excessive_recomb_samples)
In [6]:
combined = load_callsets(COMBINED_CALLSET_FN_TEMPLATE,
variant_filter='FILTER_PASS',
call_filter=combined_conf_calls,
sample_exclusions=excessive_recomb_samples)
In [7]:
combined_snp = filter_callsets(combined, variant_filter='is_snp')
In [8]:
combined_indel = filter_callsets(combined, variant_filter='~is_snp')
In [9]:
def compare_inheritance_chrom(cs1, cs2, chrom, mode='nearest'):
# filter to chromosome
cs1 = filter_variants(cs1, query="CHROM == b'%s'" % chrom, verbose=False)
cs2 = filter_variants(cs2, query="CHROM == b'%s'" % chrom, verbose=False)
# unpack callsets
v1, _, c2d1 = unpack_callset(cs1)
pos1 = v1['POS']
g1 = c2d1['genotype']
inh1 = inheritance(g1)
v2, _, c2d2 = unpack_callset(cs2)
pos2 = v2['POS']
g2 = c2d2['genotype']
inh2 = inheritance(g2)
# impute inheritance
if mode == 'nearest':
inh2_imputed = impute_inheritance_nearest(inh1, pos1, pos2)
else:
inh2_imputed = impute_inheritance_neighbours(inh1, pos1, pos2)
# calculate discordance
is_both_called = (((inh2 == INHERITANCE_PARENT1) | (inh2 == INHERITANCE_PARENT2)) &
((inh2_imputed == INHERITANCE_PARENT1) | (inh2_imputed == INHERITANCE_PARENT2)))
is_discord = (inh2 != inh2_imputed) & is_both_called
n_discord = nnz(is_discord)
is_concord = (inh2 == inh2_imputed) & is_both_called
n_concord = nnz(is_concord)
return n_discord, n_concord
def compare_inheritance(cs1, cs2, mode='nearest'):
n_discord = n_concord = 0
for chrom in CHROMOSOMES:
chrom = str(chrom, 'ascii')
n_discord_chrom, n_concord_chrom = compare_inheritance_chrom(cs1, cs2, chrom, mode=mode)
n_discord += n_discord_chrom
n_concord += n_concord_chrom
return n_discord, n_concord
In [10]:
cross = '3d7_hb3'
compare_inheritance(gatk[cross], cortex[cross])
Out[10]:
In [11]:
cross = '3d7_hb3'
compare_inheritance(gatk[cross], cortex[cross], mode='neighbours')
Out[11]:
In [12]:
cross = '3d7_hb3'
compare_inheritance(cortex[cross], gatk[cross])
Out[12]:
In [13]:
cross = '3d7_hb3'
compare_inheritance(cortex[cross], gatk[cross], mode='neighbours')
Out[13]:
In [14]:
cross = 'hb3_dd2'
compare_inheritance(gatk[cross], cortex[cross])
Out[14]:
In [15]:
cross = 'hb3_dd2'
compare_inheritance(cortex[cross], gatk[cross])
Out[15]:
In [16]:
cross = '7g8_gb4'
compare_inheritance(gatk[cross], cortex[cross])
Out[16]:
In [17]:
cross = '7g8_gb4'
compare_inheritance(cortex[cross], gatk[cross])
Out[17]:
In [18]:
cross = '3d7_hb3'
compare_inheritance(combined_snp[cross], combined_indel[cross])
Out[18]:
In [19]:
cross = '3d7_hb3'
compare_inheritance(combined_indel[cross], combined_snp[cross])
Out[19]:
In [20]:
cross = 'hb3_dd2'
compare_inheritance(combined_snp[cross], combined_indel[cross])
Out[20]:
In [21]:
cross = 'hb3_dd2'
compare_inheritance(combined_indel[cross], combined_snp[cross])
Out[21]:
In [22]:
cross = '7g8_gb4'
compare_inheritance(combined_snp[cross], combined_indel[cross])
Out[22]:
In [23]:
cross = '7g8_gb4'
compare_inheritance(combined_indel[cross], combined_snp[cross])
Out[23]:
In [25]:
tbl = [['cross', 'Cortex|GATK', 'GATK|Cortex', 'INDELs|SNPs', 'SNPs|INDELs']]
mode = 'nearest'
for cross in CROSSES:
row = [LABELS[cross]]
n_discord, n_concord = compare_inheritance(gatk[cross], cortex[cross], mode=mode)
val = '%s/%s' % (n_discord, n_discord + n_concord)
row.append(val)
n_discord, n_concord = compare_inheritance(cortex[cross], gatk[cross], mode=mode)
val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
row.append(val)
n_discord, n_concord = compare_inheritance(combined_snp[cross], combined_indel[cross], mode=mode)
val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
row.append(val)
n_discord, n_concord = compare_inheritance(combined_indel[cross], combined_snp[cross], mode=mode)
val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
row.append(val)
tbl += [row]
etl.wrap(tbl).displayall(index_header=False)
In [26]:
tbl = [['cross', 'Cortex|GATK', 'GATK|Cortex', 'INDELs|SNPs', 'SNPs|INDELs']]
mode = 'neighbours'
for cross in CROSSES:
row = [LABELS[cross]]
n_discord, n_concord = compare_inheritance(gatk[cross], cortex[cross], mode=mode)
val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
row.append(val)
n_discord, n_concord = compare_inheritance(cortex[cross], gatk[cross], mode=mode)
val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
row.append(val)
n_discord, n_concord = compare_inheritance(combined_snp[cross], combined_indel[cross], mode=mode)
val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
row.append(val)
n_discord, n_concord = compare_inheritance(combined_indel[cross], combined_snp[cross], mode=mode)
val = '%s/%s (%.4f%%)' % (n_discord, n_discord + n_concord, n_discord * 100 / (n_discord + n_concord))
row.append(val)
tbl += [row]
etl.wrap(tbl).displayall(index_header=False)
In [26]:
def fig_inheritance(fig, callsets, cross, chrom, filename=None, step=100):
callset = callsets[cross]
ax = plt.subplot2grid((9, 1), (0, 0), rowspan=6)
# don't include parents
n_samples = len(callset['calldata'].dtype.names)
plot_inheritance(ax, callset, chrom, sample_indices=range(2, n_samples))
ax.set_xticks([])
# ax.set_title('%s, chromosome %s' % (LABELS[cross], int(chrom[6:8])), fontsize=8)
ax.set_ylabel('progeny clones', rotation=90)
ax.yaxis.tick_left()
ax.set_yticklabels([])
ax = plt.subplot2grid((9, 1), (6, 0))
plot_variants_locator(ax, callset, chrom, step=step)
#ax.set_xticklabels([])
ax.set_xticks([])
ax = plt.subplot2grid((9, 1), (7, 0))
plot_accessibility(ax, chrom, cross, linewidth=.5)
ax.set_ylabel('accessibility', rotation=0, ha='right', va='center')
ax = plt.subplot2grid((9, 1), (8, 0))
plot_genes(ax, chrom)
# ax.set_xticks([])
# ax.spines['bottom'].set_visible(False)
ax.set_ylabel('genes', rotation=0, ha='right', va='center')
ax.set_xlabel('position (bp)')
fig.subplots_adjust(hspace=0.02, left=.12, top=.9, right=.98, bottom=.13)
In [27]:
chrom = 'Pf3D7_12_v3'
callsets = combined_snp
cross = 'hb3_dd2'
fig = plt.figure(figsize=(7, 4))
fig.suptitle('%s, chromosome %s, combined callset, SNPs' % (LABELS[cross], int(chrom[6:8])),
fontsize=10, fontweight='bold')
fig_inheritance(fig, callsets, cross, chrom, step=10)
fn = '../../artwork/supp/inheritance_combined_snps.{dpi}.{fmt}'
for fmt in 'jpeg', 'png':
for dpi in 120, 300:
fig.savefig(fn.format(dpi=dpi, fmt=fmt), dpi=dpi, jpeg_quality=100)
In [28]:
chrom = 'Pf3D7_12_v3'
callsets = combined_indel
cross = 'hb3_dd2'
fig = plt.figure(figsize=(7, 4))
fig.suptitle('%s, chromosome %s, combined callset, INDELs' % (LABELS[cross], int(chrom[6:8])),
fontsize=10, fontweight='bold')
fig_inheritance(fig, callsets, cross, chrom, step=10)
fn = '../../artwork/supp/inheritance_combined_indel.{dpi}.{fmt}'
for fmt in 'jpeg', 'png':
for dpi in 120, 300:
fig.savefig(fn.format(dpi=dpi, fmt=fmt), dpi=dpi, jpeg_quality=100)
In [ ]: