In [1]:
%run ../../shared_setup.ipynb
In [2]:
tbl_regions_1b.display(5, caption='Genome regions classification')
In [3]:
tbl_regions_total_size = (
tbl_regions_1b
.aggregate('region_type', sum, 'region_size')
.rename('value', 'total_size')
)
lkp_regions_total_size = tbl_regions_total_size.lookupone('region_type', 'total_size')
tbl_regions_total_size.displayall(caption='Total size of region types')
In [4]:
max_chr_size = max(len(fasta[str(chrom, 'ascii')]) for chrom in CHROMOSOMES)
fig = plt.figure(figsize=(7, 9))
gs = mpl.gridspec.GridSpec(4*14, 1, height_ratios=[1, 1, .5, .5]*14)
gs.update(hspace=0)
for i, chrom in enumerate(CHROMOSOMES):
chrom = str(chrom, 'ascii')
ax = fig.add_subplot(gs[i*4+0])
plot_hp1(ax, chrom)
ax.set_xlim(-5000, max_chr_size)
ax = fig.add_subplot(gs[i*4+1])
plot_accessibility(ax, chrom, linewidth=.5)
ax.set_xlim(-5000, max_chr_size)
chrnum = int(chrom[6:8])
ax.set_ylabel(chrnum, ha='right', va='center', rotation=0)
ax = fig.add_subplot(gs[i*4+2])
plot_genes(ax, chrom, hypervariable_color='red', normal_color='#bbbbbb', width=.5, divider_linewidth=.5)
ax.spines['bottom'].set_visible(False)
ax.set_xlim(-5000, max_chr_size)
ax.set_xticks([])
fig.tight_layout()
# legend
ax = fig.add_axes([0, 0, 1, 1])
ax.set_axis_off()
ax.set_xticks([])
ax.set_yticks([])
region_types = 'Core', 'InternalHypervariable', 'SubtelomericHypervariable', 'SubtelomericRepeat', 'Centromere'
leg_artists = [plt.Rectangle((0, 0), 1, 1, fc=accessibility_colors[r]) for r in region_types]
leg_labels = ['%s (%.1fMb)' % (r, lkp_regions_total_size[r]*1./1e6) for r in region_types]
ax.legend(leg_artists, leg_labels, bbox_to_anchor=(1, 1), loc='upper right', borderaxespad=0.1)
fn = '../../artwork/supp/genome_regions_map.{dpi}.{fmt}'
for fmt in 'jpeg', 'png':
for dpi in 120, 300:
fig.savefig(fn.format(dpi=dpi, fmt=fmt), dpi=dpi, jpeg_quality=100)
In [5]:
tbl_coverage_summary_by_region = etl.fromtsv(os.path.join(PUBLIC_DIR, 'tbl_coverage_summary_by_region.wg.txt')).convertnumbers()
tbl_coverage_summary_by_region.eq('region', 'Core').displayall()
In [6]:
tbl_mq0_summary_by_region = etl.fromtsv(os.path.join(PUBLIC_DIR, 'tbl_mq0_summary_by_region.wg.txt')).convertnumbers()
tbl_mq0_summary_by_region.eq('region', 'Core').displayall()
In [7]:
LABELS
Out[7]:
In [8]:
tbl1 = tbl_coverage_summary_by_region.convert('run', LABELS).rename('pc_not_covered', 'pc_positions').addfield('state', 'no_coverage')
tbl2 = tbl_mq0_summary_by_region.convert('run', LABELS).rename('pc_mq0', 'pc_positions').addfield('state', 'mq0')
df_summary_by_region = tbl1.cat(tbl2).rename('run', 'sample').todataframe()
df_summary_by_region.head()
Out[8]:
In [9]:
region_colours = [accessibility_colors[r] for r in region_types]
region_types = 'Core', 'InternalHypervariable', 'SubtelomericHypervariable', 'SubtelomericRepeat'
parent_samples = ['3D7', 'HB3(1)', 'HB3(2)', 'Dd2', '7G8', 'GB4']
fg = sns.factorplot(x='sample', y='pc_positions', hue='region', data=df_summary_by_region, kind='bar',
hue_order=region_types, col='state',
x_order=parent_samples,
palette=region_colours, legend=False)
fg.fig.set_size_inches(7, 3)
fg.fig.tight_layout(w_pad=3)
fg.fig.subplots_adjust(bottom=.4)
# legend
ax = fg.fig.add_subplot(111)
#ax = plt.gca()
ax.set_axis_off()
ax.set_xticks([])
ax.set_yticks([])
leg_artists = [plt.Rectangle((0, 0), 1, 1, fc=accessibility_colors[r]) for r in region_types]
leg_labels = ['%s (%.1fMb)' % (r, lkp_regions_total_size[r]*1./1e6) for r in region_types]
ax.legend(leg_artists, leg_labels, bbox_to_anchor=(0.5, -0.4), loc='upper center', borderaxespad=0, fontsize=8, ncol=2)
fn = '../../artwork/supp/genome_regions_summary.{dpi}.{fmt}'
for fmt in 'jpeg', 'png':
for dpi in 120, 300:
fg.fig.savefig(fn.format(dpi=dpi, fmt=fmt), dpi=dpi, jpeg_quality=100)
In [10]:
bam_fn_template = '/data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/bam/{run}.realigned.bam'
stats_fn_template = '/data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/cache/{run}.{chrom}.{stats}.npy'
In [11]:
def load_stats(run, chrom, stats, requires_fasta=False):
stats_fn = stats_fn_template.format(run=run, chrom=chrom, stats=stats)
if os.path.exists(stats_fn):
a = np.load(stats_fn, mmap_mode='r').view(np.recarray)
else:
f = getattr(pysamstats, 'load_' + stats)
bam_fn = bam_fn_template.format(run=run)
bam = pysam.AlignmentFile(bam_fn)
if requires_fasta:
fasta = pysam.FastaFile(FASTA_FN)
a = f(bam, fasta, chrom=chrom)
else:
a = f(bam, chrom=chrom)
np.save(stats_fn, a)
return a
In [12]:
load_stats('ERR019054', 'Pf3D7_01_v3', 'mapq')
Out[12]:
In [13]:
load_stats('ERR019054', 'Pf3D7_01_v3', 'variation', requires_fasta=True)
Out[13]:
In [14]:
# tbl_samples = (etl
# .fromtsv('/data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/samples.txt')
# .addfield('ID', lambda row: '%s/%s/%s' % (row.clone, row.sample, row.run))
# )
# tbl_samples
In [15]:
# run2id = tbl_samples.lookupone('run', 'ID')
In [16]:
def plot_accessibility_diagnostics(run, chrom, figsize=(7, 7)):
chrlen = len(fasta[chrom])
coverage_ext = load_stats(run, chrom, 'coverage_ext')
mapq = load_stats(run, chrom, 'mapq')
variation = load_stats(run, chrom, 'variation', requires_fasta=True)
fig = plt.figure(figsize=figsize)
rows = 6
i = 0
i += 1
ax = fig.add_subplot(rows, 1, i)
sns.despine(ax=ax, bottom=True, top=False, offset=5)
x = coverage_ext.pos
y = coverage_ext.reads_all
# ax.fill_between(x, 0, y, lw=.1, color='k')
ax.plot(x, y, lw=.2, color='k')
ax.set_xlim(0, chrlen)
ax.set_ylim(0, 400)
ax.set_ylabel('DP', rotation=0, ha='right')
ax.set_yticks([0, 100, 200, 300])
ax.set_yticklabels(['0', '100X', '200X', '300X'])
ax.set_xlabel('position (bp)')
ax.xaxis.set_label_position('top')
i += 1
ax = fig.add_subplot(rows, 1, i)
sns.despine(ax=ax, bottom=True, offset=5)
ax.set_xticks([])
x = coverage_ext.pos
y = coverage_ext.reads_pp * 100 / coverage_ext.reads_all
# ax.fill_between(x, 100, y, lw=.1, color='k')
ax.plot(x, y, lw=.2, color='k')
ax.set_xlim(0, chrlen)
ax.set_ylim(0, 102)
ax.set_yticks([0, 100])
ax.set_ylabel('PP', rotation=0, ha='right')
i += 1
ax = fig.add_subplot(rows, 1, i)
sns.despine(ax=ax, bottom=True, offset=5)
ax.set_xticks([])
x = mapq.pos
y = mapq.rms_mapq
# ax.fill_between(x, 0, y, lw=.1, color='k')
ax.plot(x, y, lw=.2, color='k')
ax.set_xlim(0, chrlen)
ax.set_ylim(0, 62)
ax.set_yticks([0, 60])
ax.set_ylabel('MQ', rotation=0, ha='right')
i += 1
ax = fig.add_subplot(rows, 1, i)
x = mapq.pos
y = mapq.reads_mapq0 * 100 / mapq.reads_all
# ax.fill_between(x, 0, y, lw=.1, color='k')
ax.plot(x, y, lw=.2, color='k')
ax.set_xlim(0, chrlen)
ax.set_ylim(0, 102)
ax.set_yticks([0, 100])
ax.set_ylabel('MQ0', rotation=0, ha='right')
sns.despine(ax=ax, bottom=True, offset=5)
ax.set_xticks([])
i += 1
ax = fig.add_subplot(rows, 1, i)
x = variation.pos
y = variation.mismatches * 100 / variation.reads_all
# ax.fill_between(x, 0, y, lw=.1, color='k')
ax.plot(x, y, lw=.05, color='k')
ax.set_xlim(0, chrlen)
ax.set_ylim(0, 102)
ax.set_yticks([0, 100])
ax.set_ylabel('MIS', rotation=0, ha='right')
sns.despine(ax=ax, bottom=True, offset=5)
ax.set_xticks([])
i += 1
ax = fig.add_subplot(2*rows, 1, 2*i-1)
sns.despine(ax=ax, bottom=True, offset=5)
ax.set_xticks([])
plot_genes(ax, chrom, hypervariable_color='red', normal_color='#bbbbbb', width=.5, divider_linewidth=.5)
ax.set_xlim(0, chrlen)
ax.set_ylabel('genes', rotation=0, ha='right')
ax = fig.add_subplot(2*rows, 1, 2*i)
sns.despine(ax=ax, bottom=False, offset=5)
plot_accessibility(ax, chrom, linewidth=.5)
ax.set_xlim(0, chrlen)
ax.set_ylabel('classification', rotation=0, ha='right')
fig.suptitle('sample %s, chromosome %s' % (run2id[run], chrom), fontsize=12, fontweight='bold', y=.98, va='top')
# legend
region_types = 'Core', 'InternalHypervariable', 'SubtelomericHypervariable', 'SubtelomericRepeat', 'Centromere'
leg_artists = [plt.Rectangle((0, 0), 1, 1, fc=accessibility_colors[r]) for r in region_types]
leg_labels = ['%s (%.1fMb)' % (r, lkp_regions_total_size[r]*1./1e6) for r in region_types]
ax.legend(leg_artists, leg_labels, bbox_to_anchor=(0.5, -0.3), loc='upper center', borderaxespad=0, fontsize=8, ncol=2)
fig.tight_layout()
fig.subplots_adjust(hspace=.2, top=.83, bottom=.15)
fn = '../../artwork/supp/genome_regions_example.{dpi}.{fmt}'
for fmt in 'jpeg', 'png':
for dpi in 120, 300:
fig.savefig(fn.format(dpi=dpi, fmt=fmt), dpi=dpi, jpeg_quality=100)
plot_accessibility_diagnostics('ERR019054', 'Pf3D7_04_v3', figsize=(7, 6))
In [17]:
fig, ax = plt.subplots(figsize=(8, .5))
plot_accessibility(ax, 'Pf3D7_04_v3')
In [18]:
fig, ax = plt.subplots(figsize=(8, .5))
plot_hp1(ax, 'Pf3D7_04_v3')
In [19]:
fig, ax = plt.subplots(figsize=(8, .5))
chrom = 'Pf3D7_04_v3'
plot_genes(ax, chrom, hypervariable_color='r', normal_color='#bbbbbb')
In [ ]: