In [3]:
    
%run ../../shared_setup.ipynb
    
    
    
In [4]:
    
# load indels for all three crosses
callsets = load_callsets(COMBINED_CALLSET_FN_TEMPLATE, 
                         variant_filter='FILTER_PASS & ~is_snp')
    
    
In [5]:
    
# define a generic function for plotting an indel size histogram
def plot_indel_size_stacked_hist(ax, coding, major_step, minor_step, cross=None, minsize=-33, maxsize=33):
    
    # obtain variants array
    if cross:
        variants = callsets[cross]['variants']
    else:
        variants = np.concatenate([callsets[cross]['variants'] for cross in CROSSES]).view(np.recarray)
        
    # filter coding/non-coding
    if coding:
        variants = variants[variants.CDSAnnotationID != b'.']
    else:
        variants = variants[variants.CDSAnnotationID == b'.']
        
    # obtain indel sizes
    # N.B., there are some multiallelic variants, however all PASS variants are Mendelian
    # segregating, therefore any multiallelic implies both parents have non-reference alleles
    # and can determine relative indel size difference by subtraction of alt1 and alt2 indel sizes.
    # N.B., this also works for biallelic because the svlen array defaults to zero.
    svlen = variants.svlen[:, 0] - variants.svlen[:, 1]
    # obtain STR information
    is_str = variants.STR
        
    # prepare data series for plotting
    x = [svlen[~is_str], svlen[is_str]]
    # prepare axes
    sns.despine(ax=ax, offset=0, left=False)
    
    # plot histogram 
    ax.hist(x, 
            bins=np.arange(minsize, maxsize, 1), 
            histtype='bar', 
            stacked=1, 
            color=['w', 'k'], 
            edgecolor='k', 
            linewidth=.5, 
            alpha=1,
            align='left')
    
    # tidy up axes
    ax.set_xlabel('size (bp)')
    ax.xaxis.tick_bottom()
    ax.set_xlim(minsize-1, maxsize+1)
    major_locator = mpl.ticker.MultipleLocator(major_step)
    minor_locator = mpl.ticker.MultipleLocator(minor_step)
    ax.xaxis.set_major_locator(major_locator)
    ax.xaxis.set_minor_locator(minor_locator)
    
    return ax
    
In [6]:
    
# plot coding indels
width = 8 * (1/3)
height = width / 1.2
fig, ax = plt.subplots(figsize=(width, height))
plot_indel_size_stacked_hist(ax, coding=True, major_step=10, minor_step=1, cross=None)
ax.set_title('Coding indels')
ax.set_ylabel('frequency')
fig.tight_layout()
fig.savefig('../../artwork/main/fig1A1.jpeg', dpi=1200, jpeg_quality=100);
    
    
In [7]:
    
# plot non-coding indels
width = 8 * (1/3)
height = width / 1.2
fig, ax = plt.subplots(figsize=(width, height))
plot_indel_size_stacked_hist(ax, coding=False, major_step=10, minor_step=1, cross=None)
ax.set_title('Non-coding indels')
ax.set_ylabel('frequency')
fig.tight_layout()
fig.savefig('../../artwork/main/fig1A2.jpeg', dpi=1200, jpeg_quality=100);
    
    
In [8]:
    
# plot legend
width = 8 * (1/3)
height = width / 1.2
fig, ax = plt.subplots(figsize=(width, height))
ax.set_axis_off()
p = plt.Rectangle((0, 0), 1, 1, fc="k")
q = plt.Rectangle((0, 0), 1, 1, fc="w")
ax.legend([p, q], ['STR', 'non-STR'], 
          ncol=1, loc='upper left', bbox_to_anchor=(0, 1))
fig.tight_layout()
fig.savefig('../../artwork/main/fig1A_legend.jpeg', dpi=1200, jpeg_quality=100);
    
    
In [ ]: