In [1]:
%run ~/relmapping/annot/notebooks/__init__.ipynb
In [2]:
fp_TFBS = 'annot/Fig2S1_overlaps/modERN_modENCODE/modERN_modENCODE_ext200_merge.bed'
fp_Ho2017 = 'annot/Fig2S1_overlaps/Ho2017/Ho2017_DNase_emb.bed'
df_TFBS = pd.read_csv(fp_TFBS, sep='\t', names=yp.NAMES_BED9[:4])
df_Ho2017 = pd.read_csv(fp_Ho2017, sep='\t', names=yp.NAMES_BED3)
In [3]:
gv = yp.GenomicVenn2(
BedTool.from_dataframe(df_regl_()[yp.NAMES_BED3]),
BedTool.from_dataframe(df_Ho2017),
label_a='Accessible\nsites',
label_b='(Ho et al.,\n2017) emb\nDNase-seq',
)
fig = plt.figure(figsize=(3,2))
plt.subplot(1,1,1)
v = gv.plot(style='compact')#fudge_labels_to_top=True)
v.get_patch_by_id('10').set_color(yp.RED)
v.get_patch_by_id('01').set_color(yp.GREEN)
v.get_patch_by_id('11').set_color(yp.YELLOW)
plt.savefig('annot_eLife_revised/_fig/Fig2S1CL_Ho2017.pdf', bbox_inches='tight', transparent=True)
In [4]:
gdf_a_w_b = yp.GenomicDataFrame(gv.df_a_with_b)
gdf_b_w_a = yp.GenomicDataFrame(gv.df_b_with_a)
gdf_a_only = yp.GenomicDataFrame(gv.df_a_only)
gdf_b_only = yp.GenomicDataFrame(gv.df_b_only)
l_gdf = [gdf_a_w_b, gdf_b_w_a, gdf_a_only, gdf_b_only]
for gdf_ in l_gdf:
fp_ = 'annot/Fig2S1_overlaps/modERN_modENCODE/modERN_modENCODE_peak_pileup.bw'
gdf_.add_track('TFBS_peak_calls', fp_, flank_len=1000, bin_size=5)
fp_ = 'WS260_ce10/WS260_ce10.exon_coverage_unstranded.bw'
gdf_.add_track('exon_coverage', fp_, flank_len=1000, bin_size=5)
gdf_.add_track('txn_initiation_fwd', config['annot_ce10']['fp_scap_fwd'], bin_size=1, flank_len=500)
gdf_.add_track('txn_initiation_rev', config['annot_ce10']['fp_scap_rev'], bin_size=1, flank_len=500)
In [5]:
label_a_w_b = 'This study overlapping (Ho et al., 2017) (n=%s)' % (yp.f_uk(len(gv.df_a_with_b)),)
label_b_w_a = '(Ho et al., 2017) overlapping this study (n=%s)' % (yp.f_uk(len(gv.df_b_with_a)),)
label_a_only = 'This study not overlapping (Ho et al., Ho2017) (n=%s)' % (yp.f_uk(len(gv.df_a_only)),)
label_b_only = '(Ho et al., 2017) not overlapping this study (n=%s)' % (yp.f_uk(len(gv.df_b_only)),)
def mean_gt1_fwd(x, a):
#x_ = np.nanmean(x, a)
x_ = np.mean(x > 0, a)
return x_
def mean_gt1_rev(x, a):
#x_ = np.nanmean(x, a)
x_ = np.mean(-x > 0, a)
return -x_
def gt1_fwd(x): return (x > 0).astype(int)
def gt1_rev(x): return -(x > 0).astype(int)
fig = plt.figure(figsize=(8,2)).subplots_adjust(wspace=0.7)#hspace=2,
plt.subplot(1,3,1)
plt.title('ChIP-seq peaks')
gdf_a_w_b.t['TFBS_peak_calls'].plot(label=label_a_w_b, color=yp.YELLOW)
#gdf_b_w_a.t['TFBS_peak_calls'].plot(label=label_b_w_a, color=yp.YELLOW, linestyle='dashed')
gdf_a_only.t['TFBS_peak_calls'].plot(label=label_a_only, color=yp.RED)
gdf_b_only.t['TFBS_peak_calls'].plot(label=label_b_only, color=yp.GREEN)
gdf_a_w_b.t['TFBS_peak_calls'].errorbar()
gdf_a_only.t['TFBS_peak_calls'].errorbar()
gdf_b_only.t['TFBS_peak_calls'].errorbar()
plt.xlabel('Distance from\naccessible site (bp)')
plt.ylabel('Average number of\nChIP-seq peak calls')
#plt.legend(loc='upper left')
#plt.ylim(0, 25)
plt.subplot(1,3,2)
plt.title('Transcription initiation')
gdf_a_w_b.t['txn_initiation_fwd'].plot(label=label_a_w_b, color=yp.YELLOW, f=mean_gt1_fwd)
#gdf_b_w_a.t['txn_initiation_fwd'].plot(label=label_b_w_a, color=yp.YELLOW, linestyle='dashed', f=mean_gt1_fwd)
gdf_a_only.t['txn_initiation_fwd'].plot(label=label_a_only, color=yp.RED, f=mean_gt1_fwd)
gdf_b_only.t['txn_initiation_fwd'].plot(label=label_b_only, color=yp.GREEN, f=mean_gt1_fwd)
l_ = [int(gdf_a_w_b.t['txn_initiation_fwd'].m.shape[1] / 2) + 57]
gdf_a_w_b.t['txn_initiation_fwd'].errorbar(l_index=l_, f=gt1_fwd)
#gdf_b_w_a.t['txn_initiation_fwd'].errorbar(l_index=l_, f=gt1_fwd)
gdf_a_only.t['txn_initiation_fwd'].errorbar(l_index=l_, f=gt1_fwd)
gdf_b_only.t['txn_initiation_fwd'].errorbar(l_index=l_, f=gt1_fwd)
gdf_a_w_b.t['txn_initiation_rev'].plot(label=label_a_w_b, color=yp.YELLOW, f=mean_gt1_rev)
#gdf_b_w_a.t['txn_initiation_rev'].plot(label=label_b_w_a, color=yp.YELLOW, linestyle='dashed', f=mean_gt1_rev)
gdf_a_only.t['txn_initiation_rev'].plot(label=label_a_only, color=yp.RED, f=mean_gt1_rev)
gdf_b_only.t['txn_initiation_rev'].plot(label=label_b_only, color=yp.GREEN, f=mean_gt1_rev)
plt.ylim(-.1, .1)
plt.axhline(0, color='k', linewidth=1, linestyle='dashed')
plt.axvline(0, color='k', linewidth=1, linestyle='dashed')
plt.xlabel('Distance from\naccessible site (bp)')
plt.ylabel('Fraction with\ntranscription initiation')
plt.legend(loc='lower center', borderaxespad=-10)#bbox_to_anchor=(-.07, 1.02, 1.15, .102), mode="expand", #, borderpad=4
plt.subplot(1,3,3)
plt.title('Annotated exons')
gdf_a_w_b.t['exon_coverage'].plot(label=label_a_w_b, color=yp.YELLOW)
#gdf_b_w_a.t['exon_coverage'].plot(label=label_b_w_a, color=yp.YELLOW, linestyle='dashed')
gdf_a_only.t['exon_coverage'].plot(label=label_a_only, color=yp.RED)
gdf_b_only.t['exon_coverage'].plot(label=label_b_only, color=yp.GREEN)
gdf_a_w_b.t['exon_coverage'].errorbar()
gdf_a_only.t['exon_coverage'].errorbar()
gdf_b_only.t['exon_coverage'].errorbar()
plt.xlabel('Distance from\naccessible site (bp)')
plt.ylabel('Fraction with\nexon coverage')
plt.savefig('annot_eLife_revised/_fig/Fig2S1CR_Ho2017.pdf', bbox_inches='tight', transparent=True)
In [6]:
#kwargs_ = {'header': False, 'sep': '\t', 'index': False}
#gv.df_a_with_b.to_csv('annot/FigA_overlaps/overlaps_Ho2017/%s.bed' % (label_a_w_b,), **kwargs_)
#gv.df_b_with_a.to_csv('annot/FigA_overlaps/overlaps_Ho2017/%s.bed' % (label_b_w_a,), **kwargs_)
#gv.df_a_only.to_csv('annot/FigA_overlaps/overlaps_Ho2017/%s.bed' % (label_a_only,), **kwargs_)
#gv.df_b_only.to_csv('annot/FigA_overlaps/overlaps_Ho2017/%s.bed' % (label_b_only,), **kwargs_)