notebook.community

Edit and run



In [1]:

    
%run ~/relmapping/annot/notebooks/__init__.ipynb









    



/mnt/home3/jj374/anaconda36/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools






    



os.getcwd(): /mnt/beegfs/scratch_copy/ahringer/jj374/lab/relmapping



In [2]:

    
fp_TFBS = 'annot/Fig2S1_overlaps/modERN_modENCODE/modERN_modENCODE_ext200_merge.bed'
fp_Ho2017 = 'annot/Fig2S1_overlaps/Ho2017/Ho2017_DNase_emb.bed'

df_TFBS = pd.read_csv(fp_TFBS, sep='\t', names=yp.NAMES_BED9[:4])
df_Ho2017 = pd.read_csv(fp_Ho2017, sep='\t', names=yp.NAMES_BED3)



In [3]:

    
gv = yp.GenomicVenn2(
    BedTool.from_dataframe(df_regl_()[yp.NAMES_BED3]),
    BedTool.from_dataframe(df_Ho2017),
    label_a='Accessible\nsites',
    label_b='(Ho et al.,\n2017) emb\nDNase-seq',
)

fig = plt.figure(figsize=(3,2))
plt.subplot(1,1,1)
v = gv.plot(style='compact')#fudge_labels_to_top=True)
v.get_patch_by_id('10').set_color(yp.RED)
v.get_patch_by_id('01').set_color(yp.GREEN)
v.get_patch_by_id('11').set_color(yp.YELLOW)

plt.savefig('annot_eLife_revised/_fig/Fig2S1CL_Ho2017.pdf', bbox_inches='tight', transparent=True)



In [4]:

    
gdf_a_w_b = yp.GenomicDataFrame(gv.df_a_with_b)
gdf_b_w_a = yp.GenomicDataFrame(gv.df_b_with_a)
gdf_a_only = yp.GenomicDataFrame(gv.df_a_only)
gdf_b_only = yp.GenomicDataFrame(gv.df_b_only)

l_gdf = [gdf_a_w_b, gdf_b_w_a, gdf_a_only, gdf_b_only]
for gdf_ in l_gdf:
    fp_ = 'annot/Fig2S1_overlaps/modERN_modENCODE/modERN_modENCODE_peak_pileup.bw'
    gdf_.add_track('TFBS_peak_calls', fp_, flank_len=1000, bin_size=5)
    fp_ = 'WS260_ce10/WS260_ce10.exon_coverage_unstranded.bw'
    gdf_.add_track('exon_coverage', fp_, flank_len=1000, bin_size=5)
    gdf_.add_track('txn_initiation_fwd', config['annot_ce10']['fp_scap_fwd'], bin_size=1, flank_len=500)
    gdf_.add_track('txn_initiation_rev', config['annot_ce10']['fp_scap_rev'], bin_size=1, flank_len=500)









    



gdf: pos_column unspecified, reverting to mean(start, end)
gdf: pos_column unspecified, reverting to mean(start, end)
gdf: pos_column unspecified, reverting to mean(start, end)
gdf: pos_column unspecified, reverting to mean(start, end)
mread_regions True modERN_modENCODE_peak_pileup.bw.mread_regions_ea42fca412266eec4accbac0005308ec.tmp
mread_regions True WS260_ce10.exon_coverage_unstranded.bw.mread_regions_ec6e8476e73323d0732861f7ec56ae04.tmp
mread_regions True scap_wt_all_fwd.bw.mread_regions_72e0a639c763146a2f6c1f194fbe9cc0.tmp
mread_regions True scap_wt_all_rev.bw.mread_regions_26ab9453cd9a44b925d2d4bcc102845c.tmp
mread_regions True modERN_modENCODE_peak_pileup.bw.mread_regions_23ec96a4db3e8b4101a115e280af807d.tmp
mread_regions True WS260_ce10.exon_coverage_unstranded.bw.mread_regions_9140a4e3236e4b1c65753e1545b66fe0.tmp
mread_regions True scap_wt_all_fwd.bw.mread_regions_7c468552e8c5f6c31d2363eaa82bf168.tmp
mread_regions True scap_wt_all_rev.bw.mread_regions_5127d8aa1bcd09b31f24dbfa35ae53ae.tmp
mread_regions True modERN_modENCODE_peak_pileup.bw.mread_regions_ba880b102b63f4c16d6947abaf4fec67.tmp
mread_regions True WS260_ce10.exon_coverage_unstranded.bw.mread_regions_34d935fc2f313bdf0375c44fdbe848b5.tmp
mread_regions True scap_wt_all_fwd.bw.mread_regions_fd50028778d2210d5d05072e00ef98aa.tmp
mread_regions True scap_wt_all_rev.bw.mread_regions_3ff72e81a3ae050fdd089446435662e1.tmp
mread_regions True modERN_modENCODE_peak_pileup.bw.mread_regions_ff91d6e1d4db8c62183f9c3ff7902299.tmp
mread_regions True WS260_ce10.exon_coverage_unstranded.bw.mread_regions_a140b4b9718f488174762f6914ac6d43.tmp
mread_regions True scap_wt_all_fwd.bw.mread_regions_d006310f5ea836dd148c23a17e62a81c.tmp
mread_regions True scap_wt_all_rev.bw.mread_regions_27ad01ea35a3f3ed37b0296e75e1bbb2.tmp



In [5]:

    
label_a_w_b = 'This study overlapping (Ho et al., 2017) (n=%s)' % (yp.f_uk(len(gv.df_a_with_b)),)
label_b_w_a = '(Ho et al., 2017) overlapping this study (n=%s)' % (yp.f_uk(len(gv.df_b_with_a)),)
label_a_only = 'This study not overlapping (Ho et al., Ho2017) (n=%s)' % (yp.f_uk(len(gv.df_a_only)),)
label_b_only = '(Ho et al., 2017) not overlapping this study (n=%s)' % (yp.f_uk(len(gv.df_b_only)),)

def mean_gt1_fwd(x, a):
    #x_ = np.nanmean(x, a)
    x_ = np.mean(x > 0, a)
    return x_

def mean_gt1_rev(x, a):
    #x_ = np.nanmean(x, a)
    x_ = np.mean(-x > 0, a)
    return -x_

def gt1_fwd(x): return (x > 0).astype(int)

def gt1_rev(x): return -(x > 0).astype(int)

fig = plt.figure(figsize=(8,2)).subplots_adjust(wspace=0.7)#hspace=2, 

plt.subplot(1,3,1)
plt.title('ChIP-seq peaks')
gdf_a_w_b.t['TFBS_peak_calls'].plot(label=label_a_w_b, color=yp.YELLOW)
#gdf_b_w_a.t['TFBS_peak_calls'].plot(label=label_b_w_a, color=yp.YELLOW, linestyle='dashed')
gdf_a_only.t['TFBS_peak_calls'].plot(label=label_a_only, color=yp.RED)
gdf_b_only.t['TFBS_peak_calls'].plot(label=label_b_only, color=yp.GREEN)
gdf_a_w_b.t['TFBS_peak_calls'].errorbar()
gdf_a_only.t['TFBS_peak_calls'].errorbar()
gdf_b_only.t['TFBS_peak_calls'].errorbar()
plt.xlabel('Distance from\naccessible site (bp)')
plt.ylabel('Average number of\nChIP-seq peak calls')
#plt.legend(loc='upper left')
#plt.ylim(0, 25)

plt.subplot(1,3,2)
plt.title('Transcription initiation')
gdf_a_w_b.t['txn_initiation_fwd'].plot(label=label_a_w_b, color=yp.YELLOW, f=mean_gt1_fwd)
#gdf_b_w_a.t['txn_initiation_fwd'].plot(label=label_b_w_a, color=yp.YELLOW, linestyle='dashed', f=mean_gt1_fwd)
gdf_a_only.t['txn_initiation_fwd'].plot(label=label_a_only, color=yp.RED, f=mean_gt1_fwd)
gdf_b_only.t['txn_initiation_fwd'].plot(label=label_b_only, color=yp.GREEN, f=mean_gt1_fwd)
l_ = [int(gdf_a_w_b.t['txn_initiation_fwd'].m.shape[1] / 2) + 57]
gdf_a_w_b.t['txn_initiation_fwd'].errorbar(l_index=l_, f=gt1_fwd)
#gdf_b_w_a.t['txn_initiation_fwd'].errorbar(l_index=l_, f=gt1_fwd)
gdf_a_only.t['txn_initiation_fwd'].errorbar(l_index=l_, f=gt1_fwd)
gdf_b_only.t['txn_initiation_fwd'].errorbar(l_index=l_, f=gt1_fwd)
gdf_a_w_b.t['txn_initiation_rev'].plot(label=label_a_w_b, color=yp.YELLOW, f=mean_gt1_rev)
#gdf_b_w_a.t['txn_initiation_rev'].plot(label=label_b_w_a, color=yp.YELLOW, linestyle='dashed', f=mean_gt1_rev)
gdf_a_only.t['txn_initiation_rev'].plot(label=label_a_only, color=yp.RED, f=mean_gt1_rev)
gdf_b_only.t['txn_initiation_rev'].plot(label=label_b_only, color=yp.GREEN, f=mean_gt1_rev)
plt.ylim(-.1, .1)
plt.axhline(0, color='k', linewidth=1, linestyle='dashed')
plt.axvline(0, color='k', linewidth=1, linestyle='dashed')
plt.xlabel('Distance from\naccessible site (bp)')
plt.ylabel('Fraction with\ntranscription initiation')
plt.legend(loc='lower center', borderaxespad=-10)#bbox_to_anchor=(-.07, 1.02, 1.15, .102), mode="expand", #, borderpad=4

plt.subplot(1,3,3)
plt.title('Annotated exons')
gdf_a_w_b.t['exon_coverage'].plot(label=label_a_w_b, color=yp.YELLOW)
#gdf_b_w_a.t['exon_coverage'].plot(label=label_b_w_a, color=yp.YELLOW, linestyle='dashed')
gdf_a_only.t['exon_coverage'].plot(label=label_a_only, color=yp.RED)
gdf_b_only.t['exon_coverage'].plot(label=label_b_only, color=yp.GREEN)
gdf_a_w_b.t['exon_coverage'].errorbar()
gdf_a_only.t['exon_coverage'].errorbar()
gdf_b_only.t['exon_coverage'].errorbar()
plt.xlabel('Distance from\naccessible site (bp)')
plt.ylabel('Fraction with\nexon coverage')

plt.savefig('annot_eLife_revised/_fig/Fig2S1CR_Ho2017.pdf', bbox_inches='tight', transparent=True)



In [6]:

    
#kwargs_ = {'header': False, 'sep': '\t', 'index': False}
#gv.df_a_with_b.to_csv('annot/FigA_overlaps/overlaps_Ho2017/%s.bed' % (label_a_w_b,), **kwargs_)
#gv.df_b_with_a.to_csv('annot/FigA_overlaps/overlaps_Ho2017/%s.bed' % (label_b_w_a,), **kwargs_)
#gv.df_a_only.to_csv('annot/FigA_overlaps/overlaps_Ho2017/%s.bed' % (label_a_only,), **kwargs_)
#gv.df_b_only.to_csv('annot/FigA_overlaps/overlaps_Ho2017/%s.bed' % (label_b_only,), **kwargs_)