In [1]:
%run ~/relmapping/annot/notebooks/annot__init__.ipynb
# Root directory of the tissue-specific annotation; also the default base for `mp`.
annot_ = 'annot_ce10_tissues'


def mp(fp, annot_=annot_):
    """Build the path of a file inside the `metrics_atac` folder of an annotation.

    Parameters
    ----------
    fp : str
        File name (or relative path) to place under `<annot_>/metrics_atac/`.
    annot_ : str, optional
        Annotation root directory. The default is the module-level `annot_`
        as it was when this function was defined.

    Returns
    -------
    str
        `fp` joined onto `<annot_>/metrics_atac`.
    """
    metrics_dir = os.path.join(annot_, 'metrics_atac')
    return os.path.join(metrics_dir, fp)
In [2]:
# Disabled cell: the yapc invocation below is wrapped in a bare string literal, so
# nothing in it is executed (neither the assignments nor the `!` shell command).
# NOTE(review): its output prefix (.../metrics_atac/atac_tissues) matches the
# atac_tissues_0.001.bed file read by a later cell, so this is presumably the
# command that produced that file -- confirm before relying on it as provenance.
"""
step = 'tg_se.bwa_se.rm_unmapped.rm_chrM.rm_blacklist.rm_q10.macs2_se_extsize150_shiftm75_keepdup_all'
suffix = '_treat_pileup.bw'
prefix = 'atac824'
!scripts/yapc/yapc annot_ce10_tissues/metrics_atac/atac_tissues \
hypod {pf('atac824_hypod_rep1', step, suffix, prefix)} {pf('atac824_hypod_rep2', step, suffix, prefix)} \
neurons {pf('atac824_neurons_rep1', step, suffix, prefix)} {pf('atac824_neurons_rep2', step, suffix, prefix)} \
gonad {pf('atac824_gonad_rep1', step, suffix, prefix)} {pf('atac824_gonad_rep2', step, suffix, prefix)} \
muscle {pf('atac824_muscle_rep1', step, suffix, prefix)} {pf('atac824_muscle_rep2', step, suffix, prefix)} \
intest {pf('atac824_intest_rep1', step, suffix, prefix)} {pf('atac824_intest_rep2', step, suffix, prefix)} \
--smoothing-window-width 150 --fixed-peak-halfwidth 75
"""
Out[2]:
In [3]:
# Load the accessible-site table from annot_ce10 and keep only the interval
# coordinates plus the `atac_source` label; every other column is dropped.
df_wt_glp1 = (
    pd.read_csv('annot_ce10/accessible_sites.tsv', sep='\t')
    .loc[:, ['chrom', 'start', 'end', 'atac_source']]
)
print('%d peaks from wt/glp1-mapping' % len(df_wt_glp1))
df_wt_glp1.head()
Out[3]:
In [4]:
# Read the tissue peak calls, reduce them to the BED3 columns, and tag every row
# with a constant `atac_source` so the origin survives the later merge.
df_tissues = (
    read_gffbed('annot_ce10_tissues/metrics_atac/atac_tissues_0.001.bed')[yp.NAMES_BED3]
    .assign(atac_source='atac_tissues')
)
print('%d peaks from tissue-specific data' % len(df_tissues))
df_tissues.head()
Out[4]:
In [5]:
# Keep only the tissue peaks that do not overlap any wt/glp1 peak: with A=True,
# bedtools subtract removes an entire feature of the first set on any overlap
# with `b` instead of trimming it.
bt_tissues_only = BedTool.from_dataframe(df_tissues).subtract(b=BedTool.from_dataframe(df_wt_glp1), A=True)
# Read the result file back into pandas; `names=` supplies the column labels and
# makes read_csv treat the first line as data rather than as a header.
df_tissues_only = pd.read_csv(bt_tissues_only.fn, sep='\t', names=df_wt_glp1.columns)
print('%d peaks added from tissue-specific data' % (len(df_tissues_only),))
# Random preview, seeded so that Restart & Run All shows the same rows every time,
# and capped at the frame length because sample(n) raises ValueError when n
# exceeds the number of rows (sampling without replacement).
df_tissues_only.sample(n=min(20, len(df_tissues_only)), random_state=0)
Out[5]:
In [6]:
fp_ = os.path.join(annot_, 'accessible_sites.tsv')
df_merged = pd.concat([df_wt_glp1, df_tissues_only], axis=0, ignore_index=True).sort_values(yp.NAMES_BED3).reset_index(drop=True)
print('%d peaks in final set' % (len(df_merged),))
df_merged.head()
df_merged[yp.NAMES_BED3 + ['atac_source']].to_csv(fp_, sep='\t', index=False, float_format='%.2f')
!wc -l {fp_}