In [ ]:
# Execute the shared annot_cb initialisation notebook inside this kernel.
# NOTE(review): the cells below use `config`, `pf`, `pd` and `math` without
# importing them here -- presumably they are defined by this notebook; verify.
%run ~/relmapping/annot_cb/notebooks/annot__init__.ipynb
In [ ]:
l_atac_stages = list(config['annot_cb']['atac_samples'].keys())
args = ''
for stage in l_atac_stages:
fp_rep1 = pf('annot_cb_atac_%(stage)s_rep1' % locals(), config['annot_cb']['atac_step'], config['annot_cb']['atac_suffix'], 'atac')
fp_rep2 = pf('annot_cb_atac_%(stage)s_rep2' % locals(), config['annot_cb']['atac_step'], config['annot_cb']['atac_suffix'], 'atac')
fp_prp1 = pf('annot_cb_atac_%(stage)s_rep1' % locals(), config['annot_cb']['atac_step_prp'], config['annot_cb']['atac_suffix'], 'atac')
fp_prp2 = pf('annot_cb_atac_%(stage)s_rep2' % locals(), config['annot_cb']['atac_step_prp'], config['annot_cb']['atac_suffix'], 'atac')
args += '%(stage)s %(fp_rep1)s %(fp_rep2)s %(fp_prp1)s %(fp_prp2)s ' % locals()
!yapc/yapc annot_cb/metrics_atac/atac_cb {args} --smoothing-window-width 150 --fixed-peak-halfwidth 75 --pseudoreplicates
In [ ]:
# Load the per-site table found next to the yapc output prefix, take the
# row-wise maximum over the per-stage '<stage>_globalIDR' columns, and report
# how many sites pass each of several IDR cut-offs.
df_sites = pd.read_csv('annot_cb/metrics_atac/atac_cb.tsv', sep='\t')
l_globalIDR = ['{}_globalIDR'.format(stage) for stage in config['annot_cb']['atac_samples'].keys()]
df_sites['max_globalIDR'] = df_sites[l_globalIDR].max(axis=1)
for th in [0.001, 0.005, 0.01, 0.05]:
    # The cut-off is compared on the -log10 scale of the IDR threshold.
    th_globalIDR = -math.log(th, 10)
    passes = df_sites['max_globalIDR'] >= th_globalIDR
    df_sites_th = df_sites[passes]
    print('%d accessible sites at IDR=%.3f' % (len(df_sites_th), th))
# Last expression of the cell: display the first rows of the table.
df_sites.head()
In [ ]:
th_globalIDR = -math.log(0.001, 10)
fp_ = 'annot_cb/accessible_sites_cb.tsv'
df_sites.query('max_globalIDR > @th_globalIDR').to_csv(fp_, header=True, index=False, sep='\t')
!wc -l {fp_}