In [1]:
# Init
%run ~/relmapping/annot_cb/notebooks/annot__init__.ipynb
In [2]:
def mp(fp): return os.path.join('annot_cb', 'metrics_lcap', fp)
l_cond = list(config['annot_cb']['lcap_samples'].keys())
fp_atac = 'annot_cb/accessible_sites_cb.tsv'
df_atac = pd.read_csv(fp_atac, sep='\t')
l_atac_peak_pos = df_atac[['start', 'end']].mean(axis=1).map(int)
df_atac.head()
Out[2]:
In [3]:
# Jump test
# 2-condition, 2-replicate one-sided test
def deseq2x2(df_counts):
fh_inp, fp_inp = tempfile.mkstemp(dir=os.getcwd(), prefix='deseq2x2_inp_')
fh_out, fp_out = tempfile.mkstemp(dir=os.getcwd(), prefix='deseq2x2_out_')
df_counts.to_csv(fp_inp, sep='\t')
!cat {fp_inp} | scripts/deseq2x2_greater.R > {fp_out}
#!wc -l {fp_inp}
#!wc -l {fp_out}
#!tail -n 20 {fp_out}
df_out = pd.read_csv(fp_out, sep='\s+')
assert len(df_counts) == len(df_out)
!rm {fp_inp}
!rm {fp_out}
return df_out
# Count reads & call DESeq
def jump_test(fp_rep1, fp_rep2, chrom, u_start, u_end, d_start, d_end, f):
df_counts = pd.DataFrame()
df_counts['ucount_rep1'] = list(map(f, read_regions(fp_rep1, chrom, u_start, u_end)))
df_counts['ucount_rep2'] = list(map(f, read_regions(fp_rep2, chrom, u_start, u_end)))
df_counts['dcount_rep1'] = list(map(f, read_regions(fp_rep1, chrom, d_start, d_end)))
df_counts['dcount_rep2'] = list(map(f, read_regions(fp_rep2, chrom, d_start, d_end)))
# Sample pseudoreplicates
#np.random.seed(0)
#df_counts['ucount_sum'] = df_counts[ ['ucount_rep1', 'ucount_rep2'] ].sum(axis=1)
#df_counts['dcount_sum'] = df_counts[ ['dcount_rep1', 'dcount_rep2'] ].sum(axis=1)
#df_counts['ucount_rep1'] = list(map(lambda n: np.random.binomial(n, 0.5), df_counts['ucount_sum']))
#df_counts['dcount_rep1'] = list(map(lambda n: np.random.binomial(n, 0.5), df_counts['dcount_sum']))
#df_counts['ucount_rep2'] = df_counts['ucount_sum'] - df_counts['ucount_rep1']
#df_counts['dcount_rep2'] = df_counts['dcount_sum'] - df_counts['dcount_rep1']
df_deseq = deseq2x2(df_counts[['ucount_rep1', 'ucount_rep2', 'dcount_rep1', 'dcount_rep2']])
return pd.concat([df_counts, df_deseq], axis=1)
def itemRgb_(passed):
if passed:
return RED
else:
return BLUE
def jump_test_fwd(stage):
fp_rep1 = pf('annot_cb_lcap_%(stage)s_rep1' % locals(), config['annot_cb']['lcap_firstbp_fwd'], '.bw', 'lcap')
fp_rep2 = pf('annot_cb_lcap_%(stage)s_rep2' % locals(), config['annot_cb']['lcap_firstbp_fwd'], '.bw', 'lcap')
chrom = df_atac['chrom'].tolist()
u_start = (l_atac_peak_pos - 250).tolist()
u_end = (l_atac_peak_pos - 75 + 1).tolist()
d_start = (l_atac_peak_pos + 75).tolist()
d_end = (l_atac_peak_pos + 250 + 1).tolist()
df_jump = jump_test(fp_rep1, fp_rep2, chrom=chrom, u_start=u_start, u_end=u_end,
d_start=d_start, d_end=d_end, f=lambda s: int(np.nan_to_num(np.nansum(s))))
#df_jump['passed'] = (df_jump['log2FoldChange'] > 2) & (df_jump['padj'] < 0.05)
df_jump['passed_jump'] = (df_jump['log2FoldChange'] > 1.5) & (df_jump['padj'] < 0.1)
df_jump['passed_incr'] = (df_jump['ucount_rep1'] == 0) & (df_jump['ucount_rep2'] == 0) \
& (df_jump['dcount_rep1'] >= 1) & (df_jump['dcount_rep2'] >= 1) \
& ((df_jump['dcount_rep1'] + df_jump['dcount_rep2']) >= 3)
df_jump['passed'] = df_jump['passed_jump'] | df_jump['passed_incr']
df_jump['summary'] = ['%s / %.02f / %.3g' % (passed, lfc, padj) \
for passed, lfc, padj in zip(df_jump['passed'], df_jump['log2FoldChange'], df_jump['padj'])]
df_jump.columns = ['lcap_%s_fwd_%s' % (stage, col) for col in df_jump.columns]
write_gffbed(mp('lcap_%s_fwd.bed' % (stage,)),
chrom = chrom,
start = u_start,
thickStart = np.array(u_end) - 1,
thickEnd = np.array(d_start) + 1,
end = d_end,
attr = df_jump,
itemRgb = list(map(itemRgb_, df_jump['lcap_%s_fwd_passed' % (stage,)]
)))
return df_jump
df_jump_fwd = pd.concat(pmap(jump_test_fwd, l_cond, n_jobs=20), axis=1)
df_jump_fwd.to_csv(mp('lcap_all_fwd.tsv'), sep='\t', index=False)
In [4]:
def jump_test_rev(stage):
fp_rep1 = pf('annot_cb_lcap_%(stage)s_rep1' % locals(), config['annot_cb']['lcap_firstbp_rev'], '.bw', 'lcap')
fp_rep2 = pf('annot_cb_lcap_%(stage)s_rep2' % locals(), config['annot_cb']['lcap_firstbp_rev'], '.bw', 'lcap')
chrom = df_atac['chrom'].tolist()
d_start = (l_atac_peak_pos - 250).tolist()
d_end = (l_atac_peak_pos - 75 + 1).tolist()
u_start = (l_atac_peak_pos + 75).tolist()
u_end = (l_atac_peak_pos + 250 + 1).tolist()
df_jump = jump_test(fp_rep1, fp_rep2, chrom=chrom, u_start=u_start, u_end=u_end,
d_start=d_start, d_end=d_end, f=lambda s: int(np.nan_to_num(np.nansum(s))))
#df_jump['passed'] = (df_jump['log2FoldChange'] > 2) & (df_jump['padj'] < 0.05)
df_jump['passed_jump'] = (df_jump['log2FoldChange'] > 1.5) & (df_jump['padj'] < 0.1)
df_jump['passed_incr'] = (df_jump['ucount_rep1'] == 0) & (df_jump['ucount_rep2'] == 0) \
& (df_jump['dcount_rep1'] >= 1) & (df_jump['dcount_rep2'] >= 1) \
& ((df_jump['dcount_rep1'] + df_jump['dcount_rep2']) >= 3)
df_jump['passed'] = df_jump['passed_jump'] | df_jump['passed_incr']
df_jump['summary'] = ['%s / %.02f / %.3g' % (passed, lfc, padj) \
for passed, lfc, padj in zip(df_jump['passed'], df_jump['log2FoldChange'], df_jump['padj'])]
df_jump.columns = ['lcap_%s_rev_%s' % (stage, col) for col in df_jump.columns]
write_gffbed(mp('lcap_%s_rev.bed' % (stage,)),
chrom = chrom,
start = d_start,
thickStart = np.array(d_end) - 1,
thickEnd = np.array(u_start) + 1,
end = u_end,
attr = df_jump,
itemRgb = list(map(itemRgb_, df_jump['lcap_%s_rev_passed' % (stage,)]
)))
return df_jump
df_jump_rev = pd.concat(pmap(jump_test_rev, l_cond, n_jobs=20), axis=1)
df_jump_rev.to_csv(mp('lcap_all_rev.tsv'), sep='\t', index=False)
In [5]:
pd.concat([df_jump_fwd['lcap_%s_fwd_passed' % (stage,)].value_counts() for stage in l_cond], axis=1)
Out[5]:
In [6]:
pd.concat([df_jump_rev['lcap_%s_rev_passed' % (stage,)].value_counts() for stage in l_cond], axis=1)
Out[6]:
In [7]:
u_start = (l_atac_peak_pos - 250).tolist()
u_end = (l_atac_peak_pos - 75 + 1).tolist()
d_start = (l_atac_peak_pos + 75).tolist()
d_end = (l_atac_peak_pos + 250 + 1).tolist()
l_passed_any = df_jump_fwd[['lcap_%s_fwd_passed' % (stage,) for stage in l_cond]].any(axis=1)
write_gffbed(mp('lcap_all_fwd.bed'),
chrom = df_atac['chrom'],
start = u_start,
thickStart = np.array(u_end) - 1,
thickEnd = np.array(d_start) + 1,
end = d_end,
attr = df_jump_fwd[['lcap_%s_fwd_summary' % (stage,) for stage in l_cond]],
itemRgb = list(map(itemRgb_, l_passed_any)),
)
l_passed_any = df_jump_rev[['lcap_%s_rev_passed' % (stage,) for stage in l_cond]].any(axis=1)
write_gffbed(mp('lcap_all_rev.bed'),
chrom = df_atac['chrom'],
start = u_start,
thickStart = np.array(u_end) - 1,
thickEnd = np.array(d_start) + 1,
end = d_end,
attr = df_jump_rev[['lcap_%s_rev_summary' % (stage,) for stage in l_cond]],
itemRgb = list(map(itemRgb_, l_passed_any)),
)