In [1]:
%run ~/relmapping/annot/notebooks/__init__.ipynb
In [2]:
# Ni: ForJulie/20180223_modERN_modENCODE_peak_assignment
# Location of the combined summit table (tab-separated; column labels are
# supplied via names_ when it is read).
fp_ = 'annot/Fig2S3_overlaps/20180223_modERN_modENCODE_peak_assignment/ce10.summits.combined.txt'
# Column labels, in file order. The last column, 'peak_iv', is later parsed as
# a 'chrom:start-end' interval string.
names_ = [
    'summit_chrom',
    'summit_start',
    'summit_end',
    'factor_name',
    'stage',
    'file_accession',
    'peak_iv',
]
def chrom_(s):
    """Return the text before the first ':' of an interval string such as 'chrI:100-200'."""
    fields = s.split(':')
    return fields[0]
def start_(s):
    """Return the start coordinate of a 'chrom:start-end' string, as a string (not an int)."""
    span = s.split(':')[1]  # the 'start-end' part
    bounds = span.split('-')
    return bounds[0]
def end_(s):
    """Return the end coordinate of a 'chrom:start-end' string, as a string (not an int)."""
    span = s.split(':')[1]  # the 'start-end' part
    bounds = span.split('-')
    return bounds[1]
# Read the tab-separated summit table (labels come from names_) and split the
# 'peak_iv' interval string into three extra columns. The helpers return string
# slices, so peak_start / peak_end hold text rather than integers.
df_ = pd.read_csv(fp_, names=names_, sep='\t').assign(
    peak_chrom=lambda tbl: tbl['peak_iv'].map(chrom_),
    peak_start=lambda tbl: tbl['peak_iv'].map(start_),
    peak_end=lambda tbl: tbl['peak_iv'].map(end_),
)
df_.head()
Out[2]:
In [3]:
# Calculate peak call coverage of all factors
col_ = ['peak_chrom', 'peak_start', 'peak_end']
bg = BedTool.from_dataframe(df_[col_]).genome_coverage(bg=True, g='shared/ce10.chroms').to_dataframe()
fp_bg = 'annot/Fig2S3_overlaps/modERN_modENCODE/modERN_modENCODE_peak_pileup.bedGraph'
fp_bw = 'annot/Fig2S3_overlaps/modERN_modENCODE/modERN_modENCODE_peak_pileup.bw'
bg.to_csv(fp_bg, header=False, index=False, sep='\t')
# convert track to bigWig
!bedGraphToBigWig {fp_bg} shared/ce10.chroms {fp_bw}
!rm {fp_bg}
In [4]:
# modENCODE/modERN TFBS
df_ext200_ = pd.DataFrame()
df_ext200_['chrom'] = df_['summit_chrom']
df_ext200_['start'] = df_['summit_start'] - 200
df_ext200_['end'] = df_['summit_end'] + 200
df_ext200_['name'] = df_['file_accession']
df_ext200_merge_ = BedTool.from_dataframe(df_ext200_.query('chrom != "chrM"')).merge(c='4', o='count').to_dataframe()
fp_ = 'annot/Fig2S3_overlaps/modERN_modENCODE/modERN_modENCODE_ext200_merge.bed'
df_ext200_merge_.to_csv(fp_, sep='\t', index=False, header=False)
!wc -l {fp_}