In [1]:
%run ~/relmapping/annot/notebooks/__init__.ipynb


/mnt/home3/jj374/anaconda36/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools
os.getcwd(): /mnt/b2/scratch/ahringer/jj374/lab/relmapping

In [2]:
# Provenance (Ni): ForJulie/20180223_modERN_modENCODE_peak_assignment
# Column labels for the combined summit table; `peak_iv` holds the full peak
# interval as a single "chrom:start-end" string.
names_ = [
    'summit_chrom',
    'summit_start',
    'summit_end',
    'factor_name',
    'stage',
    'file_accession',
    'peak_iv',
]
fp_ = 'annot/Fig2S3_overlaps/20180223_modERN_modENCODE_peak_assignment/ce10.summits.combined.txt'

def chrom_(s):
    """Return the chromosome part of a 'chrom:start-end' interval string.

    Everything before the first ':' is returned; a string without ':' is
    returned unchanged.
    """
    fields = s.split(':')
    return fields[0]
def start_(s):
    """Return the start coordinate of a 'chrom:start-end' string, as a str.

    Raises IndexError when the string contains no ':'.
    """
    coords = s.split(':')[1]
    # str.split always yields at least one element, so this unpacking is safe.
    first, *_ = coords.split('-')
    return first
def end_(s):
    """Return the end coordinate of a 'chrom:start-end' string, as a str.

    Raises IndexError when the string contains no ':' or the coordinate part
    contains no '-'.
    """
    coords = s.split(':')[1]
    bounds = coords.split('-')
    return bounds[1]

# Load the tab-separated summit table, labelling its columns with names_.
df_ = pd.read_csv(fp_, names=names_, sep='\t')

# Split `peak_iv` into separate chrom/start/end columns (added in that order).
# start_/end_ return substrings, so peak_start and peak_end hold str values.
df_['peak_chrom'], df_['peak_start'], df_['peak_end'] = (
    df_['peak_iv'].map(parse) for parse in (chrom_, start_, end_)
)
df_.head()


Out[2]:
summit_chrom summit_start summit_end factor_name stage file_accession peak_iv peak_chrom peak_start peak_end
0 chrI 3769 3770 elt-3-GFP L3 ENCFF724CLW chrI:3600-3940 chrI 3600 3940
1 chrI 3812 3813 blmp-1-GFP L2 ENCFF697TST chrI:3645-3981 chrI 3645 3981
2 chrI 3820 3821 ZK546.5-GFP L4 ENCFF905WSC chrI:3669-3973 chrI 3669 3973
3 chrI 3821 3822 efl-1-GFP YA ENCFF456YBJ chrI:3701-3893 chrI 3701 3893
4 chrI 3828 3829 rec-8-GFP YA ENCFF721FFG chrI:3614-4044 chrI 3614 4044

In [3]:
# Calculate peak call coverage of all factors
col_ = ['peak_chrom', 'peak_start', 'peak_end']
bg = BedTool.from_dataframe(df_[col_]).genome_coverage(bg=True, g='shared/ce10.chroms').to_dataframe()
fp_bg = 'annot/Fig2S3_overlaps/modERN_modENCODE/modERN_modENCODE_peak_pileup.bedGraph'
fp_bw = 'annot/Fig2S3_overlaps/modERN_modENCODE/modERN_modENCODE_peak_pileup.bw'
bg.to_csv(fp_bg, header=False, index=False, sep='\t')

# convert track to bigWig
!bedGraphToBigWig {fp_bg} shared/ce10.chroms {fp_bw}
!rm {fp_bg}

In [4]:
# modENCODE/modERN TFBS
df_ext200_ = pd.DataFrame()
df_ext200_['chrom'] = df_['summit_chrom']
df_ext200_['start'] = df_['summit_start'] - 200
df_ext200_['end'] = df_['summit_end'] + 200
df_ext200_['name'] = df_['file_accession']
df_ext200_merge_ = BedTool.from_dataframe(df_ext200_.query('chrom != "chrM"')).merge(c='4', o='count').to_dataframe()
fp_ = 'annot/Fig2S3_overlaps/modERN_modENCODE/modERN_modENCODE_ext200_merge.bed'
df_ext200_merge_.to_csv(fp_, sep='\t', index=False, header=False)
!wc -l {fp_}


36389 annot/Fig2S3_overlaps/modERN_modENCODE/modERN_modENCODE_ext200_merge.bed