In [1]:
%run ~/relmapping/annot/notebooks/annot__init__.ipynb
In [2]:
# Load the expanded regulatory-element table. The unsuffixed coordinate and
# TSS columns are relabelled with a `_ce10` suffix; the `start_ce11` /
# `end_ce11` columns used below are read from the file as-is.
fp_ = 'annot_eLife_full/reg_elements_eLife_full_review_expanded.tsv'
df_regl = pd.read_csv(fp_, sep='\t')
df_regl = df_regl.rename(
    columns={
        'chrom': 'chrom_ce10',
        'start': 'start_ce10',
        'end': 'end_ce10',
        'tss_fwd': 'tss_fwd_ce10',
        'tss_rev': 'tss_rev_ce10',
    },
)

# Midpoint of every element (mean of start/end, truncated to int) in each
# of the two coordinate systems.
l_atac_peak_pos_ce10 = (
    df_regl[['start_ce10', 'end_ce10']].mean(axis=1).map(int)
)
l_atac_peak_pos_ce11 = (
    df_regl[['start_ce11', 'end_ce11']].mean(axis=1).map(int)
)

# ce11 TSS positions: each TSS keeps the same offset from the element
# midpoint that it has in ce10 (tss - midpoint_ce10 + midpoint_ce11).
df_regl['tss_fwd_ce11'] = (
    df_regl['tss_fwd_ce10']
    .sub(l_atac_peak_pos_ce10)
    .add(l_atac_peak_pos_ce11)
)
df_regl['tss_rev_ce11'] = (
    df_regl['tss_rev_ce10']
    .sub(l_atac_peak_pos_ce10)
    .add(l_atac_peak_pos_ce11)
)
In [3]:
# Read the per-element TF assignment table and copy three of its columns onto
# df_regl under new names. The assignment aligns on the row index, and the
# only consistency check performed is that both tables have the same length.
# NOTE(review): this presumes both files list the elements in the same order
# -- confirm against how the TF assignment file was generated.
fp_ = 'annot_eLife_revised/20180223_modERN_modENCODE_peak_assignment/regulatory_elements.mid_flk200bp.mod_TF_assignment.with_HOTness.corrected.txt'
df_tf = pd.read_csv(fp_, sep='\t')
assert len(df_tf) == len(df_regl)

# (column name in df_regl, column name in df_tf)
for col_regl_, col_tf_ in [
    ('HOTness', 'HOTness'),
    ('factor_count', 'cnt'),
    ('factor_names', 'TF'),
]:
    df_regl[col_regl_] = df_tf[col_tf_]

# Cell output: contingency table of factor_count (rows) vs HOTness (columns).
pd.crosstab(index=df_regl['factor_count'], columns=df_regl['HOTness'])
Out[3]:
In [4]:
# Figure 1 source data: ce10 coordinates, one ATAC height column per entry
# of config['stages'], and the atac_source column, written tab-separated
# with a header row and without the index.
fp_ = 'annot_eLife_revised/Figure 1-source data 1. Accessible sites.txt'

col_ = ['chrom_ce10', 'start_ce10', 'end_ce10']
col_ += ['atac_%s_height' % (stage,) for stage in config['stages']]
col_ += ['atac_source']

df_regl[col_].to_csv(fp_, sep='\t', index=False, header=True)
In [5]:
# Figure 2 source data: coordinates in both assemblies, annotation and gene
# assignment columns, TSS positions, and the short-cap / long-cap pass flags,
# written tab-separated with a header row and without the index.
fp_ = 'annot_eLife_revised/Figure 2-source data 1. Regulatory annotation.txt'

col_ = [
    'chrom_ce10', 'start_ce10', 'end_ce10',
    'chrom_ce11', 'start_ce11', 'end_ce11',
    'annot', 'annot_fwd', 'annot_rev',
    'promoter_gene_id_fwd', 'promoter_locus_id_fwd', 'promoter_gene_biotype_fwd',
    'promoter_gene_id_rev', 'promoter_locus_id_rev', 'promoter_gene_biotype_rev',
    'associated_gene_id', 'associated_locus_id',
    'tss_fwd_ce10', 'tss_rev_ce10', 'tss_fwd_ce11', 'tss_rev_ce11',
    'scap_fwd_passed', 'scap_rev_passed',
]

# One column per (template, stage) pair. The template loop is the outer one,
# so all stages of one template are listed before the next template starts.
col_ += [
    template % (stage,)
    for template in (
        'lcap_%s_fwd_passed_jump',
        'lcap_%s_fwd_passed_incr',
        'lcap_%s_rev_passed_jump',
        'lcap_%s_rev_passed_incr',
    )
    for stage in config['stages']
]

df_regl[col_].to_csv(fp_, sep='\t', index=False, header=True)
In [6]:
# Figure 4 source data: ce10 coordinates, the dynamics / cluster-label
# columns, and the TF-assignment columns, written tab-separated with a
# header row and without the index.
fp_ = 'annot_eLife_revised/Figure 4-source data 1. Promoter accessibility.txt'

col_ = (
    ['chrom_ce10', 'start_ce10', 'end_ce10']
    + ['devel_is_dynamic', 'ageing_is_dynamic']
    + ['devel_cluster_label', 'ageing_cluster_label']
    + ['HOTness', 'factor_count', 'factor_names']
)

df_regl[col_].to_csv(fp_, sep='\t', index=False, header=True)
In [7]:
# Figure 5 source data: re-export the TF dataset listing. The column names
# are supplied explicitly on read, and the table is written back out
# tab-separated with those names as the header row and without the index.
# Note that fp_ is first the input path and is then rebound to the output path.
fp_ = 'annot_eLife_revised/20180223_modERN_modENCODE_peak_assignment/ce10.mod_factors_datasets.txt'
df_ = pd.read_csv(
    fp_,
    names=['factor', 'dataset_name', 'dataset_id'],
    sep='\t',
)

fp_ = 'annot_eLife_revised/Figure 5-source data 1. TF datasets.txt'
df_.to_csv(fp_, sep='\t', index=False, header=True)