notebook.community

Edit and run



In [1]:

    
# Execute the shared setup notebook in this kernel's namespace.
# NOTE(review): the cells below use `pd` and `config` without importing/defining
# them, so they are presumably provided by this init notebook — confirm.
%run ~/relmapping/annot/notebooks/annot__init__.ipynb









    



/mnt/home3/jj374/anaconda36/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools






    



os.getcwd(): /mnt/beegfs/scratch_copy/ahringer/jj374/lab/relmapping



In [2]:

    
# Load the regulatory-element table; the un-suffixed coordinate/TSS columns are
# relabelled with a `_ce10` suffix (the file is expected to already carry
# `start_ce11` / `end_ce11` columns, which are used below).
fp_ = 'annot_eLife_full/reg_elements_eLife_full_review_expanded.tsv'
ce10_names_ = {
    name_: name_ + '_ce10'
    for name_ in ('chrom', 'start', 'end', 'tss_fwd', 'tss_rev')
}
df_regl = pd.read_csv(fp_, sep='\t').rename(columns=ce10_names_)

# Integer midpoint of every element in each assembly.
l_atac_peak_pos_ce10 = df_regl[['start_ce10', 'end_ce10']].mean(axis=1).map(int)
l_atac_peak_pos_ce11 = df_regl[['start_ce11', 'end_ce11']].mean(axis=1).map(int)

# ce11 TSS = ce10 TSS shifted by the difference between the two midpoints.
for strand_ in ('fwd', 'rev'):
    df_regl['tss_%s_ce11' % (strand_,)] = (
        df_regl['tss_%s_ce10' % (strand_,)]
        - l_atac_peak_pos_ce10
        + l_atac_peak_pos_ce11
    )



In [3]:

    
# Read the per-element TF assignment table and copy its HOTness / count / name
# columns onto df_regl under the names used in the exported source data.
fp_ = 'annot_eLife_revised/20180223_modERN_modENCODE_peak_assignment/regulatory_elements.mid_flk200bp.mod_TF_assignment.with_HOTness.corrected.txt'
df_tf = pd.read_csv(fp_, sep='\t')

# Both tables must have the same number of rows before columns are copied across.
assert len(df_tf) == len(df_regl)

for col_regl_, col_tf_ in (
    ('HOTness', 'HOTness'),
    ('factor_count', 'cnt'),
    ('factor_names', 'TF'),
):
    df_regl[col_regl_] = df_tf[col_tf_]

# Displayed result: number of elements per (factor_count, HOTness) combination.
pd.crosstab(index=df_regl['factor_count'], columns=df_regl['HOTness'])









    Out[3]:







  
    
      HOTness
      HOT
      cold
      none
    
    
      factor_count
      
      
      
    
  
  
    
      0
      0
      0
      12854
    
    
      1
      0
      6301
      0
    
    
      2
      0
      3801
      0
    
    
      3
      0
      2473
      0
    
    
      4
      0
      1825
      0
    
    
      5
      0
      1519
      0
    
    
      6
      0
      1275
      0
    
    
      7
      0
      1118
      0
    
    
      8
      0
      866
      0
    
    
      9
      0
      733
      0
    
    
      10
      0
      644
      0
    
    
      11
      0
      511
      0
    
    
      12
      0
      455
      0
    
    
      13
      0
      434
      0
    
    
      14
      0
      359
      0
    
    
      15
      0
      322
      0
    
    
      16
      0
      318
      0
    
    
      17
      0
      280
      0
    
    
      18
      0
      263
      0
    
    
      19
      247
      0
      0
    
    
      20
      227
      0
      0
    
    
      21
      215
      0
      0
    
    
      22
      206
      0
      0
    
    
      23
      206
      0
      0
    
    
      24
      175
      0
      0
    
    
      25
      157
      0
      0
    
    
      26
      159
      0
      0
    
    
      27
      146
      0
      0
    
    
      28
      125
      0
      0
    
    
      29
      130
      0
      0
    
    
      ...
      ...
      ...
      ...
    
    
      138
      6
      0
      0
    
    
      139
      8
      0
      0
    
    
      140
      10
      0
      0
    
    
      141
      4
      0
      0
    
    
      142
      6
      0
      0
    
    
      143
      8
      0
      0
    
    
      144
      9
      0
      0
    
    
      145
      4
      0
      0
    
    
      146
      7
      0
      0
    
    
      147
      6
      0
      0
    
    
      148
      3
      0
      0
    
    
      149
      5
      0
      0
    
    
      150
      1
      0
      0
    
    
      151
      2
      0
      0
    
    
      152
      7
      0
      0
    
    
      153
      5
      0
      0
    
    
      154
      3
      0
      0
    
    
      155
      4
      0
      0
    
    
      156
      1
      0
      0
    
    
      157
      2
      0
      0
    
    
      158
      1
      0
      0
    
    
      159
      1
      0
      0
    
    
      160
      1
      0
      0
    
    
      162
      1
      0
      0
    
    
      164
      2
      0
      0
    
    
      165
      3
      0
      0
    
    
      166
      2
      0
      0
    
    
      169
      1
      0
      0
    
    
      170
      1
      0
      0
    
    
      173
      1
      0
      0
    
  

168 rows × 3 columns



In [4]:

    
# Write the ce10 coordinates, one ATAC height column per stage in
# config['stages'], and the ATAC source column to a tab-separated file.
fp_ = 'annot_eLife_revised/Figure 1-source data 1. Accessible sites.txt'
col_ = ['chrom_ce10', 'start_ce10', 'end_ce10']
col_.extend('atac_%s_height' % (stage,) for stage in config['stages'])
col_.append('atac_source')
df_regl[col_].to_csv(fp_, sep='\t', index=False, header=True)



In [5]:

    
# Write the annotation columns followed by the per-stage lcap pass flags to a
# tab-separated file.
fp_ = 'annot_eLife_revised/Figure 2-source data 1. Regulatory annotation.txt'

# Per-stage column-name templates, in the order their groups appear in the output.
lcap_templates_ = (
    'lcap_%s_fwd_passed_jump',
    'lcap_%s_fwd_passed_incr',
    'lcap_%s_rev_passed_jump',
    'lcap_%s_rev_passed_incr',
)

col_ = [
    'chrom_ce10', 'start_ce10', 'end_ce10',
    'chrom_ce11', 'start_ce11', 'end_ce11',
    'annot', 'annot_fwd', 'annot_rev',
    'promoter_gene_id_fwd', 'promoter_locus_id_fwd', 'promoter_gene_biotype_fwd',
    'promoter_gene_id_rev', 'promoter_locus_id_rev', 'promoter_gene_biotype_rev',
    'associated_gene_id', 'associated_locus_id',
    'tss_fwd_ce10', 'tss_rev_ce10', 'tss_fwd_ce11', 'tss_rev_ce11',
    'scap_fwd_passed', 'scap_rev_passed',
]
# All stages for the first template, then all stages for the second, and so on.
col_ += [
    template % (stage,)
    for template in lcap_templates_
    for stage in config['stages']
]
df_regl[col_].to_csv(fp_, sep='\t', index=False, header=True)



In [6]:

    
# Write the ce10 coordinates, the devel/ageing dynamic flags and cluster labels,
# and the TF-binding summary columns to a tab-separated file.
fp_ = 'annot_eLife_revised/Figure 4-source data 1. Promoter accessibility.txt'
col_ = (
    ['chrom_ce10', 'start_ce10', 'end_ce10']
    + ['devel_is_dynamic', 'ageing_is_dynamic']
    + ['devel_cluster_label', 'ageing_cluster_label']
    + ['HOTness', 'factor_count', 'factor_names']
)
df_regl[col_].to_csv(fp_, sep='\t', index=False, header=True)



In [7]:

    
# Re-export the factor/dataset table: the input is read without a header row
# (column names are supplied here) and written back out with one.
fp_ = 'annot_eLife_revised/20180223_modERN_modENCODE_peak_assignment/ce10.mod_factors_datasets.txt'
df_ = pd.read_csv(
    fp_,
    sep='\t',
    header=None,
    names=['factor', 'dataset_name', 'dataset_id'],
)

fp_ = 'annot_eLife_revised/Figure 5-source data 1. TF datasets.txt'
df_.to_csv(fp_, sep='\t', index=False, header=True)

HOTness	HOT	cold	none
factor_count
0	0	0	12854
1	0	6301	0
2	0	3801	0
3	0	2473	0
4	0	1825	0
5	0	1519	0
6	0	1275	0
7	0	1118	0
8	0	866	0
9	0	733	0
10	0	644	0
11	0	511	0
12	0	455	0
13	0	434	0
14	0	359	0
15	0	322	0
16	0	318	0
17	0	280	0
18	0	263	0
19	247	0	0
20	227	0	0
21	215	0	0
22	206	0	0
23	206	0	0
24	175	0	0
25	157	0	0
26	159	0	0
27	146	0	0
28	125	0	0
29	130	0	0
...	...	...	...
138	6	0	0
139	8	0	0
140	10	0	0
141	4	0	0
142	6	0	0
143	8	0	0
144	9	0	0
145	4	0	0
146	7	0	0
147	6	0	0
148	3	0	0
149	5	0	0
150	1	0	0
151	2	0	0
152	7	0	0
153	5	0	0
154	3	0	0
155	4	0	0
156	1	0	0
157	2	0	0
158	1	0	0
159	1	0	0
160	1	0	0
162	1	0	0
164	2	0	0
165	3	0	0
166	2	0	0
169	1	0	0
170	1	0	0
173	1	0	0