In [1]:
%run ~/relmapping/annot/notebooks/__init__.ipynb


/mnt/home3/jj374/anaconda36/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools
os.getcwd(): /mnt/beegfs/scratch_copy/ahringer/jj374/lab/relmapping

In [2]:
def df_imshow(df_, ax=None, row_labels=None, col_labels=None, vmin=-0.6, vmax=+0.6, rotation=70, *args, **kwargs):
    """Draw a 2-D table as a heatmap with every cell annotated by its value.

    Parameters
    ----------
    df_ : 2-D table (e.g. ``pandas.DataFrame``)
        Values to display. Each cell is also written on top of the image
        formatted as ``'%.3f'``.
    ax : axes object, optional
        Target axes; defaults to ``plt.gca()``.
    row_labels, col_labels : sequence, optional
        Tick labels for the y / x axis; default to ``df_.index`` /
        ``df_.columns``.
    vmin, vmax : float
        Colour scale limits forwarded to ``ax.imshow``.
    rotation : float
        Rotation (degrees) of the x tick labels.
    *args, **kwargs
        Forwarded to ``ax.imshow``. ``cmap`` and ``interpolation`` default to
        ``'coolwarm'`` and ``'nearest'`` and may be overridden via ``kwargs``.
        Extra positional arguments land right after the data argument of
        ``imshow``, so they can still collide with the keyword defaults;
        prefer keywords.

    Returns
    -------
    None
    """
    if ax is None:
        ax = plt.gca()
    if row_labels is None:
        row_labels = df_.index
    if col_labels is None:
        col_labels = df_.columns

    # Treat colormap/interpolation as overridable defaults: hard-coding them as
    # keywords in the call made `df_imshow(..., cmap=...)` raise a TypeError.
    kwargs.setdefault('cmap', 'coolwarm')
    kwargs.setdefault('interpolation', 'nearest')
    ax.imshow(df_, *args, vmin=vmin, vmax=vmax, **kwargs)

    # Column labels go on top of the matrix
    ax.xaxis.tick_top()
    ax.set_xticks(range(len(col_labels)))
    ax.set_yticks(range(len(row_labels)))
    ax.set_xticklabels(col_labels, rotation=rotation)
    ax.set_yticklabels(row_labels)

    # Print the numeric value in the centre of every cell (row -> y, column -> x)
    for (i_row, i_col), value in np.ndenumerate(df_):
        ax.text(i_col, i_row, '%.3f' % (value,), color='k',
                horizontalalignment='center', verticalalignment='center')

def dnase_mnase_staging(libid):
    """Correlate DNase/MNase samples of one library with per-stage ATAC-seq signal.

    For every site in the regulatory annotation, the maximum signal within the
    site is read from the ATAC-seq track of each stage in
    ``config['stages_wt']`` and from the pileup track of each sample whose
    ``raw_file_1`` starts with ``libid``. Kendall's tau is computed for every
    (sample, stage) pair and the resulting matrix (rows: samples, columns:
    stages) is saved as an annotated heatmap to
    ``dnase_mnase819_geo/staging/<libid>.staging.pdf``.

    All paths are relative to the current working directory. Progress lines
    (track name and whether its file exists) are printed as tracks are read.

    Parameters
    ----------
    libid : str
        Prefix matched against the ``raw_file_1`` column of
        ``dnase_mnase819_geo/dnase_mnase_geo1_samples.tsv``.

    Returns
    -------
    None
    """
    fp = 'annot_Apr27/Fig2D2_regulatory_annotation_Apr27.tsv'
    df = pd.read_csv(fp, sep='\t')[yp.NAMES_BED3].sort_values(yp.NAMES_BED3).reset_index(drop=True)

    def peak_heights(fp_track):
        # Maximum (NaN-ignoring) signal within every annotated site
        regions = yp.read_regions(fp_track, df.chrom.tolist(), df.start.tolist(), df.end.tolist())
        return list(map(np.nanmax, regions))

    # Peak heights for every hypersensitive site, one column per ATAC-seq stage.
    # Column names are remembered explicitly instead of being recovered later by
    # position, so the number of stages is not hard-coded.
    atac_columns = []
    for stage in config['stages_wt']:
        fp_ = 'atac814_geo/tracks/atac_%s.bw' % (stage,)
        print(stage, os.path.isfile(fp_))
        atac_column = 'atac_%s' % (stage,)
        df[atac_column] = peak_heights(fp_)
        atac_columns.append(atac_column)

    # Samples of this library; the sheet is read and filtered once and reused below
    df_samples = pd.read_csv('dnase_mnase819_geo/dnase_mnase_geo1_samples.tsv', sep='\t')
    df_samples = df_samples[df_samples.raw_file_1.str.startswith(libid)].reset_index(drop=True)
    sample_titles = df_samples['title'].tolist()

    # Peak heights for every site, one column per DNase/MNase sample
    step_ = 'tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm_blacklist.rm_q10.macs2_pe_lt300'
    for _, r in df_samples.iterrows():
        fp_sample = pf(r['bid'], step_, '_treat_pileup.bw', 'dnase_mnase')
        print(r['title'], r['bid'], os.path.isfile(fp_sample))
        df[r['title']] = peak_heights(fp_sample)

    # Rank correlation between every ATAC-seq stage and every sample
    df_ = collections.OrderedDict()
    for atac_column in atac_columns:
        df_[atac_column] = collections.OrderedDict(
            (title, sp.stats.kendalltau(df[atac_column], df[title]).correlation)
            for title in sample_titles
        )

    # Rows follow the sample-sheet order, columns the stage order
    df_ct = pd.DataFrame.from_dict(df_, orient='columns').loc[sample_titles]

    fp_ = 'dnase_mnase819_geo/staging/%s.staging.pdf' % (libid,)
    # Safe under parallel execution: exist_ok tolerates a concurrent mkdir
    os.makedirs(os.path.dirname(fp_), exist_ok=True)
    fig, ax = plt.subplots(figsize=(12, 12))
    try:
        ax.set_xlabel('ATAC-seq stage')
        ax.set_ylabel('DNase/MNase sample')
        df_imshow(df_ct, ax=ax)
        fig.savefig(fp_, dpi=600, transparent=True, bbox_inches='tight')
    finally:
        # Release the figure so repeated calls in one process do not accumulate
        # open figures; the PDF on disk is the product of this function.
        plt.close(fig)

# Sequential variant of the call below (one library id at a time):
#for libid in config['dnase_mnase819_rep'].keys():
#    dnase_mnase_staging(libid)
# Apply dnase_mnase_staging to every key of config['dnase_mnase819_rep'] with n_jobs=10.
# NOTE(review): pmap is defined outside this cell; presumably a joblib-backed parallel
# map (the recorded output shows a "[Parallel(n_jobs=10)]" line) -- confirm.
# dnase_mnase_staging returns None, so the cell's Out value is a list of None.
pmap(dnase_mnase_staging, config['dnase_mnase819_rep'].keys(), n_jobs=10)


wt_emb True
wt_emb True
wt_emb True
wt_emb True
wt_emb True
wt_emb True
wt_emb True
wt_emb True
wt_emb True
wt_emb True
wt_l1 True
wt_l1 True
wt_l1 True
wt_l1 True
wt_l1 True
wt_l1 True
wt_l1 True
wt_l1 True
wt_l1 True
wt_l1 True
wt_l2 True
wt_l2 True
wt_l2 True
wt_l2 True
wt_l2 True
wt_l2 True
wt_l2 True
wt_l2 True
wt_l2 True
wt_l2 True
wt_l3 True
wt_l3 True
wt_l3 True
wt_l4 True
wt_l3 True
wt_l3 True
wt_l3 True
wt_l3 True
wt_l3 True
wt_l3 True
wt_l3 True
wt_ya True
wt_l4 True
wt_l4 True
wt_l4 True
wt_l4 True
wt_l4 True
wt_l4 True
wt_l4 True
wt_l4 True
wt_l4 True
wt_L3_DNase-seq_rep1_2.5U_ml mpg130809_4064 True
wt_ya True
wt_ya True
wt_ya True
wt_ya True
wt_ya True
wt_ya True
wt_ya True
wt_ya True
wt_ya True
wt_L3_DNase-seq_rep1_10U_ml mpg130809_4065 True
wt_L2_DNase-seq_rep1_2.5U_ml mpg150203_6307 True
wt_emb_DNase-seq_rep2_2.5U_ml mpg130828_4176 True
wt_L3_DNase-seq_rep2_2.5U_ml mpg130919_4279 True
wt_emb_DNase-seq_rep1_2.5U_ml mpg130828_4167 True
wt_L4_DNase-seq_rep1_2.5U_ml mpg150203_6318 True
wt_L1_DNase-seq_rep1_2.5U_ml mpg140522_5035 True
wt_L2_DNase-seq_rep2_2.5U_ml mpg150521_6953 True
wt_L4_DNase-seq_rep2_2.5U_ml mpg150203_6329 True
wt_L1_DNase-seq_rep2_2.5U_ml mpg150603_7043 True
wt_L3_DNase-seq_rep1_25U_ml mpg130809_4066 True
wt_L4_DNase-seq_rep1_5U_ml mpg150203_6319 True
wt_L2_DNase-seq_rep1_5U_ml mpg150203_6308 True
wt_L1_DNase-seq_rep1_5U_ml mpg140522_5036 True
wt_emb_DNase-seq_rep1_5U_ml mpg130828_4168 True
wt_emb_DNase-seq_rep2_5U_ml mpg130828_4177 True
wt_L3_DNase-seq_rep1_50U_ml mpg130809_4067 True
wt_L4_DNase-seq_rep2_5U_ml mpg150203_6330 True
wt_L3_DNase-seq_rep2_5U_ml mpg130919_4280 True
wt_L2_DNase-seq_rep2_5U_ml mpg150521_6954 True
wt_L1_DNase-seq_rep2_5U_ml mpg150603_7044 True
wt_L4_DNase-seq_rep1_10U_ml mpg150203_6320 True
wt_L2_DNase-seq_rep1_10U_ml mpg150203_6309 True
wt_L3_DNase-seq_rep1_100U_ml mpg130809_4068 True
wt_emb_DNase-seq_rep1_10U_ml mpg130828_4169 True
wt_L1_DNase-seq_rep1_10U_ml mpg140522_5037 True
wt_emb_DNase-seq_rep2_10U_ml mpg130705_3279 True
wt_L2_DNase-seq_rep2_10U_ml mpg150521_6955 True
wt_L3_DNase-seq_rep2_10U_ml mpg130919_4281 True
wt_L4_DNase-seq_rep2_10U_ml mpg150203_6331 True
wt_L1_DNase-seq_rep2_10U_ml mpg150603_7045 True
wt_L4_DNase-seq_rep1_25U_ml mpg150203_6321 True
wt_L2_DNase-seq_rep1_25U_ml mpg150203_6310 True
wt_L3_DNase-seq_rep2_25U_ml mpg130919_4282 True
wt_L3_DNase-seq_rep1_200U_ml mpg130809_4070 True
wt_emb_DNase-seq_rep2_25U_ml mpg130705_3280 True
wt_L1_DNase-seq_rep1_25U_ml mpg140522_5038 True
wt_emb_DNase-seq_rep1_25U_ml mpg130828_4170 True
wt_L2_DNase-seq_rep2_25U_ml mpg150521_6956 True
wt_L4_DNase-seq_rep2_25U_ml mpg150203_6332 True
wt_L1_DNase-seq_rep2_25U_ml mpg150603_7046 True
wt_L2_DNase-seq_rep1_50U_ml mpg150203_6311 True
wt_L4_DNase-seq_rep1_50U_ml mpg150203_6322 True
wt_L3_DNase-seq_rep1_400U_ml mpg130809_4071 True
wt_L3_DNase-seq_rep2_50U_ml mpg130919_4283 True
wt_L4_DNase-seq_rep2_50U_ml mpg150203_6333 True
wt_L2_DNase-seq_rep2_50U_ml mpg150521_6957 True
wt_emb_DNase-seq_rep1_50U_ml mpg130828_4171 True
wt_L1_DNase-seq_rep1_50U_ml mpg140522_5039 True
wt_emb_DNase-seq_rep2_50U_ml mpg130705_3281 True
wt_L2_DNase-seq_rep1_100U_ml mpg150203_6312 True
wt_L1_DNase-seq_rep2_50U_ml mpg150603_7047 True
wt_L4_DNase-seq_rep1_100U_ml mpg150203_6323 True
wt_L3_DNase-seq_rep1_800U_ml mpg130809_4072 True
wt_L4_DNase-seq_rep2_100U_ml mpg150203_6337 True
wt_L2_DNase-seq_rep2_100U_ml mpg150521_6958 True
wt_emb_DNase-seq_rep2_100U_ml mpg130705_3282 True
wt_L3_DNase-seq_rep2_100U_ml mpg130919_4284 True
wt_emb_DNase-seq_rep1_100U_ml mpg130828_4172 True
wt_L1_DNase-seq_rep1_100U_ml mpg140522_5040 True
wt_L4_DNase-seq_rep1_200U_ml mpg150203_6324 True
wt_L2_DNase-seq_rep1_200U_ml mpg150203_6313 True
wt_L1_DNase-seq_rep2_100U_ml mpg150603_7048 True
wt_L4_DNase-seq_rep2_200U_ml mpg150203_6334 True
wt_L2_DNase-seq_rep2_200U_ml mpg150521_6959 True
wt_emb True
wt_L3_DNase-seq_rep2_200U_ml mpg130919_4285 True
wt_L4_DNase-seq_rep1_400U_ml mpg150203_6325 True
wt_L2_DNase-seq_rep1_400U_ml mpg150203_6314 True
wt_emb_DNase-seq_rep1_200U_ml mpg130828_4173 True
wt_L1_DNase-seq_rep1_200U_ml mpg140522_5041 True
wt_emb_DNase-seq_rep2_200U_ml mpg130705_3283 True
wt_L1_DNase-seq_rep2_200U_ml mpg150603_7049 True
wt_L4_DNase-seq_rep2_400U_ml mpg150203_6335 True
wt_l1 True
wt_L2_DNase-seq_rep2_400U_ml mpg150521_6960 True
wt_L2_DNase-seq_rep1_800U_ml mpg150203_6315 True
wt_L4_DNase-seq_rep1_800U_ml mpg150203_6326 True
wt_L3_DNase-seq_rep2_400U_ml mpg130919_4286 True
wt_emb_DNase-seq_rep1_400U_ml mpg130828_4174 True
wt_L1_DNase-seq_rep1_400U_ml mpg140522_5042 True
wt_L4_DNase-seq_rep2_800U_ml mpg150203_6336 True
wt_L1_DNase-seq_rep2_400U_ml mpg150603_7050 True
wt_emb_DNase-seq_rep2_400U_ml mpg130705_3284 True
wt_l2 True
wt_L2_DNase-seq_rep2_800U_ml mpg150521_6961 True
wt_L3_DNase-seq_rep2_800U_ml mpg130919_4287 True
wt_L1_DNase-seq_rep2_800U_ml mpg150603_7051 True
wt_L1_DNase-seq_rep1_800U_ml mpg140522_5043 True
wt_emb_DNase-seq_rep2_800U_ml mpg130705_3285 True
wt_l3 True
wt_emb_DNase-seq_rep1_800U_ml mpg130828_4175 True
wt_emb True
wt_emb True
wt_emb True
wt_l4 True
wt_l1 True
wt_l1 True
wt_l1 True
wt_ya True
wt_l2 True
wt_l2 True
wt_YA_DNase-seq_rep1_2.5U_ml mpg140401_4970 True
wt_l2 True
wt_l3 True
wt_l3 True
wt_YA_DNase-seq_rep1_10U_ml mpg140401_4971 True
wt_l3 True
wt_l4 True
wt_YA_DNase-seq_rep1_25U_ml mpg140401_4972 True
wt_l4 True
wt_l4 True
wt_YA_DNase-seq_rep1_50U_ml mpg140401_4973 True
wt_ya True
wt_ya True
wt_ya True
wt_YA_DNase-seq_rep1_100U_ml mpg140401_lane8 True
wt_YA_DNase-seq_rep2_2.5U_ml mpg150603_7023 True
wt_emb_MNase-seq_rep1_0.1U_ml mpg130809_4054 True
wt_YA_DNase-seq_rep1_200U_ml mpg140401_4975 True
wt_emb_MNase-seq_rep2_0.025U_ml mpg141028_5513 True
wt_YA_DNase-seq_rep2_5U_ml mpg150603_7024 True
wt_YA_DNase-seq_rep1_400U_ml mpg140401_4976 True
wt_emb_MNase-seq_rep1_0.25U_ml mpg130809_4055 True
wt_emb_MNase-seq_rep2_0.05U_ml mpg141028_5514 True
wt_YA_DNase-seq_rep1_800U_ml mpg140401_4977 True
wt_YA_DNase-seq_rep2_10U_ml mpg150603_7025 True
wt_emb_MNase-seq_rep1_0.5U_ml mpg130809_4056 True
wt_emb_MNase-seq_rep2_0.1U_ml mpg141028_5515 True
wt_YA_DNase-seq_rep2_25U_ml mpg150603_7026 True
wt_emb_MNase-seq_rep1_1U_ml mpg130809_4057 True
wt_emb_MNase-seq_rep2_0.25U_ml mpg141028_5516 True
wt_YA_DNase-seq_rep2_50U_ml mpg150603_7027 True
wt_emb_MNase-seq_rep1_4U_ml mpg130809_4058 True
wt_emb_MNase-seq_rep2_0.5U_ml mpg141028_5517 True
wt_YA_DNase-seq_rep2_100U_ml mpg150603_7028 True
wt_emb_MNase-seq_rep1_16U_ml mpg130809_4061 True
wt_emb_MNase-seq_rep2_1U_ml mpg141028_5518 True
wt_YA_DNase-seq_rep2_200U_ml mpg150603_7029 True
wt_YA_DNase-seq_rep2_400U_ml mpg150603_7030 True
wt_emb_MNase-seq_rep2_4U_ml mpg141028_5519 True
wt_YA_DNase-seq_rep2_800U_ml mpg150603_7031 True
wt_emb_MNase-seq_rep2_8U_ml mpg141028_5520 True
wt_emb_MNase-seq_rep2_16U_ml mpg141028_5521 True
[Parallel(n_jobs=10)]: Done  14 out of  14 | elapsed: 15.6min finished
Out[2]:
[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]