In [1]:
# initialise and load WB annotations
%run ~/relmapping/annot/notebooks/__init__.ipynb
In [2]:
df_hmm_ee = pd.read_csv('wget/compbio.med.harvard.edu/modencode/webpage/hihmm/iHMM.M1K16.worm_EE.bed', sep='\t', names=yp.NAMES_BED9)
df_hmm_l3 = pd.read_csv('wget/compbio.med.harvard.edu/modencode/webpage/hihmm/iHMM.M1K16.worm_L3.bed', sep='\t', names=yp.NAMES_BED9)
df_hmm_ee['chrom'] = list(map(lambda chr: 'chr' + chr, df_hmm_ee['chrom']))
df_hmm_l3['chrom'] = list(map(lambda chr: 'chr' + chr, df_hmm_l3['chrom']))
In [3]:
df_hmm_ee['strand'] = '.'
df_hmm_l3['strand'] = '.'
In [4]:
fp_ = 'annot/FigB_chromatin_states/Ho2014_EE.bed'
df_hmm_ee.to_csv(fp_, header=False, index=False, sep='\t')
fp_ = 'annot/FigB_chromatin_states/Ho2014_L3.bed'
df_hmm_l3.to_csv(fp_, header=False, index=False, sep='\t')
In [4]:
l_hmm_label = [
'1_Pro',
'2_Enh1',
'3_Enh2',
'4_Egn1',
'5_Egn2',
'6_Egn3',
'7_Egn4',
'8_Egn5',
'9_Egn6',
'10_Rep1',
'11_Rep2',
'12_Het1',
'13_Het2',
'14_Low1',
'15_Low2',
'16_Low3',
#'17_Unmap', # No accessible sites within unmapped sequence (not unreasonable)
]
df_regl = regl_addons()
df_mode = regl_mode()
df_regl['hmm_ee'] = BedTool.from_dataframe(df_mode).map(
b=BedTool.from_dataframe(df_hmm_ee[yp.NAMES_BED9[:4]]),
c=4, o='distinct',
).to_dataframe()['name']
df_regl['hmm_l3'] = BedTool.from_dataframe(df_mode).map(
b=BedTool.from_dataframe(df_hmm_l3[yp.NAMES_BED9[:4]]),
c=4, o='distinct',
).to_dataframe()['name']
In [5]:
def pie_(x, labels, sep='\n', **kwargs):
labels_annot = ['%s%s(%s sites)' % (l_, sep, yp.f_uk(x_)) for l_, x_ in zip(labels, x)]
df_ = pd.Series(x, index=labels_annot)
df_.plot(kind='pie', **kwargs)
plt.figure(figsize=(8,12))
plt.subplot(2,1,1)
vc_ = df_regl['hmm_ee'].value_counts().loc[l_hmm_label]
pie_(
vc_.values,
vc_.index.tolist(),
label='',
autopct='%.1f%%',
sep=' ',
title='all sites by (Ho et al., 2014) EE states',
startangle=90,
fontsize=12,
counterclock=False,
)
plt.axis('equal');
plt.subplot(2,1,2)
vc_ = df_regl['hmm_l3'].value_counts().loc[l_hmm_label]
pie_(
vc_.values,
vc_.index.tolist(),
label='',
autopct='%.1f%%',
sep=' ',
title='all sites by (Ho et al., 2014) L3 states',
startangle=90,
fontsize=12,
counterclock=False,
)
plt.axis('equal');
In [13]:
ct_ = pd.crosstab(
pd.Categorical(df_regl['annot']),
pd.Categorical(df_regl['hmm_ee'])
).loc[config['annot'], l_hmm_label]
fig = plt.figure(figsize=(12, 6))
plt.gca().set_xlabel('Chromatin state that overlaps the accessible site from (Ho et al., 2014), EE stage')
plt.gca().set_ylabel('Annotation of the accessible site')
#yp.imshow_enr(ct_, vmin=-1, vmax=+1)
yp.imshow_rpc(ct_, cmap='Reds', vmin=0, vmax=+40, rotation=70, boundaries=np.linspace(0, 26, 27, endpoint=True))
plt.savefig('annot/FigB_chromatin_states/annot_vs_Ho2014_EE.png', bbox_inches='tight')
In [14]:
ct_ = pd.crosstab(
pd.Categorical(df_regl['annot']),
pd.Categorical(df_regl['hmm_l3'])
).loc[config['annot'], l_hmm_label]
fig = plt.figure(figsize=(12, 6))
plt.gca().set_xlabel('Chromatin state that overlaps the accessible site from (Ho et al., 2014), L3 stage')
plt.gca().set_ylabel('Annotation of the accessible site')
#yp.imshow_enr(ct_, vmin=-1, vmax=+1)
yp.imshow_rpc(ct_, cmap='Reds', vmin=0, vmax=+40, rotation=70, boundaries=np.linspace(0, 33, 34, endpoint=True))
plt.savefig('annot/FigB_chromatin_states/annot_vs_Ho2014_L3.png', bbox_inches='tight')
In [ ]: