In [1]:
%run ~/relmapping/annot/notebooks/__init__.ipynb
In [2]:
# Read states
fp_ee_auto = 'wget/www.pnas.org/content/suppl/2016/10/25/1608162113.DCSupplemental/EE_autosome_states.csv'
fp_ee_chrx = 'wget/www.pnas.org/content/suppl/2016/10/25/1608162113.DCSupplemental/EE_chrX_states.csv'
fp_l3_auto = 'wget/www.pnas.org/content/suppl/2016/10/25/1608162113.DCSupplemental/L3_autosome_states.csv'
fp_l3_chrx = 'wget/www.pnas.org/content/suppl/2016/10/25/1608162113.DCSupplemental/L3_chrX_states.csv'
df_ee_auto = pd.read_csv(fp_ee_auto, names=['chrom', 'start', 'end', 'state_id'])
df_ee_auto['chrom'] = df_ee_auto['chrom'].map(lambda chrom: 'chr' + chrom)
df_l3_auto = pd.read_csv(fp_l3_auto, names=['chrom', 'start', 'end', 'state_id'])
df_l3_auto['chrom'] = df_l3_auto['chrom'].map(lambda chrom: 'chr' + chrom)
print(len(df_ee_auto), 'EE autosomal states')
print(len(df_l3_auto), 'L3 autosomal states')
ee_statekey = collections.OrderedDict([
(1, "Promoter I"),
(2, "5' proximal and gene body"),
(3, "Txn elongation I: exon, 3'"),
(4, "Txn elongation II: exon and intron"),
(5, "Txn elongation III: exon and gene end"),
(6, "Txn elongation IV: low expression and repeats"),
(7, "Txn elongation V: introns and repeats"),
(8, "wk Promoter, enhancer, miRNAs"),
(9, "Enhancer II: intergenic"),
(10, "Enhancer III: weak"),
(11, "Border"),
(12, "Repeats, intergenic, low expr introns"),
(13, "Retrotransposons, pseudogenes, H3K9me3, H3K27me3"),
(14, "Mixed, tissue specific"),
(15, "Repeats, RNA pseudogenes, H3K9me2"),
(16, "Intergenic, silent genes, piRNAs"),
(17, "Pc/H3K27me3 I: low expr/silent and pseudogenes"),
(18, "Pc/H3K27me3 II: low expr/silent, intergenic and gene body"),
(19, "Pc/H3K27me3 III: low expr/silent, gene body"),
(20, "H3K9me3 and H3K27me3: silent genes and pseudogenes"),
])
l3_statekey = collections.OrderedDict([
(1, "Promoter"),
(2, "5' proximal and gene body"),
(3, "Txn elongation I: exon"),
(4, "Txn elongation II: exon and intron"),
(5, "Txn elongation III: exon and gene end"),
(6, "Txn elongation IV: low expression and repeats"),
(7, "Txn elongation V: introns and repeats"),
(8, "Enhancer I: intronic"),
(9, "Enhancer II: intergenic"),
(10, "Enhancer III: weak"),
(11, "Border"),
(12, "Repeats, low expr introns, AT rich"),
(13, "Retrotransposons, pseudogenes, H3K9me3, H3K27me3"),
(14, "Mixed, tissue specific"),
(15, "Repeats, RNA pseudogenes, H3K9me2"),
(16, "Intergenic, silent genes, piRNAs amd repeats"),
(17, "Pc/H3K27me3 I: low expr/silent and pseudogenes"),
(18, "Pc/H3K27me3 II: low expr/silent"),
(19, "Pc/H3K27me3 III: low expr/silent, gene body"),
(20, "H3K9me3 and H3K27me3: silent genes and pseudogenes"),
])
colourkey = collections.OrderedDict([
(1, '255,0,0'),
(2, '234,155,86'),
(3, '55,126,34'),
(4, '124,146,72'),
(5, '200,214,161'),
(6, '117,251,142'),
(7, '213,254,125'),
(8, '234,51,127'),
(9, '237,120,203'),
(10, '244,185,195'),
(11, '117,20,245'),
(12, '0,0,245'),
(13, '97,140,207'),
(14, '120,67,21'),
(15, '188,203,226'),
(16, '128,128,128'),
(17, '219,217,198'),
(18, '195,189,155'),
(19, '146,138,91'),
(20, '13,13,13'),
])
df_ee_auto['state'] = df_ee_auto['state_id'].map(lambda state_id: ee_statekey[state_id])
df_l3_auto['state'] = df_l3_auto['state_id'].map(lambda state_id: l3_statekey[state_id])
df_ee_auto['colour'] = df_ee_auto['state_id'].map(lambda state_id: colourkey[state_id])
df_l3_auto['colour'] = df_l3_auto['state_id'].map(lambda state_id: colourkey[state_id])
In [4]:
df_ee_auto.to_csv('annot/FigB_chromatin_states/Evans2016_EE.tsv', sep='\t', index=False)
df_l3_auto.to_csv('annot/FigB_chromatin_states/Evans2016_L3.tsv', sep='\t', index=False)
write_gffbed('annot/FigB_chromatin_states/Evans2016_EE.bed',
chrom = df_ee_auto['chrom'],
start = df_ee_auto['start'],
end = df_ee_auto['end'],
name = df_ee_auto['state'],
attr = df_ee_auto[['state_id', 'state', 'colour']],
itemRgb = df_ee_auto['colour'],
)
write_gffbed('annot/FigB_chromatin_states/Evans2016_L3.bed',
chrom = df_l3_auto['chrom'],
start = df_l3_auto['start'],
end = df_l3_auto['end'],
name = df_l3_auto['state'],
attr = df_l3_auto[['state_id', 'state', 'colour']],
itemRgb = df_l3_auto['colour'],
)
In [4]:
# Assign autosomal peaks to states
df_regl = regl_addons().query("chrom != 'chrX'")
df_mode = regl_mode().query("chrom != 'chrX'")
df_regl['state_ee'] = BedTool.from_dataframe(df_mode).map(
b=BedTool.from_dataframe(df_ee_auto[['chrom', 'start', 'end', 'state']]),
c=4, o='distinct',
).to_dataframe()['name']
df_regl['state_l3'] = BedTool.from_dataframe(df_mode).map(
b=BedTool.from_dataframe(df_l3_auto[['chrom', 'start', 'end', 'state']]),
c=4, o='distinct',
).to_dataframe()['name']
In [5]:
ct_ = pd.crosstab(
pd.Categorical(df_regl['annot']),
pd.Categorical(df_regl['state_ee'])
).loc[config['annot'], list(ee_statekey.values())]
fig = plt.figure(figsize=(16, 5))
plt.gca().set_xlabel('Chromatin state that overlaps the accessible site in (Evans et al., 2016), EE stage')
plt.gca().set_ylabel('Annotation of the accessible site')
yp.imshow_rpc(ct_, cmap='Reds', vmin=0, vmax=+40, rotation=87, boundaries=np.linspace(0, 22, 23, endpoint=True))
plt.savefig('annot/FigB_chromatin_states/annot_vs_Evans2016_EE.png', bbox_inches='tight')
In [6]:
ct_ = pd.crosstab(
pd.Categorical(df_regl['annot']),
pd.Categorical(df_regl['state_l3'])
).loc[config['annot'], list(l3_statekey.values())]
fig = plt.figure(figsize=(16, 5))
plt.gca().set_xlabel('Chromatin state that overlaps the accessible site in (Evans et al., 2016), L3 stage')
plt.gca().set_ylabel('Annotation of the accessible site')
yp.imshow_rpc(ct_, cmap='Reds', vmin=0, vmax=+40, rotation=87, boundaries=np.linspace(0, 30, 31, endpoint=True))
plt.savefig('annot/FigB_chromatin_states/annot_vs_Evans2016_L3.png', bbox_inches='tight')
In [ ]: