In [1]:
import NotebookImport
from Setup.Imports import *
In [2]:
from Setup.Read_HIV_Data import *
In [3]:
import Setup.DX_Imports as dx
Set of patients used in analysis of age advancement
In [4]:
import Parallel.Age_HIV_Features as fx
In [5]:
sns.set_context("paper", rc={"lines.linewidth": 2.5,
"font_size":14})
sns.set_style("white")
In [7]:
direction = fx.r4.multi_variate.HIV > 0
direction.name = 'up'
h = fx.g_hiv
h.name = 'HIV'
hiv_d = combine(h, direction)
hiv_d = hiv_d.replace({'both':'HIV-up',
'up': 'neutral',
'neither': 'neutral',
'HIV': 'HIV-down'})
hiv_d.value_counts()
Out[7]:
In [8]:
direction = fx.res.in_set_s1.multi_variate.age > 0
direction.name = 'up'
h = fx.g_age
h.name = 'age'
age_d = combine(h, direction)
age_d = age_d.replace({'both':'age-up',
'up': 'neutral',
'neither': 'neutral',
'age': 'age-down'})
age_d.value_counts()
Out[8]:
In [12]:
features = {'HIV-up': (hiv_d=='HIV-up') & (age_d != 'age-up'),
'HIV-down': (hiv_d=='HIV-down') & (age_d != 'age-down'),
'age-up': (age_d=='age-up') & (hiv_d != 'HIV-up'),
'age-down': (age_d=='age-down') & (hiv_d != 'HIV-down'),
'both-up': (hiv_d=='HIV-up') & (age_d=='age-up'),
'both-down': (hiv_d=='HIV-down') & (age_d=='age-down')}
probes_sets = {'PRC2': dx.prc2_probes, 'CpG island': dx.cpg_island,
'DHS': dx.dhs_site, 'Enhancer': dx.enhancer,
'Gene body': dx.gene_body, 'TSS': dx.gene_tss,
'Promoter': dx.promoter}
In [10]:
c1 = pd.DataFrame({i:v.value_counts() for i,v in features.iteritems()}).T
c1
Out[10]:
In [11]:
c2 = pd.DataFrame({i:v.value_counts() for i,v in probes_sets.iteritems()}).T
c2
Out[11]:
In [13]:
def odds_ratio(a,b):
ct = pd.crosstab(a,b).astype(float)
r = (ct.ix[0,0] * ct.ix[1,1]) / (ct.ix[0,1] * ct.ix[1,0])
return r
In [14]:
ct = pd.crosstab(hiv_d, age_d).ix[['HIV-down','neutral','HIV-up'],
['age-down','neutral','age-up']]
or_ct = pd.DataFrame({a:{b:odds_ratio(hiv_d==a, age_d==b) for b in age_d.unique()}
for a in hiv_d.unique()}).T.ix[ct.index, ct.columns]
or_ct.columns.name = None
or_ct.index.name = None
In [15]:
or_ct
Out[15]:
In [16]:
def overlap_plot(fig, ax):
sns.heatmap(np.log2(or_ct).ix[::-1], linewidths=4, cbar=False, ax=ax, square=True,
center=0)
ax.set_xticklabels([c.get_text().split('-')[-1]
for c in ax.get_xticklabels()], size=14)
ax.set_yticklabels([c.get_text().split('-')[-1]
for c in ax.get_yticklabels()], size=14)
ax.set_xlabel('Validated age markers', size=14, ha='center')
ax.set_ylabel('HIV-associated markers\n', size=14, va='center')
cbar = fig.colorbar(ax.get_children()[2], ticks=[-1.58,-1,0,1,1.58],
ax=ax, use_gridspec=True, pad=.05, aspect=10, shrink=.85)
cbar.ax.set_yticklabels(['1/3','1/2',1,2,3], size=14)
cbar.ax.set_ylabel('Odds Ratio', rotation=270, size=14)
for i,(a,v) in enumerate(ct.iterrows()):
for j,(b,c) in enumerate(v.iteritems()):
if np.log2(or_ct).abs().ix[a,b] > 1:
color = 'white'
else:
color = 'black'
ax.annotate(c, (j+.5, i+.5), ha='center', va='center', fontsize=12,
color=color)
fig, ax = subplots(figsize=(5.5,3))
overlap_plot(fig, ax)
In [32]:
associations = {}
for i,a in features.iteritems():
for j,b in probes_sets.iteritems():
associations[(i,j)] = odds_ratio(a,b.ix[fx.probe_idx])
associations = pd.Series(associations)
In [33]:
associations.unstack()
Out[33]:
In [37]:
o = ['DHS','PRC2','CpG island','Enhancer', 'TSS','Gene body','Promoter']
p = ['HIV-up','both-up', 'age-up']
df1 = associations.unstack().T.ix[o[::-1],p]
o = ['DHS','PRC2','CpG island','Enhancer', 'TSS','Gene body','Promoter']
p = ['HIV-down','both-down', 'age-down']
df2 = associations.unstack().T.ix[o[::-1],p]
In [77]:
cc = sns.color_palette()
def enrichment_plot(df, ax):
np.log2(df).plot(kind='barh', ax=ax, alpha=.9, width=.8,
color=[cc[1], cc[2], cc[3]])
ax.set_xticks([-3, -2,-1,0,1,2,3,4])
ax.set_xticklabels([.125, .25,.5,1,2,4,8,16], size=14)
ax.set_ylim(-.75, 6.72)
ax.set_xlabel('Odds ratio', size=14)
ax.legend().set_visible(False)
ax.spines['left'].set_visible(False)
ax.set_yticklabels([c.get_text().replace('-','\n')
for c in ax.get_yticklabels()], size=14)
for i in range(len(o)):
ax.axhline(i+.5, color='grey', ls='--', lw=1)
ax.legend()
prettify_ax(ax, top=False)
fig, axs = subplots(1,2, figsize=(10,5))
enrichment_plot(df2, axs[0])
axs[0].set_xbound(-2, 3.)
enrichment_plot(df1, axs[1])
axs[1].set_xbound(-2.8, 4.)
fig.tight_layout()
In [75]:
cc = sns.color_palette()[1:]
mm = fx.mm #mean methylation for control patients
def dist_plot_split(splits, ax):
for i,split in enumerate(splits):
draw_dist(mm.ix[ti(split)], ax=ax, colors=cc[i])
d = smooth_dist(mm)
ax.fill_between(d.index, 0, d, color='grey', alpha=.2)
ax.plot(d.index, d, color='grey', lw=2, zorder=-1)
ax.set_ylabel('Density of CpG markers', size=14)
ax.set_yticks([])
ax.set_xlabel('Methylation state (%)', size=14)
ax.set_xticks([0, .2, .4, .6, .8, 1.])
ax.set_xticklabels(['0', '20', '40', '60', '80', '100'],
size=14)
#ax.set_ybound(0,7.2)
ax.set_xbound(0,1)
prettify_ax(ax)
fig, axs = subplots(1,2, figsize=(12,4))
dist_plot_split([features['HIV-down'],features['both-down'],features['age-down']],
axs[0])
dist_plot_split([features['HIV-up'],features['both-up'],features['age-up']],
axs[1])
fig.tight_layout()
In [29]:
import matplotlib.gridspec as gridspec
In [30]:
rcParams['font.sans-serif'] = 'Arial'
In [79]:
plt.figure(figsize=(4.5,14))
gs1 = gridspec.GridSpec(3, 1, height_ratios=[2,3,2], hspace=.3)
#gs1.update(left=0.05, right=0.48, wspace=0.05)
ax1 = plt.subplot(gs1[0])
ax2 = plt.subplot(gs1[1])
ax3 = plt.subplot(gs1[2])
axs = [ax1, ax2, ax3]
overlap_plot(fig, ax1)
enrichment_plot(df1, axs[1])
axs[1].set_xbound(-2.8, 4.)
axs[1].legend(loc='lower right')
dist_plot_split([features['HIV-up'],features['both-up'],features['age-up']],
axs[2])
plt.savefig(FIGDIR + 'fig3_ab_c.pdf')