In [1]:
import NotebookImport
from Setup.Imports import *
In [2]:
from Setup.Read_HIV_Data import *
In [3]:
import Setup.DX_Imports as dx
Set of patients used in analysis of age advancement
In [4]:
import Parallel.Age_HIV_Features as fx
In [5]:
sns.set_context("paper", rc={"lines.linewidth": 2.5,
"font_size":14})
sns.set_style("white")
In [6]:
keepers = fx.pred_c.index
In [7]:
mm = fx.mm
def disorder_frac(direction, beta, test_set):
ct = pd.crosstab(beta < .5, direction.ix[test_set]>0)
return (1.*ct[1][1] + ct[0][0]) / ct.sum().sum()
In [8]:
model_idx = fx.hannum_model.index.union(fx.horvath_model.index)
hiv_idx = ti(fx.g_hiv)
age_idx = ti(fx.g_age)
probe_sets = {'All': probe_idx, 'Models': model_idx, 'HIV': hiv_idx}
entropy_r = pd.Series({i: disorder_frac(fx.r4.multi_variate.HIV, mm, s) for i,s in
probe_sets.iteritems()})
probe_sets = {'All': probe_idx, 'Models': model_idx, 'Age': age_idx}
entropy_r2 = pd.Series({i: disorder_frac(fx.res.in_set_s1.multi_variate.age, mm, s) for i,s in
probe_sets.iteritems()})
In [9]:
entropy_r
Out[9]:
In [10]:
entropy_r2
Out[10]:
Here I am explicitly looking for things that trend away from 50% methylation.
In [11]:
ann = dx.probe_annotations.sort(['Chromosome','Genomic_Coordinate'])
In [12]:
t_hiv = dx.ttest_df(fx.hiv == 'HIV+', fx.df_hiv)
In [13]:
draw_dist(t_hiv.abs(), fx.g_hiv)
In [14]:
a,b = match_series((fx.mm > .5), (fx.r4.multi_variate.HIV < 0))
a,b = match_series(a,b)
direction = a == b
hiv_bh = fx.g_hiv
hiv_bh = (hiv_bh & (direction == False) & (fx.g_age == False)).dropna()
hiv_bh = hiv_bh.ix[probe_idx].ix[ann.index].dropna()
#hiv_bh = hiv_bh & (t_hiv.abs() > 1)
hiv_bh.value_counts()
Out[14]:
In [15]:
def manhattan(vec, chrom, coords, ax=None, ybound=None,
flow='up', ticks=True, gap=3e7):
fig, ax = init_ax(ax, figsize=(9,3))
x = 0
chr_coords = []
for i,c in enumerate(map(str, range(1,23))):
v = vec.ix[ti(chrom == c)].dropna()
series_scatter(coords + x, v, s=10, ax=ax,
color=colors[i % 5],
ann=None, alpha=1, rasterized=True)
chr_len = coords.ix[v.index].max()
x = x + chr_len + gap
chr_coords += [x - (chr_len / 2.)]
ax.set_xbound(gap, x + gap)
if ybound is not None:
ax.set_ybound(ybound[0],ybound[1])
ax.set_xlabel('Chromosome')
if ticks:
ax.set_xticks(chr_coords)
ax.set_xticklabels(map(str, range(1,23)))
else:
ax.set_xticks([])
top = flow == 'down'
prettify_ax(ax, top)
In [16]:
hiv_bh.name = 'hit'
ann2 = ann.copy()
ann2['GC'] = np.floor(ann2.Genomic_Coordinate / 1000000)
ann2 = ann2[ann2.Chromosome.isin(map(str, range(23)))]
ann2.Chromosome = ann2.Chromosome.astype(int)
ss = ann2.groupby(['Chromosome','GC']).size()
gb = ann2.join(hiv_bh.astype(float)).groupby(['Chromosome','GC'])['hit']
In [17]:
gb.size().mean()
Out[17]:
In [18]:
ss = gb.size().reset_index()
manhattan(ss[0], ss.Chromosome.astype(str), ss.GC, gap=10)
In [46]:
dd = fx.r4.multi_variate.HIV > 0
dd.name = 'up'
hiv_dx = fx.g_hiv
hiv_dx.name = 'hiv'
dx_hiv = combine(hiv_dx, dd).replace({'up':'neither','hiv':'hiv_down', 'both':'hiv_up'})
dx_hiv.value_counts()
Out[46]:
In [47]:
dd = (dx_hiv == 'hiv_down') & (fx.mm < .5) & (fx.g_age == False)
dd = dd.ix[fx.probe_idx].ix[ann.index].dropna()
m = dd.mean()
w = 200
t = lambda s: sp.stats.binom_test(s, w, p=m)
v = pd.concat([pd.rolling_mean(s, w, center=True) for i,s in
dd.groupby(ann.Chromosome)])
lookup = pd.Series({i: t(i) for i in (v.dropna() * w).unique()})
biom_p = (v.dropna() * w).map(lookup).order()
In [19]:
m = hiv_bh.mean()
w = 200
t = lambda s: sp.stats.binom_test(s, w, p=m)
v = pd.concat([pd.rolling_mean(s, w, center=True) for i,s in
hiv_bh.groupby(ann.Chromosome)])
lookup = pd.Series({i: t(i) for i in (v.dropna() * w).unique()})
biom_p = (v.dropna() * w).map(lookup).order()
In [48]:
gwas = pd.read_csv('./data/Euro_CHAVI_Setpoint_liftover.csv', index_col=0)
gwas.chromosome = gwas.chromosome.astype(str)
In [52]:
fig, axs = subplots(2,1, figsize=(10, 6))
ax = axs[0]
manhattan(-1*np.log10(biom_p), ann.Chromosome, ann.Genomic_Coordinate,
ybound=[0,18], ax=ax, ticks=False)
ax.set_yticks(range(0, 19, 3))
#ax.set_yticklabels(range(0, 26, 5), size=14)
ax.set_xlabel('Chromosome', size=14)
axs[0].set_ylabel('-logP',
size=14)
ax = axs[1]
manhattan(-1*gwas['-logP'], gwas.chromosome, gwas.Map,
ybound=[-12,0], ax=ax, flow='down')
ax.set_ylabel('GWAS P-value', size=14)
ax.set_yticks(range(-12, 1, 2))
ax.set_yticklabels(range(-12, 1, 2), size=14)
ax.set_xticklabels(range(1,23), size=14)
ax.set_xlabel('')
fig.tight_layout()
fig.savefig(FIGDIR + 'fig3_cd_n.pdf', dpi=300)
Look for associations with CD4T counts of HIV-associated probes
In [53]:
df = df_hiv.ix[ti(hiv_bh.dropna())]
df = df.ix[:, keepers.intersection(ti(fx.hiv == 'HIV+'))]
df = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0)
In [54]:
corr_with_cd4_m = screen_feature(np.log2(labs['CD4/CD8 ratio']), spearman_pandas, df,
align=False)
In [55]:
corr_with_cd4_m.head()
Out[55]:
In [25]:
manhattan(corr_with_cd4_m.rho.abs(), ann.Chromosome, ann.Genomic_Coordinate,
ybound=[0,.45], ticks=False)
In [56]:
hla_region = ((ann.Chromosome == '6') &
ann.Genomic_Coordinate.isin(range(29570008, 33377112)))
In [57]:
m = (dx_hiv == 'hiv_down').mean()
sp.stats.binom_test(hiv_bh.ix[ti(hla_region)].sum(),
n=len(hiv_bh.ix[ti(hla_region)].dropna()), p=m)
Out[57]:
In [59]:
fisher_exact_test((dx_hiv == 'hiv_down'), hla_region)
Out[59]:
In [29]:
odds = pd.DataFrame({x: fisher_exact_test(hla_region, corr_with_cd4_m.rho.abs() > x)
for x in [.1, .15, .2, .25, .3, .35, .4]}).T
odds
Out[29]:
In [60]:
rcParams['axes.labelsize'] = 14
rcParams['xtick.labelsize'] = 14
rcParams['ytick.labelsize'] = 14
In [35]:
fig, axs = subplots(3,1, figsize=(10,6), sharex=True)
ax=axs[0]
k = ti((gwas.chromosome == '6') &
gwas.Map.isin(range(20000000, 40000000)))
x = gwas.Map.ix[k]
rr = -1*gwas['-logP']
series_scatter(x, rr, color=colors[0],
s=20, ax=ax, ann=None,
edgecolor='grey')
ax.set_ylabel('GWAS P-value')
ax.set_yticks(range(-12, 1, 2))
ax.set_yticklabels(range(-12, 1, 2), size=14)
ax.set_ylim(-12,0)
ax.set_xlabel('')
#ax.axvspan(29570008, 33377112, alpha=.1)
ax.set_xbound(x.min(), x.max())
prettify_ax(ax, top=True)
k = ti((ann.Chromosome == '6') &
ann.Genomic_Coordinate.isin(range(20000000, 40000000)))
x = ann.Genomic_Coordinate.ix[k].order()
ax = axs[1]
d = (tt[tt.abs() > 0] > 0).ix[ann.index].dropna()
v = pd.concat([pd.rolling_mean(s, 200, center=True) for i,s in
d.groupby(ann.Chromosome)])
v = (v - v.mean()) / v.std()
series_scatter(x[::5], v.ix[x.index], s=20, ann=None, color=colors[0],
edgecolor='grey', ax=ax, alpha=1)
ax.axhline(0, ls='--', color='black', lw=3)
ax.set_ylabel('Enrichment')
ax.spines['bottom'].set_visible(False)
#ax.axvspan(29570008, 33377112, alpha=.1)
ax.set_xbound(x.min(), x.max())
prettify_ax(ax)
ax.set_xlabel('')
ax = axs[2]
series_scatter(x, corr_with_cd4_m.rho.abs(),
ax=ax, s=20, ann=None, color=colors[0],
edgecolor='grey')
ax.set_xbound(x.min(), x.max())
ax.set_ylim(0)
ax.set_xticks([])
ax.set_ylabel('Correlation with\nCD4+/CD8+')
ax.set_xlabel('Genomic coordinate on chromosome 6')
#ax.axvspan(29570008, 33377112, alpha=.1)
ax.set_xbound(x.min(), x.max())
ax.set_xticks(range(20000000, 41000000, 5000000))
ax.set_xticklabels(['20MB','25MB','30MB','35MB','40MB'])
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'fig3_efg.pdf', dpi=300)
In [36]:
fig, ax = subplots(1,1, figsize=(10,2), sharex=True)
series_scatter(x, -1*np.log10(biom_p).ix[x.index],
ax=ax, s=20, ann=None, color=colors[0],
edgecolor='grey')
ax.set_xbound(x.min(), x.max())
ax.set_ylim(0,10.5)
ax.set_xlabel('Genomic coordinate on chromosome 6')
ax.set_ylabel('Binomial P\n\n ')
ax.set_xbound(x.min(), x.max())
ax.set_xticks(range(20000000, 41000000, 5000000))
ax.set_xticklabels(['20MB','25MB','30MB','35MB','40MB'])
prettify_ax(ax)
fig.tight_layout()
In [38]:
full_region = ((ann.Chromosome == '6') &
ann.Genomic_Coordinate.isin(range(25000000, 35000000)))
histone_region = ((ann.Chromosome == '6') &
ann.Genomic_Coordinate.isin(range(25000000, 29570008)))
hla_region = ((ann.Chromosome == '6') &
ann.Genomic_Coordinate.isin(range(29570008, 33377112)))
In [39]:
b = biom_p.ix[ti(histone_region)].order()
b.name= 'b'
ann.ix[b.index].join(b).head(10)
Out[39]:
In [40]:
fisher_exact_test(hiv_bh, full_region)
Out[40]:
In [41]:
fisher_exact_test(hla_region, hiv_bh)
Out[41]:
In [42]:
fisher_exact_test(hiv_bh, histone_region)
Out[42]: