In [1]:
import NotebookImport
from Setup.Imports import *
In [2]:
from Setup.Read_HIV_Data import *
In [3]:
import Setup.DX_Imports as dx
Set of patients used in analysis of age advancement
In [4]:
import Parallel.Age_HIV_Features as fx
In [5]:
keepers = fx.pred_c.index
In [6]:
features = {'Age only': fx.features['Age (BH)'],
'HIV + Age': fx.features['HIV + Age (BH)'],
'HIV only': fx.features['HIV (BH)']}
probes_sets = {'PRC2': dx.prc2_probes, 'CpG island': dx.cpg_island,
'DHS': dx.dhs_site, 'Enhancer': dx.enhancer,
'Gene body': dx.gene_body, 'TSS': dx.gene_tss,
'Promoter': dx.promoter}
In [7]:
c1 = pd.DataFrame({i:v.value_counts() for i,v in features.iteritems()}).T
c1
Out[7]:
In [8]:
c2 = pd.DataFrame({i:v.value_counts() for i,v in probes_sets.iteritems()}).T
c2
Out[8]:
In [44]:
ann = dx.probe_annotations.sort(['Chromosome','Genomic_Coordinate'])
In [49]:
a,b = match_series((fx.mm > .5), (fx.r4.multi_variate.HIV < 0))
a,b = match_series(a,b)
direction = a == b
hiv_bh = fx.g_hiv
hiv_bh = (hiv_bh & (direction == False) & (fx.g_age == False)).dropna()
hiv_bh = hiv_bh.ix[probe_idx].ix[ann.index].dropna()
#hiv_bh = hiv_bh & (t_hiv.abs() > 1)
hiv_bh.value_counts()
Out[49]:
In [12]:
def manhattan(vec, chrom, coords, ax=None, ybound=None,
flow='up', ticks=True, gap=3e7):
fig, ax = init_ax(ax, figsize=(9,3))
x = 0
chr_coords = []
for i,c in enumerate(map(str, range(1,23))):
v = vec.ix[ti(chrom == c)].dropna()
series_scatter(coords + x, v, s=10, ax=ax,
color=colors[i % 5],
ann=None, alpha=1, rasterized=True)
chr_len = coords.ix[v.index].max()
x = x + chr_len + gap
chr_coords += [x - (chr_len / 2.)]
ax.set_xbound(gap, x + gap)
if ybound is not None:
ax.set_ybound(ybound[0],ybound[1])
ax.set_xlabel('Chromosome')
if ticks:
ax.set_xticks(chr_coords)
ax.set_xticklabels(map(str, range(1,23)))
else:
ax.set_xticks([])
top = flow == 'down'
prettify_ax(ax, top)
In [50]:
m = hiv_bh.mean()
w = 200
t = lambda s: sp.stats.binom_test(s, w, p=m)
v = pd.concat([pd.rolling_mean(s, w, center=True) for i,s in
hiv_bh.groupby(ann.Chromosome)])
lookup = pd.Series({i: t(i) for i in (v.dropna() * w).unique()})
biom_p = (v.dropna() * w).map(lookup).order()
In [51]:
gwas = pd.read_csv('./data/Euro_CHAVI_Setpoint_liftover.csv', index_col=0)
gwas.chromosome = gwas.chromosome.astype(str)
In [52]:
fig, axs = subplots(2,1, figsize=(10, 6))
ax = axs[0]
manhattan(-1*np.log10(biom_p), ann.Chromosome, ann.Genomic_Coordinate,
ybound=[0,12], ax=ax, ticks=False)
ax.set_yticks(range(0, 13, 2))
#ax.set_yticklabels(range(0, 26, 5), size=14)
ax.set_xlabel('Chromosome', size=14)
axs[0].set_ylabel('-logP',
size=14)
ax = axs[1]
manhattan(-1*gwas['-logP'], gwas.chromosome, gwas.Map,
ybound=[-12,0], ax=ax, flow='down')
ax.set_ylabel('GWAS P-value', size=14)
ax.set_yticks(range(-12, 1, 2))
ax.set_yticklabels(range(-12, 1, 2), size=14)
ax.set_xticklabels(range(1,23), size=14)
ax.set_xlabel('')
fig.tight_layout()
fig.savefig(FIGDIR + 'fig3_cd_n.pdf', dpi=300)
Look for associations with CD4T counts of HIV-associated probes
In [53]:
dm_cell = pd.read_csv('./data/Jaffee_Supplementary_Table_2.csv', index_col=0,
low_memory=False, skiprows=[0])
In [54]:
df = df_hiv.ix[ti(hiv_bh.dropna())]
df = df.ix[:, keepers.intersection(ti(fx.hiv == 'HIV+'))]
df = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0)
In [55]:
corr_with_cd4_m = screen_feature(np.log2(labs['CD4/CD8 ratio']), spearman_pandas, df,
align=False)
In [56]:
corr_with_cd4_m.head()
Out[56]:
In [57]:
rr = (dm_cell.CD4T_mean - dm_cell.CD8T_mean)
In [58]:
spearman_pandas(rr, corr_with_cd4_m.rho)
Out[58]:
In [23]:
vv = np.log2(corr_with_cd4_m.rho / rr).clip(-10,15).dropna()
vv.hist()
Out[23]:
In [24]:
manhattan(vv.abs(), ann.Chromosome, ann.Genomic_Coordinate,
ybound=[0,15], ticks=False)
In [25]:
dm_cell.ix['cg06872964']
Out[25]:
In [26]:
manhattan(corr_with_cd4_m.rho.abs(), ann.Chromosome, ann.Genomic_Coordinate,
ybound=[0,.45], ticks=False)
In [27]:
ss = -1*np.sign(fx.r4.multi_variate.HIV)
In [28]:
rr = np.log2(dm_cell.CD4T_mean / dm_cell.CD8T_mean).clip(-1.,1.)
draw_dist(rr*ss, hiv_bh)
In [63]:
rr = (dm_cell.CD4T_mean - dm_cell.CD8T_mean).clip(-.3,.3)
draw_dist(rr*ss, hiv_bh)
In [64]:
hla_region = ((ann.Chromosome == '6') &
ann.Genomic_Coordinate.isin(range(29570008, 33377112)))
In [67]:
rr = np.log2(dm_cell.CD4T_mean / dm_cell.CD8T_mean).clip(-1.,1.)
draw_dist(rr * ss, hiv_bh.ix[ti(hla_region)])
In [68]:
anova(hla_region, rr)
Out[68]:
In [69]:
pearson_pandas(corr_with_cd4_m.rho, rr)
Out[69]:
In [70]:
draw_dist(rr, corr_with_cd4_m.rho.abs() > .2)
In [ ]: