In [1]:
import os
# Step up one directory when the working directory path ends with 'Benchmarks'.
# NOTE(review): presumably so that project-level imports and relative paths
# resolve from the parent directory -- confirm.
started_in_benchmarks = os.getcwd().endswith('Benchmarks')
if started_in_benchmarks:
    os.chdir('..')
In [2]:
import NotebookImport
from HIV_Age_Advancement import *
In [3]:
# Both tables are read from the same HDF5 store; build its path once.
annotation_store = HDFS_DIR + 'methylation_annotation.h5'

# 'flow_sorted_data' is the input later passed to the Horvath/Hannum models;
# 'label_map' is used below as the per-sample cell-type label.
flow_sorted_data = pd.read_hdf(annotation_store, key='flow_sorted_data')
cell_type = pd.read_hdf(annotation_store, key='label_map')
In [4]:
# Map every sample label in cell_type's index to the second '_'-separated
# field of that label.
# NOTE(review): presumably that field is the donor/patient identifier --
# confirm against the label format.
patient_by_sample = {
    sample: sample.split('_')[1]
    for sample in cell_type.index
}
patient_id = pd.Series(patient_by_sample)
In [5]:
# Violin plot of the Horvath model output, passed together with the
# cell-type labels.
horvath_by_sample = run_horvath_model(flow_sorted_data)
violin_plot_pandas(cell_type, horvath_by_sample)
In [6]:
# NOTE(review): this cell repeats the plot from the preceding cell (same
# call, same arguments) and re-runs the Horvath model; it looks like a
# leftover and is a candidate for removal.
violin_plot_pandas(
    cell_type,
    run_horvath_model(flow_sorted_data),
)
In [8]:
# Run both models on the same flow-sorted input (Horvath first, then Hannum).
horvath, hannum = (
    run_horvath_model(flow_sorted_data),
    run_hannum_model(flow_sorted_data),
)

# Plot one model's output against the other's to see how well they agree.
plot_regression(horvath, hannum)
The Hannum and Horvath models seem to disagree for CD8T cells.
In [9]:
# Per-sample gap between the two models (Horvath minus Hannum), plotted
# together with the cell-type labels.
model_gap = horvath - hannum
violin_plot_pandas(cell_type, model_gap)
In [10]:
# Combined prediction: the unweighted average of the two model outputs.
model_total = horvath + hannum
pred_c = model_total / 2
In [11]:
# Assemble one table from the combined prediction, the patient id and the
# cell-type label. `keys` pins the column labels to 0, 1 and 2 in that order.
# Without it the labels depend on whether any of the input Series carries a
# `.name`, and the integer column lookups on `d2` (columns 0 and 2) used when
# plotting would fail as soon as one of them does.
df = pd.concat([pred_c, patient_id, cell_type], axis=1, keys=[0, 1, 2])

# Index the table by column 1 (the patient id); columns 0 and 2 remain.
d2 = df.set_index(1)
In [12]:
# 3x3 grid of regression panels: the combined prediction for rows labelled
# 'WBC' (x) against the combined prediction for each label in `ct` (y).
fig, axs = subplots(3, 3, figsize=(8, 8), sharex=True, sharey=False)
axs = list(axs.ravel())  # row-major flat list of the nine axes

ct = ['PBMC', 'Gran', 'CD4T', 'CD8T', 'Bcell', 'Mono', 'NK', 'Neu', 'Eos']

# Forwarded to plot_regression as `line_args`.
# NOTE(review): presumably one style dict per line that plot_regression draws,
# with the second line hidden via alpha=0 -- confirm against plot_regression.
ll = [
    {'color': 'grey', 'alpha': .7, 'ls': '--'},
    {'alpha': 0},
]

# In d2, column 0 is the combined prediction and column 2 the cell-type label.
# The 'WBC' series is the same for every panel, so select it once.
wbc_pred = d2.loc[d2[2] == 'WBC', 0]

for ax, c in zip(axs, ct):
    plot_regression(wbc_pred, d2.loc[d2[2] == c, 0], ax=ax, line_args=ll)
    ax.set_xlabel('WBC')
    ax.set_ylabel(c)

    # Keep only the first whitespace-separated token of the text held by the
    # fifth child artist of the axes.
    # NOTE(review): presumably this is the annotation added by plot_regression;
    # the positional index 4 is fragile and depends on what plot_regression
    # draws and in which order.
    ann = ax.get_children()[4]
    ann.set_text(ann.get_text().split()[0])

    ax.set_ybound(8, 70)

fig.tight_layout()
fig.savefig(FIGDIR + 'housemann_fig.png', dpi=300)
In [13]:
# Violin plot of the Hannum model output, passed together with the
# per-sample patient ids.
violin_plot_pandas(
    patient_id,
    hannum,
)
In [14]:
# Violin plot of the Horvath model output, passed together with the
# per-sample patient ids.
violin_plot_pandas(
    patient_id,
    horvath,
)