Pull a test set of ABIDE subjects and read in each network for each metric. Then compute the distance matrix and the linkage, and display selected phenotype variables for each cluster.


In [1]:
# Imports
import re
import numpy as np
import pandas as pd
import brainbox as bb
from matplotlib import pyplot as plt
import scipy.cluster.hierarchy as clh

In [2]:
# Paths
# Root directory that the file search in the following cell starts from.
debug_path = "/data1/abide/Test/Out/Debug/All"

In [3]:
# Find the data
metric = 'stability_maps'
file_dict = bb.fileOps.grab_files(debug_path, '.nii.gz', metric)
# Get subject IDs of the files I just read in.
# The pattern picks the first run of five digits that directly follows two
# other digits in each entry of file_dict['sub_name'].
# `np.int64` is spelled out in full: a bare `int64` only exists when the
# namespace was populated by %pylab, so on a fresh kernel with the imports
# above this line would raise a NameError.
# NOTE(review): re.search returns None for a name without such a digit run,
# which would surface here as an AttributeError on `.group()`.
data_subs = np.array([np.int64(re.search(r'(?<=\d{2})\d{5}', sub_id).group())
                      for sub_id in file_dict['sub_name']])



I will be pulling files from /data1/abide/Test/Out/Debug/All/stability_maps

In [4]:
# Read the data
# Index of the network that is requested from the reader.
network = 1
data_dict = bb.fileOps.read_files(
    file_dict,
    network=network,
)


I found 65 files to load.
 100.0 % done 0.00 seconds to go.
We are done

In [5]:
# Compute linkage on the data
# Returns both the distance matrix and the linkage built from it.
# NOTE(review): network=0 here differs from the `network = 1` used when the
# files were read - presumably it indexes into the already loaded data;
# confirm against brainbox.dataOps.calc_link.
distance, linkage = bb.dataOps.calc_link(
    data_dict,
    metric,
    network=0,
)

In [6]:
# Visualize the distance matrix and dendrogram
f = plt.figure(figsize=(8,8))
# Top strip of the figure: the dendrogram of the linkage
subdend = f.add_axes([0, 0.71, 1, 0.29])
D1 = clh.dendrogram(linkage, ax=subdend)
subdend.set_xticks([])

# dendrogram() draws the leaves in its own order and reports that order in
# D1['leaves']. Permute rows and columns of the matrix the same way so that
# each column sits underneath its own dendrogram leaf; plotting the matrix in
# its original order would leave the two panels misaligned.
# Assumes `distance` is the square subject-by-subject matrix that `linkage`
# was built from - TODO confirm against brainbox.dataOps.calc_link.
leaf_order = D1['leaves']
distance_sorted = np.asarray(distance)[np.ix_(leaf_order, leaf_order)]

# Lower part of the figure: the (negated) distance matrix
submat = f.add_axes([0, 0, 1, 0.7])
D2 = submat.matshow(-distance_sorted, aspect='auto')
submat.set_xticks([])


Out[6]:
[]

In [7]:
# Get the partition of the linkage
# Upper bound on the number of flat clusters cut from the tree
scale = 3
part = clh.fcluster(
    linkage,
    t=scale,
    criterion='maxclust',
)

In [8]:
# Grab the phenotype data
pheno_path = '/home/surchs/Project/abide/pheno/pheno_full.csv'
pheno = pd.read_csv(pheno_path)
# Get the subject IDs of the pheno files I just read in
pheno_subs = pheno['SUB_ID']
# Find a mask of those pheno subs for which we have brain data
pheno_mask = pheno_subs.isin(data_subs)
# Get the correct pheno data, one row per entry of data_subs and in the same
# order. The cluster labels in `part` follow the order of the brain data, and
# the plotting cell indexes pheno_data with a mask derived from `part`.
# Keeping the row order of the CSV would pair subjects with the wrong labels.
# A left merge keeps the order of data_subs; a subject without a phenotype
# entry becomes a row of NaNs instead of silently shortening the table.
# Assumes SUB_ID is unique in the CSV - the assert below fails otherwise.
pheno_data = pd.DataFrame({'SUB_ID': data_subs}).merge(
    pheno[pheno_mask], on='SUB_ID', how='left')
assert len(pheno_data) == len(data_subs), 'duplicate SUB_ID entries in the phenotype file'

In [9]:
# Define covariates of interest
# Column names of the phenotype table, one per line for easier editing.
cov_interest = [
    'DX_GROUP',
    'AGE_AT_SCAN',
    'SITE_ID',
    'SEX',
    'EYE_STATUS_AT_SCAN',
]

In [13]:
# Pull up the subjects for each cluster and show their phenotype histograms
f = plt.figure(figsize=(30,10))

# Take the cluster labels from the partition itself rather than assuming
# exactly three of them: with criterion='maxclust' the number of clusters is
# only an upper bound, and a hard-coded range breaks when `scale` changes.
cluster_ids = np.unique(part)

for position, clust in enumerate(cluster_ids, start=1):
    # Subject IDs that were assigned to this cluster
    clust_subs = data_subs[part == clust]
    # Select the phenotype rows by subject ID. A positional mask built from
    # `part` is only valid if pheno_data has the same length and row order as
    # data_subs, which a table filtered from the CSV does not guarantee.
    clust_pheno = pheno_data[pheno_data['SUB_ID'].isin(clust_subs)]
    ax_cl = f.add_subplot(1, len(cluster_ids), position)
    ax_cl.set_xticks([])
    ax_cl.set_title('Cluster {}'.format(clust))

    lt, lb, rt, rb = bb.visuOps.add_four_grid(ax_cl, ticks=True, titles=('age', 'sex', 'dx', 'fiq'))
    # Drop missing entries before plotting; hist() cannot derive a bin range
    # from NaN values.
    # NOTE(review): if the phenotype file codes missing values with a numeric
    # sentinel, those are still plotted - confirm and mask them if needed.
    lt.hist(clust_pheno['AGE_AT_SCAN'].dropna().values)
    lb.hist(clust_pheno['SEX'].dropna().values)
    rt.hist(clust_pheno['DX_GROUP'].dropna().values)
    rb.hist(clust_pheno['FIQ'].dropna().values)



In [13]:


In [13]:


In [ ]: