In [5]:
import re
import sys
import pandas as pd
import brainbox as bb
from matplotlib import pyplot as plt
import scipy.spatial.distance as dist
import scipy.cluster.hierarchy as clh
In [6]:
in_path = '/data1/abide/Out/Remote/some_failed/out'
out_path = '/data1/abide/Analysis/Remote/some_missing'
metric = 'stability_maps'
In [7]:
file_dict = bb.fileOps.grab_files(in_path, '.nii.gz', sub=metric)
In [8]:
array_dict = bb.dataOps.read_files(file_dict)
This could be done in parallel if Marc gives me more memory
In [6]:
result_list = []
for n_id in range(10):
sys.stdout.write('\rRunning network {}.'.format(n_id))
sys.stdout.flush()
eucl = dist.squareform(dist.pdist(array_dict[metric][..., n_id], 'euclidean'))
Y = clh.linkage(eucl, method='ward')
result_list.append((eucl, Y))
In [8]:
f = plt.figure(figsize=(18, 50))
order_net = None
for n_id in range(10):
n_id += 1
sys.stdout.write('\rRunning network {}.'.format(n_id))
sys.stdout.flush()
(eucl, Y) = result_list[n_id-1]
ax = f.add_subplot(5,2,n_id)
subdend = bb.visuOps.add_subplot_axes(ax, [0, 0.71, 1, 0.29])
submat = bb.visuOps.add_subplot_axes(ax, [0, 0, 1, 0.7])
if order_net:
Y = result_list[order_net][1]
Z1 = clh.dendrogram(Y, ax=subdend)
if order_net:
subdend.matshow(np.ones((100,20)), aspect='auto')
idx = Z1['leaves']
tmp = eucl[idx, :]
D = tmp[:, idx]
a = submat.matshow(-D, aspect='auto')
submat.set_xticks([])
submat.set_yticks([])
subdend.set_xticks([])
subdend.set_yticks([])
ax.set_xticks([])
ax.set_yticks([])
ax.set_title('Network {}'.format(n_id))
f.suptitle('Stability Maps')
Out[8]:
In [16]:
pheno_path = '/home/surchs/Project/abide/pheno/pheno_full.csv'
pheno = pd.read_csv(pheno_path)
In [30]:
# Get the real sub ids from the file names
sub_ids = np.array([int64(re.search(r'(?<=\d{2})\d{5}', sub_id).group()) for sub_id in file_dict['sub_name']])
In [31]:
# Get the relevant section of the pheno data
this_pheno = pheno[pheno['SUB_ID'].isin(sub_ids)]
In [38]:
# Define the network
netw = 2
# Build the distance metric
eucl = dist.squareform(dist.pdist(array_dict[metric][..., netw], 'euclidean'))
Y = clh.linkage(eucl, method='ward')
# Make clusters out of the hierarchy
fl = clh.fcluster(Y,10,criterion='maxclust')
In [56]:
# Define the cluster
clust = 1
# Get the subjects belonging to this cluster
cl_subs = sub_ids[fl == clust]
cl_phen = pheno[pheno['SUB_ID'].isin(cl_subs)]
In [65]:
cl_phen[['AGE_AT_SCAN', 'FIQ']]
Out[65]:
In [67]:
cl_phen[['AGE_AT_SCAN', 'SITE_ID']].groupby('SITE_ID').aggregate(np.median).plot()
Out[67]:
In [53]:
cl_phen
Out[53]:
In [ ]: