Load the pre-computed linkage and then compute a cluster-by-cluster matrix of phenotype distances and p-values. For now, we have these metrics to choose from:
In [7]:
# Imports
import re
import gzip
import numpy as np
import pandas as pd
import cPickle as cp
import brainbox as bb
from matplotlib import pyplot as plt
import scipy.cluster.hierarchy as clh
Load the precomputed linkage from disk
In [18]:
# Path to the precomputed linkage: a gzip-compressed pickle file.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
linkage_path = '/data1/abide/Test/linkage.gz'
In [20]:
# Load the precomputed linkage.
# The context manager guarantees the gzip handle is closed as soon as the
# payload has been unpickled, even if unpickling raises.
# NOTE(review): unpickling executes code embedded in the file; only load
# linkage files produced by a trusted pipeline.
with gzip.open(linkage_path, 'rb') as f:
    in_data = cp.load(f)
In [21]:
# Unpack the loaded container.
# results: one entry per network; each entry is indexed elsewhere in this
#          notebook as (distance, linkage).
results = in_data[1]
# NOTE(review): the index was missing here (`in_data[]`, a syntax error).
# Element 0 is assumed to hold the subject IDs of the imaging sample —
# confirm against the script that wrote the linkage file.
data_subs = in_data[0]
Visualize the structure of the data
In [65]:
# Two side-by-side dendrograms of the first network's linkage:
# left  - truncated view (truncate_mode='lastp', p=40)
# right - the full tree
# Tick marks on the x axis are removed in both panels.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

D1 = clh.dendrogram(
    results[0][1],
    p=40,
    truncate_mode='lastp',
    color_threshold=500,
    ax=ax1,
)
dump = ax1.set_xticks([])

D1 = clh.dendrogram(results[0][1], color_threshold=500, ax=ax2)
dump = ax2.set_xticks([])
Choose the scale (the maximum number of clusters) at which to partition this network
In [68]:
# Cut the first network's linkage into flat clusters.
# With criterion='maxclust', `scale` is the maximum number of clusters.
scale = 3
part = clh.fcluster(results[0][1], t=scale, criterion='maxclust')
Get the pheno data for the entire dataset and limit it to the subjects that are in this sample
In [67]:
# Read the full phenotype table from disk.
pheno_path = '/home/surchs/Project/abide/pheno/pheno_full.csv'
pheno = pd.read_csv(pheno_path)

# Subject IDs listed in the phenotype table
pheno_subs = pheno['SUB_ID']
# True for every phenotype row whose subject also has brain data
pheno_mask = pheno_subs.isin(data_subs)
# Phenotype rows restricted to the imaging sample.
# NOTE(review): rows keep the order of the CSV, not the order of data_subs.
pheno_data = pheno.loc[pheno_mask]
In [69]:
# Shape of the first element of the first network's results entry
# (the entry is unpacked as (distance, linkage) in show_netw).
results[0][0].shape
Out[69]:
In [70]:
# Number of phenotype rows retained for the imaging sample
len(pheno_data)
Out[70]:
In [16]:
In [13]:
In [24]:
def show_netw(results, network, scale):
    """Plot phenotype histograms for every cluster of one network.

    The linkage of the requested network is cut with ``fcluster`` and, for
    each resulting cluster label, a four-panel grid is drawn showing the
    ``AGE_AT_SCAN``, ``SEX``, ``DX_GROUP`` and ``FIQ`` values of the
    subjects in that cluster.

    Uses the notebook-level variables ``data_subs`` and ``pheno_data``.
    ``data_subs`` is indexed with a boolean mask built from the cluster
    labels, so it must be an array with one subject ID per clustered
    observation, in the same order.

    Parameters
    ----------
    results : sequence
        One ``(distance, linkage)`` pair per network.
    network : int
        1-based network number; ``results[network - 1]`` is used.
    scale : int
        Passed to ``fcluster`` with ``criterion='maxclust'``, i.e. the
        maximum number of clusters. Also the number of panels drawn.
    """
    _, linkage = results[network - 1]
    part = clh.fcluster(linkage, scale, criterion='maxclust')

    # Panels are laid out on a grid of `scale` rows by 2 columns and filled
    # in reading order, one grid cell per cluster.
    f = plt.figure(figsize=(10, 5 * (scale + 1)))
    for clust in range(1, scale + 1):
        # Subject IDs that were assigned to this cluster
        clust_subs = data_subs[part == clust]
        # Select phenotype rows by subject ID rather than by row position:
        # pheno_data is not guaranteed to be in the same order (or of the
        # same length) as the clustered subjects.
        clust_pheno = pheno_data[pheno_data['SUB_ID'].isin(clust_subs)]

        ax_cl = f.add_subplot(scale, 2, clust)
        ax_cl.set_xticks([])
        ax_cl.set_title('Cluster {}'.format(clust))

        lt, lb, rt, rb = bb.visuOps.add_four_grid(
            ax_cl, ticks=True, titles=('age', 'sex', 'dx', 'fiq'))
        lt.hist(clust_pheno['AGE_AT_SCAN'].values)
        lb.hist(clust_pheno['SEX'].values, bins=2)
        rt.hist(clust_pheno['DX_GROUP'].values, bins=2)
        rb.hist(clust_pheno['FIQ'].values)
In [ ]: