Generate cluster by cluster matrix

Load the precomputed linkage, then compute a cluster-by-cluster matrix of phenotype distances and p-values. For now, these metrics are available:

  • mean
  • median

In [7]:
# Imports
import re
import gzip
import numpy as np
import pandas as pd
import cPickle as cp
import brainbox as bb
from matplotlib import pyplot as plt
import scipy.cluster.hierarchy as clh

Load the precomputed linkage from disk


In [18]:
# Location on disk of the gzipped, pickled linkage results
linkage_path = '/data1/abide/Test/linkage.gz'

In [20]:
# Load the precomputed linkage.
# The context manager closes the gzip handle as soon as unpickling is done
# (previously the file stayed open for the lifetime of the kernel).
# NOTE(review): unpickling can execute arbitrary code -- only load linkage
# files that come from a trusted source.
with gzip.open(linkage_path, 'rb') as f:
    in_data = cp.load(f)

In [21]:
# Unpack the loaded container.
# Position 1 holds the per-network results; each entry is unpacked elsewhere
# in this notebook as a (distance, linkage) pair.
results = in_data[1]
# This line used to read `in_data[]`, which is a syntax error.
# NOTE(review): index 0 is assumed to hold the subject IDs that belong to
# `results` -- confirm against the script that wrote the linkage file.
data_subs = in_data[0]

Visualize the structure of the data


In [65]:
# Two side-by-side dendrograms of the first network's linkage
f = plt.figure(figsize=(12, 4))

# Left panel: condensed tree (truncate_mode='lastp' with p=40)
ax1 = f.add_subplot(1, 2, 1)
D1 = clh.dendrogram(
    results[0][1],
    truncate_mode='lastp',
    p=40,
    color_threshold=500,
    ax=ax1,
)
# Hide the x tick labels; the return value is captured only to keep it
# out of the cell output
dump = ax1.set_xticks([])

# Right panel: the full, untruncated tree
ax2 = f.add_subplot(1, 2, 2)
D1 = clh.dendrogram(
    results[0][1],
    color_threshold=500,
    ax=ax2,
)
dump = ax2.set_xticks([])


Decide on the scale (the maximum number of clusters) at which to partition this network


In [68]:
# Cut the first network's linkage into at most `scale` flat clusters
scale = 3
part = clh.fcluster(results[0][1], t=scale, criterion='maxclust')

Get the pheno data for the entire dataset and limit it to the subjects that are in this sample


In [67]:
# Read the phenotype table for the full dataset
pheno_path = '/home/surchs/Project/abide/pheno/pheno_full.csv'
pheno = pd.read_csv(pheno_path)
# Subject IDs as listed in the phenotype table
pheno_subs = pheno['SUB_ID']
# True for every phenotype row whose subject also appears in data_subs
pheno_mask = pheno_subs.isin(data_subs)
# Keep only the matching rows; they stay in the order of the CSV
# NOTE(review): the recorded outputs in this notebook show 582 matching rows
# against a 607 x 607 distance matrix, so this table is presumably not
# row-aligned with the linkage -- confirm before indexing it by position.
pheno_data = pheno.loc[pheno_mask]

In [69]:
# Dimensions of the first network's distance matrix
np.shape(results[0][0])


Out[69]:
(607, 607)

In [70]:
# Number of phenotype rows that matched a subject in data_subs
pheno_data.shape[0]


Out[70]:
582

In [16]:


In [13]:


In [24]:
def show_netw(results, network, scale):
    """Plot phenotype histograms for every cluster of one network.

    The linkage of the requested network is cut into at most `scale` flat
    clusters. For each cluster one panel is added to a new figure and split
    into four histograms: age at scan, sex, diagnostic group and FIQ.

    Relies on the notebook-level variables `data_subs` (subject IDs) and
    `pheno_data` (phenotype table with a 'SUB_ID' column).

    Parameters
    ----------
    results : sequence
        One (distance, linkage) pair per network.
    network : int
        1-based number of the network to show.
    scale : int
        Maximum number of flat clusters to cut the linkage into.
    """
    # Only the linkage is needed; the distance matrix is not used here
    linkage = results[network - 1][1]
    part = clh.fcluster(linkage, scale, criterion='maxclust')

    # Boolean-mask indexing below needs an array, not a plain list.
    # NOTE(review): assumes data_subs lists the subjects in the same order as
    # the observations the linkage was built from -- confirm.
    subs = np.asarray(data_subs)

    # One figure for all clusters of this network
    f = plt.figure(figsize=(10, 5 * (scale + 1)))

    for clust in range(1, scale + 1):
        # Subject IDs that were assigned to this cluster
        clust_subs = subs[part == clust]
        # Select phenotype rows by subject ID rather than by position:
        # pheno_data is filtered and ordered independently of the linkage, so
        # applying `part == clust` to it directly would pick the wrong rows
        # (or fail when the lengths differ).
        clust_pheno = pheno_data[pheno_data['SUB_ID'].isin(clust_subs)]

        ax_cl = f.add_subplot(scale, 2, clust)
        ax_cl.set_xticks([])
        ax_cl.set_title('Cluster {}'.format(clust))

        # presumably splits the panel into four sub-axes (left/right x
        # top/bottom) -- see brainbox.visuOps.add_four_grid
        lt, lb, rt, rb = bb.visuOps.add_four_grid(ax_cl, ticks=True, titles=('age', 'sex', 'dx', 'fiq'))
        lt.hist(clust_pheno['AGE_AT_SCAN'].values)
        lb.hist(clust_pheno['SEX'].values, bins=2)
        rt.hist(clust_pheno['DX_GROUP'].values, bins=2)
        rb.hist(clust_pheno['FIQ'].values)

In [ ]: