In [1]:
import copy
import os, sys
os.chdir('..')

In [2]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.axes_grid1 import make_axes_locatable

from scipy.stats.mstats import zscore
from sklearn.linear_model import LinearRegression
# loa my modules
from src.utils import load_pkl, unflatten
from src.visualise import *
from src.models import clean_confound


import joblib
import pickle

# Built-in modules #
import random

# Third party modules #
import numpy, scipy, matplotlib, pandas
from matplotlib import pyplot
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as dist


/usr/lib/python2.7/dist-packages/h5py/__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters

In [3]:
model_path = './models/SCCA_Yeo7nodes_revision_4_0.80_0.50.pkl'
label_path = './references/names.csv'
dat_path = './data/processed/dict_SCCA_data_prepro_revision1.pkl'

In [4]:
# load data
model = joblib.load(model_path)
dataset = load_pkl(dat_path)
df_label = pd.read_csv(label_path)

#df = pd.read_pickle(df_path)
u, v = model.u * [1, 1, -1, 1] , model.v * [1, 1, -1, 1]
n = model.n_components

# create labels for the nodes
seed_names = df_label.iloc[:, 0].apply(str) + '-' + df_label.iloc[:, -2] + '-' + df_label.iloc[:, -3] + ' ' + df_label.iloc[:, -1]

# unflatten the functional corr coeff
u_mat = []               
for i in range(4):
    u_mat.append(unflatten(u[:, i]))

In [5]:
FC_nodes = dataset['FC_nodes']
MRIQ    = dataset['MRIQ']
mot     = dataset['Motion_Jenkinson']
sex     = dataset['Gender']
age     = dataset['Age']
confound_raw = np.hstack((mot, sex, age))
X, Y, R = clean_confound(FC_nodes, MRIQ, confound_raw)

In [23]:
X_scores = zscore(X).dot(u)
Y_scores = zscore(Y).dot(v)
canpair_score = zscore(X_scores) + zscore(Y_scores)

In [7]:
model.cancorr_


Out[7]:
array([0.27825067, 0.1925879 , 0.1630966 , 0.07353168])

In [20]:
np.corrcoef(X_scores, rowvar=False)


Out[20]:
array([[ 1.        , -0.98860885,  0.98347996, -0.02409729],
       [-0.98860885,  1.        , -0.97180279,  0.10956693],
       [ 0.98347996, -0.97180279,  1.        , -0.12031206],
       [-0.02409729,  0.10956693, -0.12031206,  1.        ]])

In [21]:
np.corrcoef(Y_scores, rowvar=False)


Out[21]:
array([[ 1.        ,  0.4295978 ,  0.62001441, -0.56160462],
       [ 0.4295978 ,  1.        ,  0.67924529, -0.02659967],
       [ 0.62001441,  0.67924529,  1.        , -0.03484224],
       [-0.56160462, -0.02659967, -0.03484224,  1.        ]])

In [24]:
np.corrcoef(canpair_score, rowvar=False)


Out[24]:
array([[ 1.        , -0.34765929,  0.82398859, -0.20723944],
       [-0.34765929,  1.        , -0.19749665,  0.13307812],
       [ 0.82398859, -0.19749665,  1.        , -0.04918527],
       [-0.20723944,  0.13307812, -0.04918527,  1.        ]])

variance inflation factor


In [97]:
def vif(X):
    corr = np.corrcoef(X, rowvar=0, bias=True)
    minv = np.linalg.inv(corr)
    vif = minv.dot(corr).dot(minv)
    vif = np.diag(vif)
    return vif

In [117]:
print sum(vif(X) > 10)


/usr/lib/python2.7/dist-packages/ipykernel/__main__.py:2: DeprecationWarning: bias and ddof have no effect and are deprecated
  from ipykernel import kernelapp as app
785

In [120]:
from sklearn.decomposition import PCA

In [125]:
pca = PCA(n_components=200)
pca.fit(X)


Out[125]:
PCA(copy=True, iterated_power='auto', n_components=200, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [131]:
pca.explained_variance_


Out[131]:
array([6.60001331e+01, 5.58770076e+00, 3.24461736e+00, 2.42806747e+00,
       2.03419212e+00, 1.94050497e+00, 1.75968804e+00, 1.62684591e+00,
       1.41895976e+00, 1.31121861e+00, 1.26591371e+00, 1.14737732e+00,
       1.10967340e+00, 1.08090548e+00, 1.00894152e+00, 9.46031464e-01,
       9.29564349e-01, 9.09073397e-01, 8.72052384e-01, 8.40391826e-01,
       8.14069314e-01, 7.83101455e-01, 7.60732034e-01, 7.21662395e-01,
       6.94554626e-01, 6.71202947e-01, 6.59112402e-01, 6.41710011e-01,
       6.22156802e-01, 5.94576706e-01, 5.90757659e-01, 5.60061294e-01,
       5.52594066e-01, 5.51219637e-01, 5.01134313e-01, 4.94498296e-01,
       4.79284475e-01, 4.67169526e-01, 4.45390570e-01, 4.38503493e-01,
       4.34837966e-01, 4.20422637e-01, 4.14287016e-01, 4.11480746e-01,
       4.04740499e-01, 3.94477172e-01, 3.82026367e-01, 3.74092961e-01,
       3.62788220e-01, 3.49220372e-01, 3.43326440e-01, 3.32267215e-01,
       3.26541854e-01, 3.23808932e-01, 3.13575366e-01, 3.10018400e-01,
       3.02957044e-01, 2.96094499e-01, 2.90774329e-01, 2.82591550e-01,
       2.77752568e-01, 2.74047129e-01, 2.70825877e-01, 2.70215329e-01,
       2.61227255e-01, 2.58585729e-01, 2.43400600e-01, 2.42246255e-01,
       2.40367647e-01, 2.35004708e-01, 2.33080270e-01, 2.31105278e-01,
       2.22728098e-01, 2.19213948e-01, 2.11777621e-01, 2.09555427e-01,
       2.07770445e-01, 2.03480304e-01, 1.98756700e-01, 1.94598331e-01,
       1.91204104e-01, 1.89305353e-01, 1.86193748e-01, 1.83998876e-01,
       1.80242485e-01, 1.78991244e-01, 1.74391788e-01, 1.70433445e-01,
       1.69079684e-01, 1.64320164e-01, 1.64016199e-01, 1.62686026e-01,
       1.57430960e-01, 1.53444893e-01, 1.51995391e-01, 1.50510918e-01,
       1.48029192e-01, 1.46176811e-01, 1.41708389e-01, 1.40517916e-01,
       1.37104878e-01, 1.34640755e-01, 1.33330544e-01, 1.30940345e-01,
       1.29235799e-01, 1.28043342e-01, 1.26177440e-01, 1.24419902e-01,
       1.22842149e-01, 1.21377346e-01, 1.20311591e-01, 1.18516628e-01,
       1.14212323e-01, 1.13802852e-01, 1.10439039e-01, 1.09667597e-01,
       1.07674360e-01, 1.04761926e-01, 1.04706268e-01, 1.03279341e-01,
       1.02439723e-01, 1.01172751e-01, 9.87572555e-02, 9.86665714e-02,
       9.79713284e-02, 9.53747567e-02, 9.37547015e-02, 9.29918658e-02,
       9.21133391e-02, 8.99676525e-02, 8.97809559e-02, 8.92548287e-02,
       8.67911898e-02, 8.60433507e-02, 8.47335507e-02, 8.41937670e-02,
       8.30156488e-02, 8.14223090e-02, 8.04292483e-02, 7.89255926e-02,
       7.76974257e-02, 7.71468077e-02, 7.62547117e-02, 7.58466937e-02,
       7.47075732e-02, 7.41654804e-02, 7.20808089e-02, 7.11081728e-02,
       7.10700182e-02, 7.06739568e-02, 7.03423293e-02, 6.78318197e-02,
       6.71988113e-02, 6.60964232e-02, 6.54702824e-02, 6.48345114e-02,
       6.39938583e-02, 6.28693903e-02, 6.24797081e-02, 6.01054190e-02,
       5.96851992e-02, 5.88075465e-02, 5.82010080e-02, 5.77096527e-02,
       5.69201774e-02, 5.64909108e-02, 5.59802733e-02, 5.51323505e-02,
       5.44646223e-02, 5.35229500e-02, 5.24993009e-02, 5.23059855e-02,
       5.08301048e-02, 5.00251379e-02, 4.88212854e-02, 4.81294924e-02,
       4.77520702e-02, 4.67344147e-02, 4.62734936e-02, 4.61182284e-02,
       4.52753292e-02, 4.46725761e-02, 4.37442540e-02, 4.34913867e-02,
       4.29199422e-02, 4.23365536e-02, 4.17383983e-02, 4.14599062e-02,
       4.12515599e-02, 4.01265987e-02, 4.00503560e-02, 3.93960960e-02,
       3.86896982e-02, 3.79838478e-02, 3.76732414e-02, 3.58528633e-02,
       3.56033352e-02, 3.47169018e-02, 3.44984087e-02, 3.37593099e-02])

CC explained variance of the original data


In [8]:
def _Rsquare(X, P):
    '''
    calculate the coefficent of determination (R square):
    the ratio of the explained variation to the total variation.
    '''
    lr = LinearRegression(fit_intercept=False)
    lr.fit(P, X.T)
    rec_ = lr.coef_.dot(P.T)
    return 1 - (np.var(X - rec_) / np.var(X))

In [15]:
print 'The canonical vectors explained {0:.1f}% of the original connectivity data'.format(_Rsquare(X, u)*100)
print 'The canonical vectors explained {0:.1f}% of the original self-reports'.format(_Rsquare(Y, v)*100)


The canonical vectors explained 50.7% of the original connectivity data
The canonical vectors explained 32.4% of the original self-reports

Visualise the components


In [3]:
df_yeo7color = pd.read_csv('./references/yeo7_color.csv', index_col=0)

In [4]:
c_label = []
for l in df_label.iloc[:, -2].values:
    cur_color = df_yeo7color[l].values
    hex_c = '#%02x%02x%02x' % tuple(cur_color)
    c_label.append(mpl.colors.to_rgb(hex_c))

In [5]:
plt.close()
for i in range(4):
    set_text_size(8)
    fig = plt.figure(figsize=(20, 20))
    ax = fig.add_subplot(111)
    max = np.abs(u_mat[i]).max()
    m = ax.matshow(u_mat[i], vmax=max, vmin=-max, cmap='RdBu_r')
    ax.set_xticks(np.arange(u_mat[i].shape[1]))
    ax.set_yticks(np.arange(u_mat[i].shape[0]))
    ax.set_xticklabels(seed_names, rotation='vertical')
    ax.set_yticklabels(seed_names)
    for xtick, color in zip(ax.get_xticklabels(), c_label):
        xtick.set_color(color)
    for ytick, color in zip(ax.get_yticklabels(), c_label):
        ytick.set_color(color)
    fig.colorbar(m)
    plt.savefig("reports/plots/yeo7node_{}.png".format(i + 1), dpi=300, tight_layout=True)

try to average the correlation coefficents


In [4]:
yeo7_names = ['VIS', 'S-M', 'VAN', 'DAN', 'LIM', 'FPN', 'DMN']
yeo7_fullnames = ['Visual', 'Somatomotor', 'VentralAttention', 'DorsalAttention', 'Limbic', 'Frontoparietal', 'Default']

In [5]:
import numpy as np

In [6]:
summary_mat = np.zeros((7, 7, 4))
for k in range(n):
    df = pd.DataFrame(u_mat[k], columns=df_label.iloc[:, -2].values, index=df_label.iloc[:, -2].values)
    for i, x in enumerate(yeo7_fullnames):
        for j, y in enumerate(yeo7_fullnames): 
            mat = df.loc[x, y].values.mean()
            summary_mat[i, j, k] = mat

In [7]:
from src.visualise import rank_labels

In [8]:
df_v = pd.DataFrame(v, index=dataset['MRIQ_labels'])

In [9]:
def sparse_row(seri_v):
    vi, cur_v_labels = rank_labels(seri_v)
    idx = np.isnan(vi).reshape((vi.shape[0]))
    vi = vi[~idx]
    vi = vi.reshape((vi.shape[0], 1))
    cur_v_labels = np.array(cur_v_labels)[~idx] 
    return vi, cur_v_labels

In [10]:
u_max = np.abs(summary_mat).max()
v_max = np.abs(v).max()

In [24]:
set_text_size(11)
for i in range(n):
    # thought probe
    vi, cur_v_labels = sparse_row(df_v.iloc[:, i])
    
    # between networks
    mat = np.tril(summary_mat[..., i], 0)
    mat[np.triu_indices(mat.shape[0], 0)] = np.nan
    cur_df = pd.DataFrame(mat, columns=yeo7_names, index=yeo7_names)
    
    # within networks
    within_net = summary_mat[..., i].diagonal().reshape((7,1))
    
    fig = plt.figure(figsize=(6, 2.5))
    ax = fig.add_subplot(131)
    t = ax.matshow(vi, vmax=v_max, vmin=-v_max, cmap='RdBu_r')
    ax.set_xticks(np.arange(vi.shape[1]))
    ax.set_yticks(np.arange(vi.shape[0]))
    ax.set_xticklabels([' '])
    ax.set_yticklabels(cur_v_labels)
    ax.set_title('Thoughts', fontsize=16)
    
    ax = fig.add_subplot(132)
    m1 = ax.matshow(cur_df.values, vmax=u_max, vmin=-u_max, cmap='RdBu_r')
    ax.set_xticks(np.arange(cur_df.shape[1]))
    ax.set_yticks(np.arange(cur_df.shape[0]))
    ax.set_xticklabels(yeo7_names, rotation=45)
    ax.set_yticklabels(yeo7_names)
    ax.set_title('Between', fontsize=16)
    ax.set_frame_on(False)
    ax.plot([-0.5, -0.5], [-0.5, 6.5], ls='-', c='.1')
    ax.plot([-0.5, 6.5], [6.5, 6.5], ls='-', c='.1')
    ax.xaxis.set_ticks_position('bottom')
    
    ax = fig.add_subplot(133)
    m2 = ax.matshow(within_net, vmax=u_max, vmin=-u_max, cmap='RdBu_r')
    ax.set_xticks(np.arange(within_net.shape[1]))
    ax.set_yticks(np.arange(within_net.shape[0]))
    ax.set_xticklabels(' ')
    ax.set_yticklabels(yeo7_names)
    ax.set_title('Within', fontsize=16)
    
    plt.tight_layout()
    plt.savefig('./reports/plots/yeo7nodes_bwsummary_{}.png'.format(i + 1), dpi=300)
    plt.show()



In [21]:
master = []
vmax = np.abs(v).max()
vmin = -vmax
for i in range(4):
    rescale = (v[:,i] - vmin) / (vmax - vmin)
    colors_hex = []
    for c in cm.RdBu_r(rescale):
        colors_hex.append(matplotlib.colors.to_hex(c))
    master.append(colors_hex)
colors_hex = np.array(master).T
df_v_color = pd.DataFrame(colors_hex, index=dataset['MRIQ_labels'])
df_v_color.to_csv('./reports/plots/wordcloud_colors.csv')
df_v.to_csv('./reports/plots/v.csv')

In [49]:
# word cloud colorbar 
set_text_size(10)

fig = plt.figure(figsize=(2, 0.7))
ax = fig.add_subplot(111)
cmap = cm.RdBu_r
norm = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)
cb1 = mpl.colorbar.ColorbarBase(ax, cmap=cmap,
                                norm=norm, orientation='horizontal')
plt.tight_layout()
plt.savefig('./reports/plots/wordcloud_cb.png', transparent=True, dpi=300)
plt.show()



In [44]:
# word cloud colorbar 
set_text_size(10)

fig = plt.figure(figsize=(4, 0.8))
ax = fig.add_subplot(111)
cmap = cm.RdBu_r
norm = matplotlib.colors.Normalize(vmin=-u_max, vmax=u_max)
cb1 = mpl.colorbar.ColorbarBase(ax, cmap=cmap,
                                norm=norm, orientation='horizontal')
plt.tight_layout()
plt.savefig('./reports/plots/fc_cb.png', transparent=True, dpi=300)
plt.show()


within network connectivity


In [ ]:
for k in range(n):
    df = pd.DataFrame(u_mat[k], columns=df_label.iloc[:, -2].values, index=df_label.iloc[:, -2].values)
    for net in yeo7_names:
        mat = df.loc[net, net].values
        label_idx = df_label.loc[:, 'Yeo7'] == net
        label_l = df_label.iloc[:, -3][label_idx]
        label_r = df_label.iloc[:, -1][label_idx]
        label = list((label_l + " - " +label_r).values)
        
        set_text_size(8)
        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(111)

        max_val = np.abs(mat).max()
        
        m = ax.matshow(mat, vmax=max_val, vmin=-max_val, cmap='RdBu_r')
        ax.set_xticks(np.arange(mat.shape[1]))
        ax.set_yticks(np.arange(mat.shape[0]))
        ax.set_xticklabels(label, rotation='vertical')
        ax.set_yticklabels(label)
#         ax.set_frame_on(False)
#         ax.plot([-0.5, -0.5], [-0.5, 6.5], ls='-', c='.1')
#         ax.plot([-0.5, 6.5], [6.5, 6.5], ls='-', c='.1')
        ax.xaxis.set_ticks_position('bottom')
        ax.set_title('Component {} - {}'.format(k + 1, net))
        fig.colorbar(m)
        plt.savefig('./reports/plots/withinNetworks/com{}.png'.format(k + 1, net))

In [9]:
for k in range(n):
    df = pd.DataFrame(u_mat[k], columns=df_label.iloc[:, -2].values, index=df_label.iloc[:, -2].values)
    for net in yeo7_names:
        mat = df.loc[net, net].values
        label_idx = df_label.loc[:, 'Yeo7'] == net
        label_l = df_label.iloc[:, -3][label_idx]
        label_r = df_label.iloc[:, -1][label_idx]
        label = list((label_l + " - " +label_r).values)
        
        set_text_size(8)
        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(111)

        max_val = np.abs(mat).max()
        
        m = ax.matshow(mat, vmax=max_val, vmin=-max_val, cmap='RdBu_r')
        ax.set_xticks(np.arange(mat.shape[1]))
        ax.set_yticks(np.arange(mat.shape[0]))
        ax.set_xticklabels(label, rotation='vertical')
        ax.set_yticklabels(label)
#         ax.set_frame_on(False)
#         ax.plot([-0.5, -0.5], [-0.5, 6.5], ls='-', c='.1')
#         ax.plot([-0.5, 6.5], [6.5, 6.5], ls='-', c='.1')
        ax.xaxis.set_ticks_position('bottom')
        ax.set_title('Component {} - {}'.format(k + 1, net))
        fig.colorbar(m)
        plt.savefig('./reports/plots/withinNetworks/com{}_{}.png'.format(k + 1, net))