Manuscript5 - Calculating information estimate for each brain-region using vertex-level activation patterns for each rule domain

Master code for Ito et al., 2017

Takuya Ito (takuya.ito@rutgers.edu)

In [30]:
import sys
sys.path.append('utils/')
import numpy as np
import scripts3_functions as func
import scipy.stats as stats
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import statsmodels.sandbox.stats.multicomp as mc
import statsmodels.api as sm
import sys
import multiprocessing as mp
import pandas as pd
import multregressionconnectivity as mreg
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import nibabel as nib
import os
import permutationTesting as pt

0.0 Basic parameters

In [3]:
# Set basic parameters
basedir = '/projects2/ModalityControl2/'
runLength = 4648

subjNums = ['032', '033', '037', '038', '039', '045',
'013', '014', '016', '017', '018', '021',
'023', '024', '025', '026', '027', '031',
'035', '046', '042', '028', '048', '053',
'040', '049', '057', '062', '050', '030', '047', '034']

# Organized as a 64k vector

nParcels = 360

# Load in Glasser parcels in their native format
# Note that this parcel file is actually flipped (across hemispheres), but it doesn't matter since we're using the same exact file to reconstruct the data
glasser2 = glasser2.get_data()
glasser2 = np.squeeze(glasser2)

pixdim[1,2,3] should be non-zero; setting 0 dims to 1

1.0 Load in vertex-wise betas across all miniblocks for all brain regions

1.1 Define some basic functions for RSA pipeline

In [33]:
betas = betas[:,17:].T
return betas

def setUpRSAMatrix(subj,ruledim):
"""
Sets up basic SVM Matrix for a classification of a particular rule dimension and network
"""

rules, rulesmb = func.importRuleTimingsV3(subj,ruledim)

svm_mat = np.zeros((betas.shape))
samplecount = 0
labels = []
for rule in rulesmb:
rule_ind = rulesmb[rule].keys()
sampleend = samplecount + len(rule_ind)
svm_mat[samplecount:sampleend,:] = betas[rule_ind,:]
labels.extend(np.ones(len(rule_ind),)*rule)
samplecount += len(rule_ind)

labels = np.asarray(labels)

svm_dict = {}
nParcels = 360
for roi in range(1,nParcels+1):
roi_ind = np.where(glasserparcels==roi)[0]
svm_dict[roi] = svm_mat[:,roi_ind]

return svm_dict, labels

def rsaCV(svm_mat,labels, subj):
"""Runs a leave-4-out CV for a 4 way  classification"""

cvfolds = []
# 32 folds, if we do a leave 4 out for 128 total miniblocks
# Want to leave a single block from each rule from each CV
for rule in np.unique(labels):
cvfolds.append(np.where(labels==rule)[0])
cvfolds = np.asarray(cvfolds)

# Number of CVs is columns
ncvs = cvfolds.shape[1]
nrules = cvfolds.shape[0]
# For each CV fold, make sure the fold is constructed randomly
for i in range(nrules): np.random.shuffle(cvfolds[i,:])

corr_rho_cvs = []
err_rho_cvs = []
acc_ind = []
infoEstimate = []
for cv in range(ncvs):
# Select a test set from the CV Fold matrix
test_ind = cvfolds[:,cv].copy()
# The accuracy array should be the same as test_idn
acc_ind.extend(cvfolds[:,cv].copy())
# Delete the CV included from the train set
train_ind = np.delete(cvfolds,cv,axis=1)

# Identify the train and test sets
svm_train = svm_mat[np.reshape(train_ind,-1),:]
svm_test = svm_mat[test_ind,:]

prototype = {}
# Construct RSA prototypes
for rule in range(nrules):
prototype_ind = np.reshape(train_ind[rule,:],-1)
prototype[rule] = np.mean(svm_mat[prototype_ind],axis=0)

corr_rho = []
err_rho = []
for rule1 in range(nrules):
tmp = []
for rule2 in range(nrules):
r = np.arctanh(stats.spearmanr(prototype[rule1],svm_test[rule2,:])[0])
if rule1==rule2:
corr_rho.append(r)
else:
tmp.append(r)
err_rho.append(np.mean(tmp))

corr_rho_cvs.append(np.mean(corr_rho))
err_rho_cvs.append(np.mean(err_rho))
# Compute miniblock-wise information estimate
for i in range(len(corr_rho)):
infoEstimate.append(corr_rho[i] - err_rho[i])

# independent var (constant terms + information estimate)
infoEstimate = np.asarray(infoEstimate)
ind_var = np.vstack((np.ones((len(infoEstimate),)),infoEstimate))
ind_var = ind_var.T

return np.mean(corr_rho_cvs), np.mean(err_rho_cvs), np.mean(infoEstimate)

def subjRSACV((subj,ruledim,behav)):
svm_dict, labels = setUpRSAMatrix(subj,ruledim)
corr_rhos = {}
err_rhos = {}
infoEstimate = {}
for roi in svm_dict:
svm_mat = svm_dict[roi].copy()
# Demean each sample
svmmean = np.mean(svm_mat,axis=1)
svmmean.shape = (len(svmmean),1)
svm_mat = svm_mat - svmmean

#         svm_mat = preprocessing.scale(svm_mat,axis=0)

corr_rhos[roi], err_rhos[roi], infoEstimate[roi] = rsaCV(svm_mat, labels, subj)

return corr_rhos, err_rhos, infoEstimate

2.0 - Estimate information estimates for all regions for all 3 rule domains

In [34]:
ruledims = ['logic','sensory','motor']
behav='acc'
corr_rhos = {}
err_rhos = {}
diff_rhos = {}
for ruledim in ruledims:
corr_rhos[ruledim] = {}
err_rhos[ruledim] = {}
diff_rhos[ruledim] = {}

print 'Running', ruledim

inputs = []
for subj in subjNums: inputs.append((subj,ruledim,behav))

#     pool = mp.Pool(processes=8)
pool = mp.Pool(processes=16)
results = pool.map_async(subjRSACV,inputs).get()
pool.close()
pool.join()

# Reorganize results
corr_rhos[ruledim] = np.zeros((nParcels,len(subjNums)))
err_rhos[ruledim] = np.zeros((nParcels,len(subjNums)))
diff_rhos[ruledim] = np.zeros((nParcels,len(subjNums)))

scount = 0
for result in results:
for roi in range(nParcels):
corr_rhos[ruledim][roi,scount] = result[0][roi+1]
err_rhos[ruledim][roi,scount] = result[1][roi+1]
diff_rhos[ruledim][roi,scount] = result[2][roi+1]
scount += 1

Running logic
Running sensory
Running motor

Save CSVs for baseline leave-4-out CV on information transfer estimate

In [35]:
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'

for ruledim in ruledims:
filename = 'Regionwise_InformationEstimate_LeaveOneOut_' + ruledim + '.csv'
np.savetxt(outdir + filename, diff_rhos[ruledim], delimiter=',')

Run statistics (t-tests and multiple comparisons correction with FDR)

In [36]:
df_stats = {}
# Output to CSV matrix
sig_t = np.zeros((nParcels,len(ruledims)))
sig_effect = np.zeros((nParcels,len(ruledims)))
effectsize = {}
rulecount = 0
for ruledim in ruledims:
df_stats[ruledim] = {}
df_stats[ruledim]['t'] = np.zeros((nParcels,))
df_stats[ruledim]['p'] = np.zeros((nParcels,))
effectsize[ruledim] = np.zeros((nParcels,))
for roi in range(nParcels):
t, p = stats.ttest_1samp(diff_rhos[ruledim][roi,:], 0)
#         t, p = stats.ttest_rel(corr_rhos[ruledim][roi,:], err_rhos[ruledim][roi,:])

effectsize[ruledim][roi] = np.mean(diff_rhos[ruledim][roi,:])

ps = np.zeros(())
if t > 0:
p = p/2.0
else:
p = 1.0 - p/2.0
df_stats[ruledim]['t'][roi] = t
df_stats[ruledim]['p'][roi] = p

arr = df_stats[ruledim]['p']
df_stats[ruledim]['q'] = mc.fdrcorrection0(arr)[1]

qbin = df_stats[ruledim]['q'] < 0.05
sig_t[:,rulecount] = np.multiply(df_stats[ruledim]['t'],qbin)
sig_effect[:,rulecount] = np.multiply(effectsize[ruledim],qbin)

rulecount += 1

Map statistics and results to surface using workbench

In [38]:
sig_t_vertex = np.zeros((len(glasser2),len(ruledims)))
effects_vertex = np.zeros((len(glasser2),len(ruledims)))
effects_vertex_sig = np.zeros((len(glasser2),len(ruledims)))
col = 0
for cols in range(sig_t_vertex.shape[1]):
for roi in range(nParcels):
parcel_ind = np.where(glasser2==(roi+1))[0]
sig_t_vertex[parcel_ind,col] = sig_t[roi,col]
effects_vertex[parcel_ind,col] = effectsize[ruledims[col]][roi]
effects_vertex_sig[parcel_ind,col] = sig_effect[roi,col]
col += 1

# Write file to csv and run wb_command
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
filename = 'RegionwiseIE_FDRThresholded_Tstat.csv'
np.savetxt(outdir + filename, sig_t_vertex,fmt='%s')
wb_file = 'RegionwiseIE_FDRThresholded_Tstat.dscalar.nii'
glasserfilename = '/projects/AnalysisTools/ParcelsGlasser2016/archive/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)

# Compute effect size baseline (information content)
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
filename = 'RegionwiseIE_InformationEstimate.csv'
np.savetxt(outdir + filename, effects_vertex,fmt='%s')
wb_file = 'RegionwiseIE_InformationEstimate.dscalar.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)

# Compute Thresholded effect size baseline (information content)
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
filename = 'RegionwiseIE_FDRThresholded_InformationEstimate.csv'
np.savetxt(outdir + filename, effects_vertex_sig,fmt='%s')
wb_file = 'RegionwiseIE_FDRThresholded_InformationEstimate.dscalar.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)

Out[38]:
0

Run FWE Correction using permutation testing

In [51]:

fwe_Ts = np.zeros((nParcels,len(ruledims)))
fwe_Ps = np.zeros((nParcels,len(ruledims)))
rulecount = 0
for ruledim in ruledims:
t, p = pt.permutationFWE(diff_rhos[ruledim], nullmean=0, permutations=10000, nproc=15)
#     t, p = pt.permutationFWE(corr_rhos[ruledim] - err_rhos[ruledim],
#                              nullmean=0, permutations=1000, nproc=15)
fwe_Ts[:,rulecount] = t
fwe_Ps[:,rulecount] = p

rulecount += 1

In [52]:
# Compare t-values from permutation function and above

rulecount = 0
for ruledim in ruledims:
if np.sum(df_stats[ruledim]['t']==fwe_Ts[:,rulecount])==360:
print 'Correct t-values match up'
else:
print 'Error! Likely a bug in the code'
rulecount += 1

Correct t-values match up
Correct t-values match up
Correct t-values match up

Write out significant ROIs

In [53]:
fwe_Ps2 = (1.0000 - fwe_Ps) # One-tailed test on upper tail
sig_mat = fwe_Ps2 < 0.0500 # One-sided t-test (Only interested in values greater than 95% interval)
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
filename = 'FWE_corrected_pvals_allruledims.csv'
np.savetxt(outdir + filename, fwe_Ps, delimiter=',')

sig_t = np.zeros((nParcels,len(ruledims)))
sig_effect = np.zeros((nParcels,len(ruledims)))
rulecount = 0
for ruledim in ruledims:
sig_t[:,rulecount] = np.multiply(fwe_Ts[:,rulecount],sig_mat[:,rulecount])
sig_effect[:,rulecount] = np.multiply(effectsize[ruledim],sig_mat[:,rulecount])

# Read out statistics for manuscript
# Identify significant regions
sig_ind = sig_mat[:,rulecount] == True
nonsig_ind = sig_mat[:,rulecount] == False

print 'Average significant IE for', ruledim, ':', np.mean(effectsize[ruledim][sig_ind])
print 'Average significant T-stats for', ruledim, ':', np.mean(fwe_Ts[:,rulecount][sig_ind])
print 'Maximum significant p-value for', ruledim, ':', np.max(fwe_Ps2[:,rulecount][sig_ind])
print '----'
print 'Average nonsignificant IE for', ruledim, ':', np.mean(effectsize[ruledim][nonsig_ind])
print 'Average nonsignificant T-stats for', ruledim, ':', np.mean(fwe_Ts[:,rulecount][nonsig_ind])
print 'Minimum nonsignificant p-value for', ruledim, ':', np.min(fwe_Ps2[:,rulecount][nonsig_ind])
print '\n'
print '*****************'
rulecount += 1

Average significant IE for logic : 0.0228556752082
Average significant T-stats for logic : 5.24410223409
Maximum significant p-value for logic : 0.05
----
Average nonsignificant IE for logic : 0.0108887207719
Average nonsignificant T-stats for logic : 2.14023612418
Minimum nonsignificant p-value for logic : 0.0507

*****************
Average significant IE for sensory : 0.0213431959576
Average significant T-stats for sensory : 4.97298583963
Maximum significant p-value for sensory : 0.0458
----
Average nonsignificant IE for sensory : 0.0123727448245
Average nonsignificant T-stats for sensory : 2.28815520011
Minimum nonsignificant p-value for sensory : 0.0506

*****************
Average significant IE for motor : 0.0637508552622
Average significant T-stats for motor : 6.80359722659
Maximum significant p-value for motor : 0.0483
----
Average nonsignificant IE for motor : 0.00742946704093
Average nonsignificant T-stats for motor : 1.55104069333
Minimum nonsignificant p-value for motor : 0.0535

*****************

Map out FWE-corrected statistics/results to surface using workbench

In [54]:
sig_t_vertex = np.zeros((len(glasser2),len(ruledims)))
effects_vertex = np.zeros((len(glasser2),len(ruledims)))
effects_vertex_sig = np.zeros((len(glasser2),len(ruledims)))
col = 0
for cols in range(sig_t_vertex.shape[1]):
for roi in range(nParcels):
parcel_ind = np.where(glasser2==(roi+1))[0]
sig_t_vertex[parcel_ind,col] = sig_t[roi,col]
effects_vertex_sig[parcel_ind,col] = sig_effect[roi,col]
col += 1

# Write file to csv and run wb_command
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
filename = 'RegionwiseIE_FWERThresholded_Tstat.csv'
np.savetxt(outdir + filename, sig_t_vertex,fmt='%s')
wb_file = 'RegionwiseIE_FWERThresholded_Tstat.dscalar.nii'
glasserfilename = '/projects/AnalysisTools/ParcelsGlasser2016/archive/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)

# # Compute effect size baseline (information content)
# outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
# filename = 'RegionwiseIE_InformationEstimate.csv'
# np.savetxt(outdir + filename, effects_vertex,fmt='%s')
# wb_file = 'RegionwiseIE_InformationEstimate.dscalar.nii'
# wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
# os.system(wb_command)

# Compute Thresholded effect size baseline (information content)
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
filename = 'RegionwiseIE_FWERThresholded_InformationEstimate.csv'
np.savetxt(outdir + filename, effects_vertex_sig,fmt='%s')
wb_file = 'RegionwiseIE_FWERThresholded_InformationEstimate.dscalar.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)

Out[54]:
0