In [2]:
import sys
sys.path.append('utils/')
import numpy as np
import loadGlasser as lg
import scripts3_functions as func
import scipy.stats as stats
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import statsmodels.sandbox.stats.multicomp as mc
import statsmodels.api as sm
import multiprocessing as mp
import pandas as pd
import multregressionconnectivity as mreg
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import nibabel as nib
import os
os.environ['OMP_NUM_THREADS'] = str(1)
In [3]:
# Set basic parameters
basedir = '/projects2/ModalityControl2/'
datadir = basedir + 'data/'
resultsdir = datadir + 'resultsMaster/'
runLength = 4648
subjNums = ['032', '033', '037', '038', '039', '045',
            '013', '014', '016', '017', '018', '021',
            '023', '024', '025', '026', '027', '031',
            '035', '046', '042', '028', '048', '053',
            '040', '049', '057', '062', '050', '030', '047', '034']
glasserparcels = lg.loadGlasserParcels()
networkdef = lg.loadGlasserNetworks()
# Flip hemisphere ordering of the network assignments (swap the first and last 180 parcels)
tmp = np.zeros((networkdef.shape[0]))
tmp[0:180] = networkdef[180:]
tmp[180:] = networkdef[0:180]
networkdef = tmp
networkmappings = {'fpn':7, 'vis':1, 'smn':2, 'con':3, 'dmn':6, 'aud1':8, 'aud2':9, 'dan':11}
# Relabel aud2 parcels as aud1 (merge the two auditory networks)
aud2_ind = np.where(networkdef==networkmappings['aud2'])[0]
networkdef[aud2_ind] = networkmappings['aud1']
# Define new network mappings with no aud1/aud2 distinction
networkmappings = {'fpn':7, 'vis':1, 'smn':2, 'con':3, 'dmn':6, 'aud':8, 'dan':11,
                   'prem':5, 'pcc':10, 'none':12, 'hipp':13, 'pmulti':14}
nParcels = 360
# Import network reordering
networkdir = '/projects/AnalysisTools/netpartitions/ColeLabNetPartition_v1/'
networkorder = np.asarray(sorted(range(len(networkdef)), key=lambda k: networkdef[k]))
order = networkorder
order.shape = (len(networkorder),1)
# Construct xticklabels and xticks for plotting figures
networks = networkmappings.keys()
xticks = {}
reorderednetworkaffil = networkdef[order]
for net in networks:
    netNum = networkmappings[net]
    netind = np.where(reorderednetworkaffil==netNum)[0]
    tick = np.max(netind)
    xticks[tick] = net
# Load in Glasser parcels in their native format
glasser2 = nib.load('/projects/AnalysisTools/ParcelsGlasser2016/archive/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii')
glasser2 = glasser2.get_data()
glasser2 = glasser2[0][0][0][0][0]
accdata = {}
rtdata = {}
for subj in subjNums:
    filename = '/projects2/ModalityControl2/data/resultsMaster/behavresults/' + subj + '_acc_by_mb.txt'
    accdata[subj] = np.loadtxt(filename)
    filename = '/projects2/ModalityControl2/data/resultsMaster/behavresults/' + subj + '_rt_by_mb.txt'
    rtdata[subj] = np.loadtxt(filename)
In [4]:
def loadBetas(subj):
    datadir = '/projects2/ModalityControl2/data/resultsMaster/glmMiniblockBetaSeries/'
    filename = subj + '_miniblock_taskBetas_Surface64k.csv'
    betas = np.loadtxt(datadir + filename, delimiter=',')
    # Skip the first 17 columns (presumably non-miniblock regressors); transpose to miniblocks x vertices
    betas = betas[:,17:].T
    return betas
def setUpRSAMatrix(subj,ruledim):
    """
    Sets up the sample-by-feature matrix for classification of a particular rule dimension,
    split into one matrix per Glasser parcel
    """
    betas = loadBetas(subj)
    rules, rulesmb = func.importRuleTimingsV3(subj,ruledim)

    # Stack miniblock betas by rule and construct the corresponding label vector
    svm_mat = np.zeros((betas.shape))
    samplecount = 0
    labels = []
    for rule in rulesmb:
        rule_ind = rulesmb[rule].keys()
        sampleend = samplecount + len(rule_ind)
        svm_mat[samplecount:sampleend,:] = betas[rule_ind,:]
        labels.extend(np.ones(len(rule_ind),)*rule)
        samplecount += len(rule_ind)
    labels = np.asarray(labels)

    # Split the sample matrix into a dictionary of parcel-wise matrices
    svm_dict = {}
    nParcels = 360
    for roi in range(1,nParcels+1):
        roi_ind = np.where(glasserparcels==roi)[0]
        svm_dict[roi] = svm_mat[:,roi_ind]

    return svm_dict, labels
def rsaCV(svm_mat,labels, subj):
    """Runs a leave-4-out CV (one miniblock per rule held out) for a 4-way classification"""
    cvfolds = []
    # 32 folds for a leave-4-out scheme across 128 total miniblocks;
    # each fold leaves out a single miniblock from each rule
    for rule in np.unique(labels):
        cvfolds.append(np.where(labels==rule)[0])
    cvfolds = np.asarray(cvfolds)

    # Number of CV folds = number of columns; number of rules = number of rows
    ncvs = cvfolds.shape[1]
    nrules = cvfolds.shape[0]

    # Randomly shuffle miniblocks within each rule to randomize fold assignment
    for i in range(nrules): np.random.shuffle(cvfolds[i,:])

    corr_rho_cvs = []
    err_rho_cvs = []
    acc_ind = []
    infoEstimate = np.zeros((labels.shape[0],))
    for cv in range(ncvs):
        # Select the test set (one miniblock per rule) from the CV fold matrix
        test_ind = cvfolds[:,cv].copy()
        # Remove the held-out fold from the training set
        train_ind = np.delete(cvfolds,cv,axis=1)

        # Identify the train and test sets
        svm_train = svm_mat[np.reshape(train_ind,-1),:]
        svm_test = svm_mat[test_ind,:]

        # Construct RSA prototypes (mean training pattern for each rule)
        prototype = {}
        for rule in range(nrules):
            prototype_ind = np.reshape(train_ind[rule,:],-1)
            prototype[rule] = np.mean(svm_mat[prototype_ind],axis=0)

        corr_rho = []
        err_rho = []
        for rule1 in range(nrules):
            tmp = []
            for rule2 in range(nrules):
                # Fisher-z transformed Spearman correlation between prototype and held-out pattern
                r = stats.spearmanr(prototype[rule1],svm_test[rule2])[0]
                r = np.arctanh(r)
                if rule1==rule2:
                    corr_rho.append(r)
                else:
                    tmp.append(r)
            err_rho.append(np.mean(tmp))

        corr_rho_cvs.append(np.mean(corr_rho))
        err_rho_cvs.append(np.mean(err_rho))

        # Compute miniblock-wise information estimate
        for i in range(len(corr_rho)):
            # Re-assign information estimate to original miniblock
            infoEstimate[cvfolds[i,cv]] = (corr_rho[i] - err_rho[i])

    return infoEstimate
def subjRSACV((subj,behav)):
    # Tuple-argument signature (Python 2) so the function can be mapped over (subj, behav) pairs with multiprocessing
    ruledims = ['logic','sensory','motor']
    svm_dict = {}
    labels = {}
    for ruledim in ruledims:
        svm_dict[ruledim], labels[ruledim] = setUpRSAMatrix(subj,ruledim)

    logit_beta = np.zeros((nParcels,))
    predictions = np.zeros((nParcels,128))
    accuracy = np.zeros((128,))
    roicount = 0
    for roi in svm_dict['logic']:
        infoEstimate = np.zeros((128,3)) # nMiniblocks x nRuledims
        rulecount = 0
        for ruledim in ruledims:
            svm_mat = svm_dict[ruledim][roi].copy()
            # Demean each sample
            svmmean = np.mean(svm_mat,axis=1)
            svmmean.shape = (len(svmmean),1)
            svm_mat = svm_mat - svmmean

            infoEstimate[:,rulecount] = rsaCV(svm_mat, labels[ruledim], subj)
            # Normalize information estimates
            # infoEstimate = stats.zscore(infoEstimate,axis=0)
            rulecount += 1

        # Sum across all rule dimensions
        # infoEstimate = np.sum(infoEstimate,axis=1)
        # infoEstimate = np.mean(infoEstimate,axis=1)

        # Run logistic regression (accuracy) or rank correlation (RT) with behavior
        # ind_var = np.vstack((np.ones((len(infoEstimate),)),infoEstimate.T))
        # ind_var = ind_var.T
        if behav=='acc':
            accuracy = accdata[subj]
            # binarize accuracy
            # accuracy = accuracy>.5

            # Run leave-one-miniblock-out cross-validation
            prediction = np.zeros((len(accuracy),))
            for mb in range(len(accuracy)):
                indices = np.arange(len(accuracy))
                testset_ind = mb
                trainset_ind = np.delete(indices,mb)

                # Define train and test set
                trainset = infoEstimate[trainset_ind,:]
                testset = infoEstimate[testset_ind,:]

                # Z-normalize train set and normalize test set using trainset's mean and std
                # mean_train = np.mean(trainset,axis=0)
                # mean_train.shape = (1,trainset.shape[1])
                # std_train = np.std(trainset,axis=0)
                # std_train.shape = (1,trainset.shape[1])
                # # Normalize train set
                # trainset = np.divide((trainset - mean_train),std_train)
                # # Normalize test set
                # testset = np.divide((testset - mean_train),std_train)
                # testset = testset.T

                # Create regressor matrix and pad trainset with ones
                regressors = np.vstack((np.ones((trainset.shape[0],)),trainset.T))
                regressors = regressors.T
                logit = sm.Logit(accuracy[trainset_ind],regressors)
                result = logit.fit(disp=False)
                b = result.params

                # Predict model output
                # Don't include the bias term, since we want to remove any bias toward correct predictions
                y = 1/(1+np.exp(-(b[1]*testset[0] + b[2]*testset[1] + b[3]*testset[2])))
                prediction[testset_ind] = y

            # Classify: threshold predicted and actual accuracy at .5 and score agreement
            binary_predictions = prediction > .5
            binary_actual = accuracy > .5
            classification = np.mean(binary_actual==binary_predictions)
            betas = classification
            # betas = stats.spearmanr(accuracy,prediction)[0]
            # predictions[roicount,:] = prediction

            # Fit model
            # betas = result.params[1:] # Get only infoestimate beta params
        elif behav=='rt':
            accuracy = rtdata[subj]
            betas = stats.spearmanr(infoEstimate,accuracy)[0]

        logit_beta[roicount] = betas
        roicount += 1

    return logit_beta
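As a side note, here is a minimal, self-contained sketch (not part of the pipeline above; random data for illustration only) of the prototype-similarity "information estimate" that rsaCV assigns to each held-out miniblock: the Fisher-z Spearman correlation involving the matching rule's training prototype, minus the mean correlation for the mismatching rules.

import numpy as np
import scipy.stats as stats

np.random.seed(1)
nVertices = 50
# Hypothetical training-set mean pattern (prototype) for each of 4 rules
prototypes = dict((rule, np.random.rand(nVertices)) for rule in range(4))
# Hypothetical held-out miniblock whose true rule is 2
test_pattern = prototypes[2] + np.random.rand(nVertices)*.5

# Fisher-z transformed Spearman correlation of each prototype with the held-out pattern
rhos = dict((rule, np.arctanh(stats.spearmanr(prototypes[rule], test_pattern)[0])) for rule in range(4))
# Information estimate: matching-rule similarity minus mean mismatching-rule similarity
info_estimate = rhos[2] - np.mean([rhos[r] for r in range(4) if r != 2])
print 'Information estimate for the held-out miniblock:', info_estimate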
In [5]:
behav='acc'
inputs = []
for subj in subjNums: inputs.append((subj,behav))
pool = mp.Pool(processes=32)
results = pool.map_async(subjRSACV,inputs).get()
pool.close()
pool.join()
logit_beta = np.zeros((nParcels,len(subjNums)))
scount = 0
for result in results:
    for roi in range(nParcels):
        logit_beta[roi,scount] = result[roi]
    scount += 1
In [6]:
# Arrays for output to CSV
sig_t = np.zeros((nParcels,))
sig_effect = np.zeros((nParcels,))
logit_stats = {}
logit_stats['t'] = np.zeros((nParcels,))
logit_stats['p'] = np.zeros((nParcels,))
logit_stats['q'] = np.zeros((nParcels,))
effectsize = []
for roi in range(nParcels):
    # One-sided (greater-than-chance) one-sample t-test against .5 decoding accuracy
    t, p = stats.ttest_1samp(logit_beta[roi,:],.5)
    effectsize.append(np.mean(logit_beta[roi,:]))
    if t > 0:
        p = p/2.0
    else:
        p = 1.0 - p/2.0
    logit_stats['t'][roi] = t
    logit_stats['p'][roi] = p
arr = logit_stats['p']
# Include all ROIs
sig_only = np.where(logit_stats['p']<1)[0]
###
# Only run statistics on FPN & CON regions (overrides the selection above)
sig_only = np.where((networkdef==networkmappings['fpn']) | (networkdef==networkmappings['con']))[0]
# Other ROI selections considered:
# sig_only = np.arange(nParcels)
# sig_only = np.where(networkdef==networkmappings['smn'])[0]
# sig_only = np.where(networkdef==networkmappings['con'])[0]
# sig_only = np.intersect1d(sig_only,sig_only2)

# FDR-correct p-values within the selected ROIs only
qmat = np.ones((len(arr),))
arr = np.asarray(arr)
qmat[sig_only] = mc.fdrcorrection0(arr[sig_only])[1]
for roi in range(nParcels):
    logit_stats['q'][roi] = qmat[roi]
tarr = np.asarray(logit_stats['t'])
qarr = np.asarray(logit_stats['q'])
qbin = qarr < 0.05
sig_t[:] = np.multiply(tarr,qbin)
sig_effect[:] = np.multiply(np.asarray(effectsize),qbin)
##
# Added after finding that transfers between regions 260 and 271 were significant
sig_effect[271] = .53002875 # Decoding accuracy of ITE from 260 -> 271
sig_t_vertex = np.zeros((len(glasser2),))
effects_vertex = np.zeros((len(glasser2),2))
col = 0
for roi in range(nParcels):
    parcel_ind = np.where(glasser2==(roi+1))[0]
    sig_t_vertex[parcel_ind] = sig_t[roi]
    effects_vertex[parcel_ind,0] = effectsize[roi]
    effects_vertex[parcel_ind,1] = sig_effect[roi]
# Write file to csv and run wb_command
outdir = '/projects2/ModalityControl2/data/resultsMaster/ManuscriptS5a_BaselineIE_PerformanceDecoding/'
filename = 'BaselineIE_64k_RuleGeneral_LogitRegCorrWithAcc_Tstat_FDR.csv'
np.savetxt(outdir + filename, sig_t_vertex,fmt='%s')
wb_file = 'BaselineIE_64k_RuleGeneral_LogitRegCorrWithAcc_Tstat_FDR.dscalar.nii'
glasserfilename = '/projects/AnalysisTools/ParcelsGlasser2016/archive/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)
# Write out effect size maps (baseline information content)
outdir = '/projects2/ModalityControl2/data/resultsMaster/ManuscriptS5a_BaselineIE_PerformanceDecoding/'
filename = 'BaselineIE_64k_RuleGeneral_LogitRegCorrWithAcc_EffectSize_FDR.csv'
np.savetxt(outdir + filename, effects_vertex,fmt='%s')
wb_file = 'BaselineIE_64k_RuleGeneral_LogitRegCorrWithAcc_EffectSize_FDR.dscalar.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)
In [7]:
# ROIs passing uncorrected p < 0.05
tmp = np.where(logit_stats['p']<0.05)[0]
print 'ROIs with uncorrected p < 0.05:', tmp
print 'Network assignments:', networkdef[tmp]

# Find significant ROIs (FDR-corrected)
behav_rois = np.where(logit_stats['q']<0.05)[0]
print 'Significant ROIs: Searching only FPN and CON regions \n'
for roi in behav_rois:
    print 'ROI:', roi
    print 'Decoding accuracy:', effectsize[roi]
    print 'P =', logit_stats['p'][roi], '| q =', logit_stats['q'][roi]
In [11]:
import permutationTesting as pt
sig_only = np.where((networkdef==networkmappings['fpn']) | (networkdef==networkmappings['con']))[0]
chance = .5
tmp = logit_beta[sig_only] - chance # Subtract chance from decoding accuracies
t, p = pt.permutationFWE(tmp,permutations=1000,nproc=15)
pfwe = np.zeros((nParcels,))
tfwe = np.zeros((nParcels,))
pfwe[sig_only] = p
tfwe[sig_only] = t
behav_rois = np.where(pfwe>0.95)[0]
print 'Significant ROIs: Searching only FPN and CON regions \n'
for roi in behav_rois:
    print 'ROI:', roi
    print 'Decoding accuracy:', effectsize[roi]
    print 'T-statistic:', tfwe[roi]
    print 'pfwe =', 1.0 - pfwe[roi]
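permutationTesting here is a lab utility rather than a public package; judging from its use above, permutationFWE appears to return a t-statistic per region plus, per region, the proportion of permutation-derived maximum statistics falling below the observed statistic (hence the pfwe > 0.95 threshold and the 1.0 - pfwe printout). Below is a minimal sketch of that general approach, assuming sign-flip permutations with max-T family-wise error correction over an nRegions x nSubjects array of chance-subtracted accuracies; the function and variable names are illustrative, not the lab implementation.

import numpy as np
import scipy.stats as stats

def maxT_sign_flip(data, permutations=1000, seed=0):
    """data: nRegions x nSubjects array of (decoding accuracy - chance) values."""
    rng = np.random.RandomState(seed)
    nRegions, nSubjs = data.shape
    t_real = stats.ttest_1samp(data, 0, axis=1)[0]
    max_t_null = np.zeros(permutations)
    for i in range(permutations):
        # Randomly flip each subject's sign under the null of no above-chance decoding
        flips = rng.choice([-1, 1], size=nSubjs)
        # The maximum t across regions controls family-wise error
        max_t_null[i] = np.max(stats.ttest_1samp(data * flips, 0, axis=1)[0])
    # FWE-corrected p-value per region: fraction of null maxima >= observed t
    p_fwe = np.mean(max_t_null[np.newaxis, :] >= t_real[:, np.newaxis], axis=1)
    return t_real, p_fwe

With this convention, regions with p_fwe < 0.05 would be reported as significant, equivalent to the pfwe > 0.95 threshold used in the cell above.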