ManuscriptS5a - Predicting task performance using information estimates (prior to information transfer mapping)

Analysis statistic reported in the text; also contributes to the analysis in Supplementary Figure 5

Master code for Ito et al., 2017

Takuya Ito (takuya.ito@rutgers.edu)


In [2]:
import sys
sys.path.append('utils/')
import numpy as np
import loadGlasser as lg
import scripts3_functions as func
import scipy.stats as stats
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import statsmodels.sandbox.stats.multicomp as mc
import statsmodels.api as sm
import multiprocessing as mp
import pandas as pd
import multregressionconnectivity as mreg
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import nibabel as nib
import os
os.environ['OMP_NUM_THREADS'] = str(1)

0.0 Basic parameters


In [3]:
# Set basic parameters
basedir = '/projects2/ModalityControl2/'
datadir = basedir + 'data/'
resultsdir = datadir + 'resultsMaster/'
runLength = 4648

subjNums = ['032', '033', '037', '038', '039', '045', 
            '013', '014', '016', '017', '018', '021', 
            '023', '024', '025', '026', '027', '031', 
            '035', '046', '042', '028', '048', '053', 
            '040', '049', '057', '062', '050', '030', '047', '034']

glasserparcels = lg.loadGlasserParcels()
networkdef = lg.loadGlasserNetworks()
# Flip hemisphere ordering of the network assignments (swap the first and last 180 parcels)
tmp = np.zeros((networkdef.shape[0]))
tmp[0:180] = networkdef[180:]
tmp[180:] = networkdef[0:180]
networkdef = tmp

networkmappings = {'fpn':7, 'vis':1, 'smn':2, 'con':3, 'dmn':6, 'aud1':8, 'aud2':9, 'dan':11}
# Force aud2 key to be the same as aud1
aud2_ind = np.where(networkdef==networkmappings['aud2'])[0]
networkdef[aud2_ind] = networkmappings['aud1']
# Define new network mappings with no aud1/aud2 distinction
networkmappings = {'fpn':7, 'vis':1, 'smn':2, 'con':3, 'dmn':6, 'aud':8, 'dan':11,
                   'prem':5, 'pcc':10, 'none':12, 'hipp':13, 'pmulti':14}

nParcels = 360

# Import network reordering
networkdir = '/projects/AnalysisTools/netpartitions/ColeLabNetPartition_v1/'
networkorder = np.asarray(sorted(range(len(networkdef)), key=lambda k: networkdef[k]))
order = networkorder
order.shape = (len(networkorder),1)

# Construct xticklabels and xticks for plotting figures
networks = networkmappings.keys()
xticks = {}
reorderednetworkaffil = networkdef[order]
for net in networks:
    netNum = networkmappings[net]
    netind = np.where(reorderednetworkaffil==netNum)[0]
    tick = np.max(netind)
    xticks[tick] = net
    
# Load in Glasser parcels in their native format
glasser2 = nib.load('/projects/AnalysisTools/ParcelsGlasser2016/archive/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii')
glasser2 = glasser2.get_data()
glasser2 = glasser2[0][0][0][0][0] # Extract the vertex-wise parcel label array from the nested CIFTI data structure

accdata = {}
rtdata = {}
for subj in subjNums:
    filename = '/projects2/ModalityControl2/data/resultsMaster/behavresults/' + subj+'_acc_by_mb.txt'
    accdata[subj] = np.loadtxt(filename)
    filename = '/projects2/ModalityControl2/data/resultsMaster/behavresults/' + subj+'_rt_by_mb.txt'
    rtdata[subj] = np.loadtxt(filename)


pixdim[1,2,3] should be non-zero; setting 0 dims to 1

1.0 Load in preprocessed vertex-wise betas across all miniblocks

1.1 Define some basic functions for RSA pipeline
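
For reference, the miniblock-wise information estimate computed by rsaCV below is the Fisher-transformed Spearman correlation between a held-out miniblock's activation pattern and the prototype (training-set mean pattern) of its own rule, minus the mean correlation with the prototypes of the other rules:

    IE(mb) = arctanh( rho(pattern_mb, prototype_same-rule) ) - mean over other rules of arctanh( rho(pattern_mb, prototype_other-rule) )

A positive estimate indicates that the miniblock's pattern is more similar to its own rule's prototype than to the other rules' prototypes.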


In [4]:
def loadBetas(subj):
    datadir = '/projects2/ModalityControl2/data/resultsMaster/glmMiniblockBetaSeries/'
    filename = subj + '_miniblock_taskBetas_Surface64k.csv'
    betas = np.loadtxt(datadir + filename, delimiter=',')
    betas = betas[:,17:].T # Skip the first 17 columns and transpose so rows are miniblocks (samples) and columns are vertices
    return betas


def setUpRSAMatrix(subj,ruledim):
    """
    Sets up basic SVM Matrix for a classification of a particular rule dimension and network
    """
    
    betas = loadBetas(subj)
    rules, rulesmb = func.importRuleTimingsV3(subj,ruledim)
    
    svm_mat = np.zeros((betas.shape))
    samplecount = 0
    labels = []
    for rule in rulesmb:
        rule_ind = rulesmb[rule].keys()
        sampleend = samplecount + len(rule_ind)
        svm_mat[samplecount:sampleend,:] = betas[rule_ind,:]
        labels.extend(np.ones(len(rule_ind),)*rule)
        samplecount += len(rule_ind)
        
    labels = np.asarray(labels)
    
    svm_dict = {}
    nParcels = 360
    for roi in range(1,nParcels+1):
        roi_ind = np.where(glasserparcels==roi)[0]
        svm_dict[roi] = svm_mat[:,roi_ind]
    
    return svm_dict, labels

def rsaCV(svm_mat,labels, subj):
    """Runs a leave-4-out CV for a 4 way  classification"""
        
    cvfolds = []
    # 32 folds, if we do a leave 4 out for 128 total miniblocks
    # Want to leave a single block from each rule from each CV
    for rule in np.unique(labels):
        cvfolds.append(np.where(labels==rule)[0])
    cvfolds = np.asarray(cvfolds)
    
    # Number of CVs is columns
    ncvs = cvfolds.shape[1]
    nrules = cvfolds.shape[0]
    # Randomly sample cross-validation folds
    for i in range(nrules): np.random.shuffle(cvfolds[i,:])
    
    corr_rho_cvs = []
    err_rho_cvs = []
    acc_ind = []
    infoEstimate = np.zeros((labels.shape[0],))
    for cv in range(ncvs):
        # Select a test set from the CV Fold matrix
        test_ind = cvfolds[:,cv].copy()
        # Delete the CV included from the train set
        train_ind = np.delete(cvfolds,cv,axis=1)
        
        # Identify the train and test sets
        svm_train = svm_mat[np.reshape(train_ind,-1),:]
        svm_test = svm_mat[test_ind,:]

        
        prototype = {}
        # Construct RSA prototypes
        for rule in range(nrules):
            prototype_ind = np.reshape(train_ind[rule,:],-1)
            prototype[rule] = np.mean(svm_mat[prototype_ind],axis=0)
            
        corr_rho = []
        err_rho = []
        for rule1 in range(nrules):
            tmp = []
            for rule2 in range(nrules):
                r = stats.spearmanr(prototype[rule1],svm_test[rule2])[0]
                r = np.arctanh(r)
                if rule1==rule2: 
                    corr_rho.append(r)
                else:
                    tmp.append(r)
            err_rho.append(np.mean(tmp))

        corr_rho_cvs.append(np.mean(corr_rho))
        err_rho_cvs.append(np.mean(err_rho))
        # Compute miniblock-wise information estimate
        for i in range(len(corr_rho)):
            # Re-assign information estimate to original miniblock
            infoEstimate[cvfolds[i,cv]] = (corr_rho[i] - err_rho[i])

    return infoEstimate
        
    
def subjRSACV((subj,behav)):
    ruledims = ['logic','sensory','motor']
    svm_dict = {}
    labels = {}
    for ruledim in ruledims:
        svm_dict[ruledim], labels[ruledim] = setUpRSAMatrix(subj,ruledim)

    logit_beta = np.zeros((nParcels,))
    predictions = np.zeros((nParcels,128))
    accuracy = np.zeros((128,))
    roicount = 0
    for roi in svm_dict['logic']:
        infoEstimate = np.zeros((128,3)) # nMiniblocks, nRuledims
        rulecount = 0
        for ruledim in ruledims:
            svm_mat = svm_dict[ruledim][roi].copy()
            # Demean each sample
            svmmean = np.mean(svm_mat,axis=1)
            svmmean.shape = (len(svmmean),1)
            svm_mat = svm_mat - svmmean


            infoEstimate[:,rulecount] = rsaCV(svm_mat, 
                                              labels[ruledim], 
                                              subj)
            
            # Normalize information estimates
#             infoEstimate = stats.zscore(infoEstimate,axis=0)
            
            rulecount += 1
        
        # Sum across all rule dimensions
#         infoEstimate = np.sum(infoEstimate,axis=1)
#         infoEstimate = np.mean(infoEstimate,axis=1)
        
        # Run logistic (or correlation) with behavior (performance or RT)
#         ind_var = np.vstack((np.ones((len(infoEstimate),)),infoEstimate.T))
#         ind_var = ind_var.T
        
        if behav=='acc':
            accuracy = accdata[subj]
            # binarize accuracy
#             accuracy = accuracy>.5
            # Run cross-validation
            prediction = np.zeros((len(accuracy),))
            for mb in range(len(accuracy)):
                indices = np.arange(len(accuracy))
                testset_ind = mb
                trainset_ind = np.delete(indices,mb)
                
                # Define train and test set
                trainset = infoEstimate[trainset_ind,:]
                testset = infoEstimate[testset_ind,:]

                # Z-normalize train set and normalize test set using trainset's mean and std
#                 mean_train = np.mean(trainset,axis=0)
#                 mean_train.shape = (1,trainset.shape[1])
#                 std_train = np.std(trainset,axis=0)
#                 std_train.shape = (1,trainset.shape[1])
                
#                 # Normalize train set
#                 trainset = np.divide((trainset - mean_train),std_train)
#                 # Normalize test set
#                 testset = np.divide((testset - mean_train),std_train)
#                 testset = testset.T
                
                # Create regressor matrix and pad trainset with ones
                regressors = np.vstack((np.ones((trainset.shape[0],)),trainset.T))
                regressors = regressors.T
                logit = sm.Logit(accuracy[trainset_ind],regressors)
                result = logit.fit(disp=False)
                b = result.params
                
                # Predict model output
                # Don't include the bias term since we want to remove bias towards correct predictions
                y = 1/(1+np.exp(-(b[1]*testset[0] + b[2]*testset[1] + b[3]*testset[2])))
                
                prediction[testset_ind] = y
        
            # Classify
            binary_predictions = prediction > .5
            binary_actual = accuracy > .5
            classification = np.mean(binary_actual==binary_predictions)
            betas = classification
#             betas = stats.spearmanr(accuracy,prediction)[0]
#             predictions[roicount,:] = prediction
            # Fit model
#             betas = result.params[1:] # Get only infoestimate beta params

        elif behav=='rt':
            accuracy = rtdata[subj]
            betas = stats.spearmanr(infoEstimate,accuracy)[0]
        
        logit_beta[roicount] = betas
        
        roicount += 1
    
    return logit_beta
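
In the 'acc' branch of subjRSACV above, behavioral accuracy is predicted from the three rule-dimension information estimates with a leave-one-miniblock-out logistic regression. For each held-out miniblock, coefficients are fit with sm.Logit on the remaining 127 miniblocks, and the predicted probability of a correct response is computed without the intercept (per the in-code comment, to remove the bias toward predicting correct responses):

    p(correct_mb) = 1 / (1 + exp(-(b1*IE_logic(mb) + b2*IE_sensory(mb) + b3*IE_motor(mb))))

Predictions are thresholded at 0.5 and compared with binarized accuracy, yielding one classification accuracy per ROI per subject (the values stored in logit_beta).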

2.0 Run logistic regressions comparing accuracy to information estimates


In [5]:
behav='acc'

inputs = []
for subj in subjNums: inputs.append((subj,behav))

pool = mp.Pool(processes=32)
results = pool.map_async(subjRSACV,inputs).get()
pool.close()
pool.join()

logit_beta = np.zeros((nParcels,len(subjNums)))
scount = 0
for result in results:
    for roi in range(nParcels):
        logit_beta[roi,scount] = result[roi]
        
    scount += 1

Now assess the relationship with behavior (the logistic-regression classification results), restricting statistics to FPN and CON regions (the regions with significant information estimates), and write out thresholded maps for visualization


In [6]:
logit_stats = {}
# Output to CSV matrix
sig_t = np.zeros((nParcels,))
sig_effect = np.zeros((nParcels,))
logit_stats['t'] = np.zeros((nParcels,))
logit_stats['p'] = np.zeros((nParcels,))
logit_stats['q'] = np.zeros((nParcels,))
effectsize = []
for roi in range(nParcels):
    t, p = stats.ttest_1samp(logit_beta[roi,:],.5)

    effectsize.append(np.mean(logit_beta[roi,:]))

    # Convert to a one-tailed p-value (testing for above-chance classification only)
    if t > 0:
        p = p/2.0
    else:
        p = 1.0 - p/2.0
    logit_stats['t'][roi] = t
    logit_stats['p'][roi] = p

arr = logit_stats['p']

# Only run statistics (FDR correction) on FPN & CON regions
sig_only = np.where((networkdef==networkmappings['fpn']) | (networkdef==networkmappings['con']))[0]

# Alternative ROI selections explored previously (not used):
# sig_only = np.where(logit_stats['p']<1)[0] # include all ROIs
# sig_only = np.arange(nParcels)
# sig_only = np.where(networkdef==networkmappings['smn'])[0]
# sig_only = np.where(networkdef==networkmappings['con'])[0]
# sig_only = np.intersect1d(sig_only,sig_only2)
qmat = np.ones((len(arr),))
arr = np.asarray(arr)
qmat[sig_only] = mc.fdrcorrection0(arr[sig_only])[1]
for roi in range(nParcels):
    logit_stats['q'][roi] = qmat[roi]

tarr = np.asarray(logit_stats['t'])
qarr = np.asarray(logit_stats['q'])

qbin = qarr < 0.05
sig_t[:] = np.multiply(tarr,qbin)
sig_effect[:] = np.multiply(np.asarray(effectsize),qbin)
##
# added this after finding that transfers between region 260 and 271 were significant
sig_effect[271] = .53002875 # Decoding accuracy of ITE from 260 -> 271

sig_t_vertex = np.zeros((len(glasser2),))
effects_vertex = np.zeros((len(glasser2),2))
col = 0
for roi in range(nParcels):
    parcel_ind = np.where(glasser2==(roi+1))[0]
    sig_t_vertex[parcel_ind] = sig_t[roi]
    effects_vertex[parcel_ind,0] = effectsize[roi]
    effects_vertex[parcel_ind,1] = sig_effect[roi]


# Write file to csv and run wb_command
outdir = '/projects2/ModalityControl2/data/resultsMaster/ManuscriptS5a_BaselineIE_PerformanceDecoding/'
filename = 'BaselineIE_64k_RuleGeneral_LogitRegCorrWithAcc_Tstat_FDR.csv'
np.savetxt(outdir + filename, sig_t_vertex,fmt='%s')
wb_file = 'BaselineIE_64k_RuleGeneral_LogitRegCorrWithAcc_Tstat_FDR.dscalar.nii'
glasserfilename = '/projects/AnalysisTools/ParcelsGlasser2016/archive/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)

# Compute effect size baseline (information content)
outdir = '/projects2/ModalityControl2/data/resultsMaster/ManuscriptS5a_BaselineIE_PerformanceDecoding/'
filename = 'BaselineIE_64k_RuleGeneral_LogitRegCorrWithAcc_EffectSize_FDR.csv'
np.savetxt(outdir + filename, effects_vertex,fmt='%s')
wb_file = 'BaselineIE_64k_RuleGeneral_LogitRegCorrWithAcc_EffectSize_FDR.dscalar.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)


Out[6]:
0

In [7]:
tmp = np.where(logit_stats['p']<0.05)[0]
networkdef[tmp]
print 'ROIs with p < 0.05 (uncorrected):', tmp
# Find significant ROIs
behav_rois = np.where(logit_stats['q']<0.05)[0]
print 'Significant ROIs: Searching only FPN and CON regions \n'
for roi in behav_rois:
    print 'ROI:', roi
    print 'Decoding accuracy:', effectsize[roi]
    print 'P =', logit_stats['p'][roi], '| q =', logit_stats['q'][roi]


ROIs with p < 0.05 (uncorrected): [ 18  39  40  45  67  68  69  83  85  86  88  90  91  94  96  97 107 114
 122 129 148 149 165 176 178 182 190 195 205 207 209 219 220 224 226 230
 231 240 243 247 248 252 254 260 261 278 288 302 309 333 334 346 355 357]
Significant ROIs: Searching only FPN and CON regions 

ROI: 260
Decoding accuracy: 0.5263671875
P = 0.000196346966794 | q = 0.0188493088123

Run FWE correction (using permutation tests) for statistical significance
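
The permutationTesting module is in-house code that is not shown in this notebook. As a point of reference, a minimal sketch of a max-statistic, sign-flip permutation test is given below; this illustrates the general FWE-correction approach assumed here, not the actual pt.permutationFWE implementation (the function name permutationFWE_sketch is hypothetical, and note that, judging from its use in the cell below, pt.permutationFWE appears to return 1 - p rather than p):

import numpy as np
import scipy.stats as stats

def permutationFWE_sketch(data, permutations=1000):
    """Max-statistic sign-flip permutation test (FWE-corrected), illustrative only.
    data : nROIs x nSubjects array of chance-subtracted decoding accuracies."""
    nrois, nsubjs = data.shape
    # Observed one-sample t-statistic per ROI (test against 0)
    t_real = stats.ttest_1samp(data, 0, axis=1)[0]
    max_t_null = np.zeros((permutations,))
    for i in range(permutations):
        # Randomly flip the sign of each subject's data (null hypothesis: symmetric around 0)
        signs = np.random.choice([-1, 1], size=(1, nsubjs))
        t_perm = stats.ttest_1samp(data * signs, 0, axis=1)[0]
        # Keep the maximum t across ROIs to control the family-wise error rate
        max_t_null[i] = np.max(t_perm)
    # FWE-corrected p-value: fraction of null max-t values that meet or exceed each observed t
    p_fwe = np.mean(max_t_null[np.newaxis, :] >= t_real[:, np.newaxis], axis=1)
    return t_real, p_fwe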


In [11]:
import permutationTesting as pt
sig_only = np.where((networkdef==networkmappings['fpn']) | (networkdef==networkmappings['con']))[0]
chance = .5
tmp = logit_beta[sig_only] - chance # Subtract chance from the decoding accuracies
t, p = pt.permutationFWE(tmp,permutations=1000,nproc=15)
pfwe = np.zeros((nParcels,))
tfwe = np.zeros((nParcels,))
pfwe[sig_only] = p
tfwe[sig_only] = t

behav_rois = np.where(pfwe>0.95)[0] # pt.permutationFWE appears to return 1 - p, so significant ROIs have pfwe > 0.95
print 'Significant ROIs: Searching only FPN and CON regions \n'
for roi in behav_rois:
    print 'ROI:', roi
    print 'Decoding accuracy:', effectsize[roi]
    print 'T-statistic:', tfwe[roi]
    print 'pfwe =', 1.0 - pfwe[roi]


Significant ROIs: Searching only FPN and CON regions 

ROI: 260
Decoding accuracy: 0.5263671875
T-statistic: 3.97397192116
pfwe = 0.016