Manuscript5 - Calculating the information estimate for each brain region using vertex-level activation patterns for each rule domain

Analysis for Fig. 5

Master code for Ito et al., 2017

Takuya Ito (takuya.ito@rutgers.edu)


In [30]:
import sys
sys.path.append('utils/')
import numpy as np
import loadGlasser as lg
import scripts3_functions as func
import scipy.stats as stats
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import statsmodels.sandbox.stats.multicomp as mc
import statsmodels.api as sm
import multiprocessing as mp
import pandas as pd
import multregressionconnectivity as mreg
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import nibabel as nib
import os
os.environ['OMP_NUM_THREADS'] = str(1)
import permutationTesting as pt

0.0 Basic parameters


In [3]:
# Set basic parameters
basedir = '/projects2/ModalityControl2/'
datadir = basedir + 'data/'
resultsdir = datadir + 'resultsMaster/'
runLength = 4648

subjNums = ['032', '033', '037', '038', '039', '045', 
            '013', '014', '016', '017', '018', '021', 
            '023', '024', '025', '026', '027', '031', 
            '035', '046', '042', '028', '048', '053', 
            '040', '049', '057', '062', '050', '030', '047', '034']

# Organized as a 64k vector
glasserparcels = lg.loadGlasserParcels()

nParcels = 360

# Load in Glasser parcels in their native format
# Note that this parcel file is actually flipped (across hemispheres), but it doesn't matter since we're using the same exact file to reconstruct the data
glasser2 = nib.load('/projects/AnalysisTools/ParcelsGlasser2016/archive/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii')
glasser2 = glasser2.get_data()
glasser2 = np.squeeze(glasser2)


pixdim[1,2,3] should be non-zero; setting 0 dims to 1

1.0 Load in vertex-wise betas across all miniblocks for all brain regions

1.1 Define basic functions for the RSA pipeline


In [33]:
def loadBetas(subj):
    datadir = '/projects2/ModalityControl2/data/resultsMaster/glmMiniblockBetaSeries/'
    filename = subj + '_miniblock_taskBetas_Surface64k.csv'
    betas = np.loadtxt(datadir + filename, delimiter=',')
    # Drop the first 17 columns and transpose so rows are miniblocks and columns are vertices
    betas = betas[:,17:].T
    return betas


def setUpRSAMatrix(subj,ruledim):
    """
    Sets up the activation matrix (miniblocks x vertices) for a given rule dimension,
    organized into region-wise matrices for the RSA analysis
    """
    
    betas = loadBetas(subj)
    rules, rulesmb = func.importRuleTimingsV3(subj,ruledim)
    
    svm_mat = np.zeros((betas.shape))
    samplecount = 0
    labels = []
    for rule in rulesmb:
        rule_ind = rulesmb[rule].keys()
        sampleend = samplecount + len(rule_ind)
        svm_mat[samplecount:sampleend,:] = betas[rule_ind,:]
        labels.extend(np.ones(len(rule_ind),)*rule)
        samplecount += len(rule_ind)
        
    labels = np.asarray(labels)
    
    svm_dict = {}
    nParcels = 360
    for roi in range(1,nParcels+1):
        roi_ind = np.where(glasserparcels==roi)[0]
        svm_dict[roi] = svm_mat[:,roi_ind]
    
    return svm_dict, labels

def rsaCV(svm_mat,labels, subj):
    """Runs a leave-4-out CV (one miniblock per rule held out per fold) for a 4-way RSA comparison"""

    cvfolds = []
    # 32 folds, given a leave-4-out scheme on 128 total miniblocks
    # Hold out a single miniblock from each rule in every CV fold
    for rule in np.unique(labels):
        cvfolds.append(np.where(labels==rule)[0])
    cvfolds = np.asarray(cvfolds)
    
    # Number of CVs is columns
    ncvs = cvfolds.shape[1]
    nrules = cvfolds.shape[0]
    # For each CV fold, make sure the fold is constructed randomly
    for i in range(nrules): np.random.shuffle(cvfolds[i,:])

    corr_rho_cvs = []
    err_rho_cvs = []
    acc_ind = []
    infoEstimate = []
    for cv in range(ncvs):
        # Select the test set from the CV fold matrix
        test_ind = cvfolds[:,cv].copy()
        # The accuracy array should be the same as test_ind
        acc_ind.extend(cvfolds[:,cv].copy())
        # Remove the test fold from the training set
        train_ind = np.delete(cvfolds,cv,axis=1)
        
        # Identify the train and test sets
        svm_train = svm_mat[np.reshape(train_ind,-1),:]
        svm_test = svm_mat[test_ind,:]
        
        prototype = {}
        # Construct RSA prototypes
        for rule in range(nrules):
            prototype_ind = np.reshape(train_ind[rule,:],-1)
            prototype[rule] = np.mean(svm_mat[prototype_ind],axis=0)
            
        corr_rho = []
        err_rho = []
        for rule1 in range(nrules):
            tmp = []
            for rule2 in range(nrules):
                r = np.arctanh(stats.spearmanr(prototype[rule1],svm_test[rule2,:])[0])
                if rule1==rule2: 
                    corr_rho.append(r)
                else:
                    tmp.append(r)
            err_rho.append(np.mean(tmp))

        corr_rho_cvs.append(np.mean(corr_rho))
        err_rho_cvs.append(np.mean(err_rho))
        # Compute miniblock-wise information estimate (matched rho minus mean mismatched rho)
        for i in range(len(corr_rho)):
            infoEstimate.append(corr_rho[i] - err_rho[i])

    infoEstimate = np.asarray(infoEstimate)

    return np.mean(corr_rho_cvs), np.mean(err_rho_cvs), np.mean(infoEstimate)
        
    
def subjRSACV((subj,ruledim,behav)):
    svm_dict, labels = setUpRSAMatrix(subj,ruledim)
    corr_rhos = {}
    err_rhos = {}
    infoEstimate = {}
    for roi in svm_dict:
        svm_mat = svm_dict[roi].copy()
        # Demean each sample
        svmmean = np.mean(svm_mat,axis=1)
        svmmean.shape = (len(svmmean),1)
        svm_mat = svm_mat - svmmean

#         svm_mat = preprocessing.scale(svm_mat,axis=0)

        corr_rhos[roi], err_rhos[roi], infoEstimate[roi] = rsaCV(svm_mat, labels, subj)
        
    return corr_rhos, err_rhos, infoEstimate
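
To make the quantity returned by rsaCV concrete: the information estimate is the Fisher-z transformed Spearman correlation between a held-out miniblock and its own rule's prototype, minus the mean correlation with the other rules' prototypes. Below is a minimal, self-contained sketch on synthetic data (not part of the original pipeline; all names and values are illustrative), showing that the estimate is reliably positive when vertex patterns carry rule information.

# Illustrative sketch only -- synthetic data, not part of the original analysis
import numpy as np
import scipy.stats as stats

np.random.seed(1)
nvertices = 200
rule_patterns = [np.random.randn(nvertices) for r in range(2)]  # two hypothetical rules

def simulateMiniblock(rule, noise=1.0):
    # One simulated miniblock: the rule's vertex pattern plus Gaussian noise
    return rule_patterns[rule] + noise*np.random.randn(nvertices)

# "Training" prototypes: average of 31 simulated miniblocks per rule
prototypes = [np.mean([simulateMiniblock(r) for i in range(31)], axis=0) for r in range(2)]

# Held-out "test" miniblock from rule 0
testblock = simulateMiniblock(0)
matched = np.arctanh(stats.spearmanr(prototypes[0], testblock)[0])
mismatched = np.arctanh(stats.spearmanr(prototypes[1], testblock)[0])
print 'Information estimate (matched - mismatched rho):', matched - mismatched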

2.0 Compute information estimates for all regions and all three rule domains


In [34]:
ruledims = ['logic','sensory','motor']
behav='acc'
corr_rhos = {}
err_rhos = {}
diff_rhos = {}
for ruledim in ruledims:
    corr_rhos[ruledim] = {}
    err_rhos[ruledim] = {}
    diff_rhos[ruledim] = {}
    
    print 'Running', ruledim

    inputs = []
    for subj in subjNums: inputs.append((subj,ruledim,behav))

#     pool = mp.Pool(processes=8)
    pool = mp.Pool(processes=16)
    results = pool.map_async(subjRSACV,inputs).get()
    pool.close()
    pool.join()

    # Reorganize results
    corr_rhos[ruledim] = np.zeros((nParcels,len(subjNums)))
    err_rhos[ruledim] = np.zeros((nParcels,len(subjNums)))
    diff_rhos[ruledim] = np.zeros((nParcels,len(subjNums)))
    
    scount = 0
    for result in results:
        for roi in range(nParcels):
            corr_rhos[ruledim][roi,scount] = result[0][roi+1]
            err_rhos[ruledim][roi,scount] = result[1][roi+1]
            diff_rhos[ruledim][roi,scount] = result[2][roi+1]
        scount += 1


Running logic
Running sensory
Running motor

Save CSVs of the baseline leave-4-out CV region-wise information estimates


In [35]:
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'

for ruledim in ruledims:
    filename = 'Regionwise_InformationEstimate_LeaveOneOut_' + ruledim + '.csv'
    np.savetxt(outdir + filename, diff_rhos[ruledim], delimiter=',')

Run statistics (one-sample t-tests with FDR correction for multiple comparisons)
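
For reference, the multiple-comparisons step below uses mc.fdrcorrection0, which returns a boolean rejection array and the FDR-adjusted p-values (the notebook keeps only the latter as q-values). A toy illustration with made-up p-values:

# Toy illustration of fdrcorrection0 (made-up p-values)
import numpy as np
import statsmodels.sandbox.stats.multicomp as mc

pvals = np.array([0.001, 0.01, 0.03, 0.20, 0.80])
rejected, qvals = mc.fdrcorrection0(pvals, alpha=0.05)
print 'FDR-adjusted p-values (q):', qvals
print 'Significant after FDR:', rejected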


In [36]:
df_stats = {}
# Output to CSV matrix
sig_t = np.zeros((nParcels,len(ruledims)))
sig_effect = np.zeros((nParcels,len(ruledims)))
effectsize = {}
rulecount = 0
for ruledim in ruledims:
    df_stats[ruledim] = {}
    df_stats[ruledim]['t'] = np.zeros((nParcels,))
    df_stats[ruledim]['p'] = np.zeros((nParcels,))
    effectsize[ruledim] = np.zeros((nParcels,))
    for roi in range(nParcels):
        t, p = stats.ttest_1samp(diff_rhos[ruledim][roi,:], 0)
#         t, p = stats.ttest_rel(corr_rhos[ruledim][roi,:], err_rhos[ruledim][roi,:])
            
        effectsize[ruledim][roi] = np.mean(diff_rhos[ruledim][roi,:])

        # Convert the two-sided p-value to a one-sided (upper-tail) p-value
        if t > 0:
            p = p/2.0
        else:
            p = 1.0 - p/2.0
        df_stats[ruledim]['t'][roi] = t
        df_stats[ruledim]['p'][roi] = p
        
    arr = df_stats[ruledim]['p']
    df_stats[ruledim]['q'] = mc.fdrcorrection0(arr)[1]

    qbin = df_stats[ruledim]['q'] < 0.05
    sig_t[:,rulecount] = np.multiply(df_stats[ruledim]['t'],qbin)
    sig_effect[:,rulecount] = np.multiply(effectsize[ruledim],qbin)
    
    rulecount += 1

Map statistics and results to the cortical surface using Workbench


In [38]:
sig_t_vertex = np.zeros((len(glasser2),len(ruledims)))
effects_vertex = np.zeros((len(glasser2),len(ruledims)))
effects_vertex_sig = np.zeros((len(glasser2),len(ruledims)))
for col in range(sig_t_vertex.shape[1]):
    for roi in range(nParcels):
        parcel_ind = np.where(glasser2==(roi+1))[0]
        sig_t_vertex[parcel_ind,col] = sig_t[roi,col]
        effects_vertex[parcel_ind,col] = effectsize[ruledims[col]][roi]
        effects_vertex_sig[parcel_ind,col] = sig_effect[roi,col]

# Write file to csv and run wb_command
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
filename = 'RegionwiseIE_FDRThresholded_Tstat.csv'
np.savetxt(outdir + filename, sig_t_vertex,fmt='%s')
wb_file = 'RegionwiseIE_FDRThresholded_Tstat.dscalar.nii'
glasserfilename = '/projects/AnalysisTools/ParcelsGlasser2016/archive/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)

# Write the unthresholded effect sizes (information estimates) and convert to a surface dscalar
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
filename = 'RegionwiseIE_InformationEstimate.csv'
np.savetxt(outdir + filename, effects_vertex,fmt='%s')
wb_file = 'RegionwiseIE_InformationEstimate.dscalar.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)

# Write the FDR-thresholded effect sizes (information estimates) and convert to a surface dscalar
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
filename = 'RegionwiseIE_FDRThresholded_InformationEstimate.csv'
np.savetxt(outdir + filename, effects_vertex_sig,fmt='%s')
wb_file = 'RegionwiseIE_FDRThresholded_InformationEstimate.dscalar.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)


Out[38]:
0

Run FWE Correction using permutation testing
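
The permutationTesting utilities live in the project's utils directory and are not reproduced in this notebook. For orientation, the sketch below outlines a generic max-T (max-statistic) permutation test for FWE correction, under the assumption that pt.permutationFWE works along these lines; the function name maxT_permutation_fwe and its return convention are illustrative only. Note that the cell after this one converts the returned values via 1 - p before thresholding, so the actual function evidently returns the complementary proportion rather than the p-value itself.

# Illustrative sketch of a max-T permutation test for FWE correction
# (assumption: pt.permutationFWE implements something similar; this is not the original code)
import numpy as np
import scipy.stats as stats

def maxT_permutation_fwe(data, nullmean=0, permutations=1000, seed=1):
    # data: regions x subjects array of information estimates
    np.random.seed(seed)
    nregions, nsubjs = data.shape
    true_t = stats.ttest_1samp(data, nullmean, axis=1)[0]
    max_t_null = np.zeros(permutations)
    for i in range(permutations):
        # Randomly sign-flip each subject's (null-mean-centered) values
        flips = np.random.choice([-1, 1], size=nsubjs)
        perm_t = stats.ttest_1samp((data - nullmean)*flips, 0, axis=1)[0]
        max_t_null[i] = np.max(perm_t)
    # FWE-corrected p-value: fraction of permuted maxima that reach the observed t
    fwe_p = np.array([np.mean(max_t_null >= t) for t in true_t])
    return true_t, fwe_p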


In [51]:
pt = reload(pt)

fwe_Ts = np.zeros((nParcels,len(ruledims)))
fwe_Ps = np.zeros((nParcels,len(ruledims)))
rulecount = 0
for ruledim in ruledims:
    t, p = pt.permutationFWE(diff_rhos[ruledim], nullmean=0, permutations=10000, nproc=15)
#     t, p = pt.permutationFWE(corr_rhos[ruledim] - err_rhos[ruledim],
#                              nullmean=0, permutations=1000, nproc=15)
    fwe_Ts[:,rulecount] = t
    fwe_Ps[:,rulecount] = p
    
    rulecount += 1

In [52]:
# Sanity check: t-values from the permutation function should match those computed above

rulecount = 0
for ruledim in ruledims:
    if np.sum(df_stats[ruledim]['t']==fwe_Ts[:,rulecount])==360:
        print 'Correct t-values match up'
    else:
        print 'Error! Likely a bug in the code'
    rulecount += 1


Correct t-values match up
Correct t-values match up
Correct t-values match up

Write out significant ROIs


In [53]:
fwe_Ps2 = (1.0000 - fwe_Ps) # Convert to one-tailed (upper-tail) p-values
sig_mat = fwe_Ps2 < 0.0500 # One-sided test: only regions exceeding the 95th percentile of the null distribution
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
filename = 'FWE_corrected_pvals_allruledims.csv'
np.savetxt(outdir + filename, fwe_Ps, delimiter=',')

sig_t = np.zeros((nParcels,len(ruledims)))
sig_effect = np.zeros((nParcels,len(ruledims)))
rulecount = 0
for ruledim in ruledims:
    sig_t[:,rulecount] = np.multiply(fwe_Ts[:,rulecount],sig_mat[:,rulecount])
    sig_effect[:,rulecount] = np.multiply(effectsize[ruledim],sig_mat[:,rulecount])
    
    # Read out statistics for manuscript
    # Identify significant regions
    sig_ind = sig_mat[:,rulecount]
    nonsig_ind = np.logical_not(sig_mat[:,rulecount])
    
    print 'Average significant IE for', ruledim, ':', np.mean(effectsize[ruledim][sig_ind])
    print 'Average significant T-stats for', ruledim, ':', np.mean(fwe_Ts[:,rulecount][sig_ind])
    print 'Maximum significant p-value for', ruledim, ':', np.max(fwe_Ps2[:,rulecount][sig_ind])
    print '----'
    print 'Average nonsignificant IE for', ruledim, ':', np.mean(effectsize[ruledim][nonsig_ind])
    print 'Average nonsignificant T-stats for', ruledim, ':', np.mean(fwe_Ts[:,rulecount][nonsig_ind])
    print 'Minimum nonsignificant p-value for', ruledim, ':', np.min(fwe_Ps2[:,rulecount][nonsig_ind])
    print '\n'
    print '*****************'
    rulecount += 1


Average significant IE for logic : 0.0228556752082
Average significant T-stats for logic : 5.24410223409
Maximum significant p-value for logic : 0.05
----
Average nonsignificant IE for logic : 0.0108887207719
Average nonsignificant T-stats for logic : 2.14023612418
Minimum nonsignificant p-value for logic : 0.0507


*****************
Average significant IE for sensory : 0.0213431959576
Average significant T-stats for sensory : 4.97298583963
Maximum significant p-value for sensory : 0.0458
----
Average nonsignificant IE for sensory : 0.0123727448245
Average nonsignificant T-stats for sensory : 2.28815520011
Minimum nonsignificant p-value for sensory : 0.0506


*****************
Average significant IE for motor : 0.0637508552622
Average significant T-stats for motor : 6.80359722659
Maximum significant p-value for motor : 0.0483
----
Average nonsignificant IE for motor : 0.00742946704093
Average nonsignificant T-stats for motor : 1.55104069333
Minimum nonsignificant p-value for motor : 0.0535


*****************

Map FWE-corrected statistics and results to the cortical surface using Workbench


In [54]:
sig_t_vertex = np.zeros((len(glasser2),len(ruledims)))
effects_vertex = np.zeros((len(glasser2),len(ruledims)))
effects_vertex_sig = np.zeros((len(glasser2),len(ruledims)))
for col in range(sig_t_vertex.shape[1]):
    for roi in range(nParcels):
        parcel_ind = np.where(glasser2==(roi+1))[0]
        sig_t_vertex[parcel_ind,col] = sig_t[roi,col]
        effects_vertex_sig[parcel_ind,col] = sig_effect[roi,col]

# Write file to csv and run wb_command
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
filename = 'RegionwiseIE_FWERThresholded_Tstat.csv'
np.savetxt(outdir + filename, sig_t_vertex,fmt='%s')
wb_file = 'RegionwiseIE_FWERThresholded_Tstat.dscalar.nii'
glasserfilename = '/projects/AnalysisTools/ParcelsGlasser2016/archive/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)

# # Compute effect size baseline (information content)
# outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
# filename = 'RegionwiseIE_InformationEstimate.csv'
# np.savetxt(outdir + filename, effects_vertex,fmt='%s')
# wb_file = 'RegionwiseIE_InformationEstimate.dscalar.nii'
# wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
# os.system(wb_command)

# Write the FWE-thresholded effect sizes (information estimates) and convert to a surface dscalar
outdir = '/projects2/ModalityControl2/data/resultsMaster/Manuscript5_BaselineRegionIE/'
filename = 'RegionwiseIE_FWERThresholded_InformationEstimate.csv'
np.savetxt(outdir + filename, effects_vertex_sig,fmt='%s')
wb_file = 'RegionwiseIE_FWERThresholded_InformationEstimate.dscalar.nii'
wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + ' ' + glasserfilename + ' ' + outdir + wb_file + ' -reset-scalars'
os.system(wb_command)


Out[54]:
0