ManuscriptS1a - Network-level information estimates (using RSA)

Analysis for Supplementary Figure 1B.

Master code for Ito et al., 2017

Takuya Ito (takuya.ito@rutgers.edu)


In [119]:
import sys
sys.path.append('utils/')
import numpy as np
import loadGlasser as lg
import scripts3_functions as func
import scipy.stats as stats
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import statsmodels.sandbox.stats.multicomp as mc
import sys
import multiprocessing as mp
import pandas
%matplotlib inline
import permutationTesting as pt
import os
os.environ['OMP_NUM_THREADS'] = str(1)



Basic parameters


In [120]:
# Set basic parameters
basedir = '/projects2/ModalityControl2/'
datadir = basedir + 'data/'
resultsdir = datadir + 'resultsMaster/'
runLength = 4648

subjNums = ['032', '033', '037', '038', '039', '045', 
            '013', '014', '016', '017', '018', '021', 
            '023', '024', '025', '026', '027', '031', 
            '035', '046', '042', '028', '048', '053', 
            '040', '049', '057', '062', '050', '030', '047', '034']

glasserparcels = lg.loadGlasserParcels()
networkdef = lg.loadGlasserNetworks()
networkmappings = {'fpn':7, 'vis':1, 'smn':2, 'con':3, 'dmn':6, 'aud1':8, 'aud2':9, 'dan':11}
# Force aud2 key to be the same as aud1
aud2_ind = np.where(networkdef==networkmappings['aud2'])[0]
networkdef[aud2_ind] = networkmappings['aud1']
# Define new network mappings with no aud1/aud2 distinction
networkmappings = {'fpn':7, 'vis':1, 'smn':2, 'con':3, 'dmn':6, 'aud':8, 'dan':11}

Set up basic functions


In [121]:
def loadBetas(subj, net='all'):
    """Loads in task betas"""
    datafile = resultsdir + 'glmMiniblockBetaSeries/' + subj + '_miniblock_taskBetas_Glasser.csv'
    betas = np.loadtxt(datafile, delimiter=',')
    betas = betas[:,17:]
    if net == 'all':
        return betas
    else:
        net_ind = np.where(networkdef==net)[0]
        return betas[net_ind,:].T
    
def setupMatrix(subj,ruledim,net):
    """
    Sets up basic SVM Matrix for a classification of a particular rule dimension and network
    """
    
    betas = loadBetas(subj,net=net)
    rules, rulesmb = func.importRuleTimingsV3(subj,ruledim)
    
    svm_mat = np.zeros((betas.shape))
    samplecount = 0
    labels = []
    for rule in rulesmb:
        rule_ind = rulesmb[rule].keys()
        sampleend = samplecount + len(rule_ind)
        svm_mat[samplecount:sampleend,:] = betas[rule_ind,:]
        labels.extend(np.ones(len(rule_ind),)*rule)
        samplecount += len(rule_ind)
        
    labels = np.asarray(labels)
    
    return svm_mat, labels

Set up functions for information estimation (RSA instead of SVM decoding)


In [122]:
def rsaCV(svm_mat,labels,ruledim):
    """Runs a leave-4-out CV for a 4 way  classification"""
        
    cvfolds = []
    # 32 folds, if we do a leave 4 out for 128 total miniblocks
    # Want to leave a single block from each rule from each CV
    for rule in np.unique(labels):
        cvfolds.append(np.where(labels==rule)[0])
    cvfolds = np.asarray(cvfolds)
    
    # Number of CVs is columns
    ncvs = cvfolds.shape[1]
    nrules = cvfolds.shape[0]
    # Randomly sample cross-validation folds
    for i in range(nrules): np.random.shuffle(cvfolds[i,:])
    
    corr_rho_cvs = []
    err_rho_cvs = []
    for cv in range(ncvs):
        # Select a test set from the CV Fold matrix
        test_ind = cvfolds[:,cv].copy()
        # Delete the CV included from the train set
        train_ind = np.delete(cvfolds,cv,axis=1)
        
        # Identify the train and test sets
        svm_train = svm_mat[np.reshape(train_ind,-1),:]
        svm_test = svm_mat[test_ind,:]
        
#         ## Feature-wise normalization (computed by hand)
#         # Compute mean of train set
#         train_mean = np.mean(svm_train,axis=0)
#         train_mean.shape = (1,len(train_mean))
#         # Compute std of train set
#         train_std = np.std(svm_train,axis=0)
#         train_std.shape = (1,len(train_std))
#         # Normalize train set
#         svm_train = np.divide((svm_train - train_mean),train_std)
#         # Normalize test set with trainset mean and std to avoid circularity
#         svm_test = (svm_test - train_mean)/train_std
        
        prototype = {}
        # Construct RSA prototypes
        for rule in range(nrules):
            prototype_ind = np.reshape(train_ind[rule,:],-1)
            prototype[rule] = np.mean(svm_mat[prototype_ind],axis=0)
            
        corr_rho = []
        err_rho = []
        for rule1 in range(nrules):
            for rule2 in range(nrules):
                r = stats.spearmanr(prototype[rule1],svm_test[rule2,:])[0]
                r = np.arctanh(r)
                if rule1==rule2: 
                    corr_rho.append(r)
                else:
                    err_rho.append(r)
        
        corr_rho_cvs.append(np.mean(corr_rho))
        err_rho_cvs.append(np.mean(err_rho))

        

    return np.mean(corr_rho_cvs), np.mean(err_rho_cvs)
        
    
def subjRSACV((subj,ruledim,net)):
    svm_mat, labels = setupMatrix(subj,ruledim,net)
    # Demean each sample
    svmmean = np.mean(svm_mat,axis=1)
    svmmean.shape = (len(svmmean),1)
    svm_mat = svm_mat - svmmean
    
#     svm_mat = preprocessing.scale(svm_mat,axis=0)
    
    corr_rho, err_rho = rsaCV(svm_mat, labels, ruledim)
    diff_rho = corr_rho - err_rho

#     diff_rho = np.arctanh(corr_rho) - np.arctanh(err_rho)
#     diff_rho = np.arctanh(corr_rho) - np.arctanh(err_rho)
    return corr_rho, err_rho, diff_rho

In [123]:
netkeys = {0:'fpn', 1:'dan', 2:'con', 3:'dmn', 4:'vis', 5:'aud', 6:'smn'}

In [124]:
ruledims = ['logic','sensory','motor']
corr_rho = {}
err_rho = {}
diff_rho = {}
avg_acc = {}
for ruledim in ruledims:
    avg_acc[ruledim] = {}
    corr_rho[ruledim] = np.zeros((len(netkeys),len(subjNums)))
    err_rho[ruledim] = np.zeros((len(netkeys),len(subjNums)))
    diff_rho[ruledim] = np.zeros((len(netkeys),len(subjNums)))
    print 'Running', ruledim
    for net in netkeys.keys():
#         print 'Running network', net
        inputs = []
        for subj in subjNums: inputs.append((subj,ruledim,networkmappings[netkeys[net]]))
            
        pool = mp.Pool(processes=11)
        results = pool.map_async(subjRSACV,inputs).get()
        pool.close()
        pool.join()
        
        scount = 0
        for result in results:
            tmp_corr, tmp_err, tmp_diff = result
            corr_rho[ruledim][net,scount] = tmp_corr
            err_rho[ruledim][net,scount] = tmp_err
            diff_rho[ruledim][net,scount] = tmp_diff
            scount += 1
        avg_acc[ruledim][net] = np.mean(diff_rho[ruledim][net])


Running logic
Running sensory
Running motor

Run statistics with FDR-correction


In [125]:
# Compute group stats
chance = 0.0
results_dict_fdr = {}
for ruledim in ruledims:
    results_dict_fdr[ruledim] = {}
    for net in netkeys.keys(): results_dict_fdr[ruledim][netkeys[net]] = {}
    pvals = []
    for net in netkeys.keys():
        results_dict_fdr[ruledim][netkeys[net]]['Accuracy'] = str(round(np.mean(avg_acc[ruledim][net]),3))
        t, p = stats.ttest_1samp(diff_rho[ruledim][net],chance)
        results_dict_fdr[ruledim][netkeys[net]]['T-stats'] = t
        results_dict_fdr[ruledim][netkeys[net]]['P-values'] = p
        pvals.append(p)
        
    qvals = mc.fdrcorrection0(pvals)[1]
    qcount = 0
    for net in netkeys.keys():
        results_dict_fdr[ruledim][netkeys[net]]['Q-values'] = qvals[qcount]
        qcount += 1

Show results as dataframe


In [126]:
results_dframe_fdr = {}
for ruledim in ruledims:
    print 'Dataframe for', ruledim, 'classification'
    results_dframe_fdr[ruledim] = pandas.DataFrame(data=results_dict_fdr[ruledim])
    display(results_dframe_fdr[ruledim])


Dataframe for logic classification
aud con dan dmn fpn smn vis
Accuracy 0.019 0.029 0.024 0.029 0.034 0.007 0.015
P-values 0.000720487 1.95845e-06 0.000327489 2.17409e-06 1.53998e-07 0.23322 0.00317303
Q-values 0.00100868 5.07287e-06 0.000573107 5.07287e-06 1.07798e-06 0.23322 0.00370187
T-stats 3.75381 5.83596 4.03927 5.79937 6.73616 1.21585 3.19901
Dataframe for sensory classification
aud con dan dmn fpn smn vis
Accuracy 0.004 0.012 0.024 0.01 0.025 -0.004 0.054
P-values 0.155311 0.000226706 4.1589e-07 0.0374678 0.000257061 0.25929 1.78973e-06
Q-values 0.181196 0.000449857 2.91123e-06 0.052455 0.000449857 0.25929 6.26405e-06
T-stats 1.45649 4.17085 6.38192 2.17391 4.12599 -1.14913 5.86753
Dataframe for motor classification
aud con dan dmn fpn smn vis
Accuracy 0.007 0.017 0.017 0.009 0.004 0.203 0.005
P-values 0.020813 2.19242e-05 9.09449e-06 0.0352807 0.225402 1.04e-12 0.0771277
Q-values 0.0364228 5.11564e-05 3.18307e-05 0.049393 0.225402 7.28002e-12 0.0899823
T-stats 2.4356 4.99282 5.29965 2.20139 1.23693 11.4951 1.82839

In [127]:
#### Compute statistics for bar plot
bar_avgs = {}
bar_sems = {}
bar_avg_all = {}
for net in netkeys.keys(): bar_avg_all[net] = np.zeros((len(ruledims),len(subjNums)))

rulecount = 0
for ruledim in ruledims:
    bar_avgs[ruledim] = {}
    bar_sems[ruledim] = {}
    for net in netkeys.keys():
        bar_avgs[ruledim][net] = np.mean(diff_rho[ruledim][net])
        bar_sems[ruledim][net] = np.std(diff_rho[ruledim][net])/np.sqrt(len(subjNums))
        bar_avg_all[net][rulecount,:] = diff_rho[ruledim][net]
    rulecount += 1
        
bar_sem_all = {}
for net in netkeys.keys():
    meanacc = np.mean(bar_avg_all[net],axis=0)
    bar_avg_all[net] = np.mean(meanacc)
    bar_sem_all[net] = np.std(meanacc)/np.sqrt(len(subjNums))

##### Generate figures
width=0.25
width=.265
networks = netkeys.keys()
nbars = len(networks)
fig = plt.figure()
ax = fig.add_subplot(111)
rects = {}
widthcount = 0
colors = ['b','g','r']
colorcount = 0
for ruledim in ruledims:
    rects[ruledim] = ax.bar(np.arange(nbars)+widthcount, bar_avgs[ruledim].values(), width,align='center',
                     yerr=bar_sems[ruledim].values(), color=colors[colorcount], error_kw=dict(ecolor='black'))
    widthcount += width
    colorcount += 1
    
ax.set_title('Network Information Estimation of CPRO rules (FDR)',
             y=1.04, fontsize=16)
ax.set_ylabel('Match V. Mismatch Difference (Rho)',fontsize=12)
ax.set_xlabel('Rule types by Networks', fontsize=12)
ax.set_xticks(np.arange(nbars)+width)
ax.set_xticklabels(netkeys.values(),rotation=-45)
vmax=0.07
ax.set_ylim([0,vmax])

plt.legend((rects['logic'], rects['sensory'], rects['motor']),
           ('Logic', 'Sensory', 'Motor'), loc=((1.08,.65)))

## Add asterisks
def autolabel(rects,df,ruledim):
    # attach some text labels
    netcount = 0
    for rect in rects:
        height = rect.get_height()
        # Decide where to put the asterisk
        if height > vmax: 
            yax = vmax - .005
        else:
            yax = height + .01
            
        # Slightly move this asterisk since it's in the way
        if ruledim=='sensory' and netkeys[netcount]=='dan': yax -= .0025
            
        # Retrive q-value and assign asterisk accordingly
        q = df[netkeys[netcount]]['Q-values']
        if q > .05: asterisk=''
        if q < .05: asterisk='*'
        if q < .01: asterisk='**'
        if q < .001: asterisk='***'
            
        # Label bar
        ax.text(rect.get_x() + rect.get_width()/2., yax,
                asterisk, ha='center', va='bottom', fontsize=8)
        # Go to next network
        netcount += 1
        
for ruledim in ruledims:
    autolabel(rects[ruledim],results_dframe_fdr[ruledim],ruledim)
# autolabel(rects2)

plt.tight_layout()
# plt.savefig('FigS1a_NetworkRSA_InformationEstimate.pdf')


Run statistics with FWER-correction (Permutation testing)


In [128]:
# Compute group stats
chance = 0.0
results_dict_fwe = {}
for ruledim in ruledims:
    results_dict_fwe[ruledim] = {}
    for net in netkeys.keys(): results_dict_fwe[ruledim][netkeys[net]] = {}
    pvals = []
    for net in netkeys.keys():
        results_dict_fwe[ruledim][netkeys[net]]['Accuracy'] = str(round(np.mean(avg_acc[ruledim][net]),3))
        t, p = pt.permutationFWE(diff_rho[ruledim],nullmean=0,permutations=10000,nproc=15)
#         t, p = stats.ttest_1samp(diff_rho[ruledim][net],chance)
        results_dict_fwe[ruledim][netkeys[net]]['T-stats'] = t[net]
        results_dict_fwe[ruledim][netkeys[net]]['P-FWE'] = 1.0 - p[net]

Show results as dataframe


In [129]:
results_dframe_fwe = {}
for ruledim in ruledims:
    print 'Dataframe for', ruledim, 'classification'
    results_dframe_fwe[ruledim] = pandas.DataFrame(data=results_dict_fwe[ruledim])
    display(results_dframe_fwe[ruledim])


Dataframe for logic classification
aud con dan dmn fpn smn vis
Accuracy 0.019 0.029 0.024 0.029 0.034 0.007 0.015
P-FWE 0.0017 0 0.0008 0 0 0.5858 0.0068
T-stats 3.75381 5.83596 4.03927 5.79937 6.73616 1.21585 3.19901
Dataframe for sensory classification
aud con dan dmn fpn smn vis
Accuracy 0.004 0.012 0.024 0.01 0.025 -0.004 0.054
P-FWE 0.4293 0.0006 0 0.1195 0.0003 1 0
T-stats 1.45649 4.17085 6.38192 2.17391 4.12599 -1.14913 5.86753
Dataframe for motor classification
aud con dan dmn fpn smn vis
Accuracy 0.007 0.017 0.017 0.009 0.004 0.203 0.005
P-FWE 0.0728 0.0002 0 0.1203 0.5758 0 0.243
T-stats 2.4356 4.99282 5.29965 2.20139 1.23693 11.4951 1.82839

Manually compute average significant and non-significant t-stats for manuscript


In [130]:
results_dict_fwe[ruledim]['fpn'].keys()


Out[130]:
['P-FWE', 'T-stats', 'Accuracy']

In [131]:
ie = {}
t_avg = {}
p_avg = {}
for ruledim in ruledims:
    ie[ruledim] = {'sig':[], 'nonsig':[]}
    t_avg[ruledim] = {'sig':[], 'nonsig':[]}
    p_avg[ruledim] = {'sig':[], 'nonsig':[]}
    for net in results_dict_fwe[ruledim].keys():
        if results_dict_fwe[ruledim][net]['P-FWE']<0.05:
            ie[ruledim]['sig'].append(float(results_dict_fwe[ruledim][net]['Accuracy']))
            t_avg[ruledim]['sig'].append(float(results_dict_fwe[ruledim][net]['T-stats']))
            p_avg[ruledim]['sig'].append(float(results_dict_fwe[ruledim][net]['P-FWE']))
        else:
            ie[ruledim]['nonsig'].append(float(results_dict_fwe[ruledim][net]['Accuracy']))
            t_avg[ruledim]['nonsig'].append(float(results_dict_fwe[ruledim][net]['T-stats']))
            p_avg[ruledim]['nonsig'].append(float(results_dict_fwe[ruledim][net]['P-FWE']))
    
    # Read out statistics for manuscript
    print 'Average significant IE for', ruledim, ':', np.mean(ie[ruledim]['sig'])
    print 'Average significant T-stats for', ruledim, ':', np.mean(t_avg[ruledim]['sig'])
    print 'Max significant p-value for', ruledim, ':', np.max(p_avg[ruledim]['sig'])
    print '#################'
    print 'Average nonsignificant IE for', ruledim, ':', np.mean(ie[ruledim]['nonsig'])
    print 'Average nonsignificant T-stats for', ruledim, ':', np.mean(t_avg[ruledim]['nonsig'])
    print 'Min nonsignificant p-value for', ruledim, ':', np.min(p_avg[ruledim]['nonsig'])
    print '\n'


Average significant IE for logic : 0.025
Average significant T-stats for logic : 4.89392889673
Max significant p-value for logic : 0.0068
#################
Average nonsignificant IE for logic : 0.007
Average nonsignificant T-stats for logic : 1.21585023666
Min nonsignificant p-value for logic : 0.5858


Average significant IE for sensory : 0.02875
Average significant T-stats for sensory : 5.13657314649
Max significant p-value for sensory : 0.0006
#################
Average nonsignificant IE for sensory : 0.00333333333333
Average nonsignificant T-stats for sensory : 0.827091269676
Min nonsignificant p-value for sensory : 0.1195


Average significant IE for motor : 0.079
Average significant T-stats for motor : 7.26251804999
Max significant p-value for motor : 0.0002
#################
Average nonsignificant IE for motor : 0.00625
Average nonsignificant T-stats for motor : 1.92557722121
Min nonsignificant p-value for motor : 0.0728



In [132]:
#### Compute statistics for bar plot
bar_avgs = {}
bar_sems = {}
bar_avg_all = {}
t_avgs = {}
p_avgs = {}
for net in netkeys.keys(): bar_avg_all[net] = np.zeros((len(ruledims),len(subjNums)))

rulecount = 0
for ruledim in ruledims:
    bar_avgs[ruledim] = {}
    bar_sems[ruledim] = {}
    
    for net in netkeys.keys():
        bar_avgs[ruledim][net] = np.mean(diff_rho[ruledim][net])
        bar_sems[ruledim][net] = np.std(diff_rho[ruledim][net])/np.sqrt(len(subjNums))
        bar_avg_all[net][rulecount,:] = diff_rho[ruledim][net]
    rulecount += 1
        
bar_sem_all = {}
for net in netkeys.keys():
    meanacc = np.mean(bar_avg_all[net],axis=0)
    bar_avg_all[net] = np.mean(meanacc)
    bar_sem_all[net] = np.std(meanacc)/np.sqrt(len(subjNums))

##### Generate figures
width=0.25
width=.265
networks = netkeys.keys()
nbars = len(networks)
fig = plt.figure()
ax = fig.add_subplot(111)
rects = {}
widthcount = 0
colors = ['b','g','r']
colorcount = 0
for ruledim in ruledims:
    rects[ruledim] = ax.bar(np.arange(nbars)+widthcount, bar_avgs[ruledim].values(), width,align='center',
                     yerr=bar_sems[ruledim].values(), color=colors[colorcount], error_kw=dict(ecolor='black'))
    widthcount += width
    colorcount += 1
    
ax.set_title('Network Information Estimation of CPRO rules (FWE)',
             y=1.04, fontsize=16)
ax.set_ylabel('Match V. Mismatch Difference (Rho)',fontsize=12)
ax.set_xlabel('Rule types by Networks', fontsize=12)
ax.set_xticks(np.arange(nbars)+width)
ax.set_xticklabels(netkeys.values(),rotation=-45)
vmax=0.07
ax.set_ylim([0,vmax])

plt.legend((rects['logic'], rects['sensory'], rects['motor']),
           ('Logic', 'Sensory', 'Motor'), loc=((1.08,.65)))

## Add asterisks
def autolabel(rects,df,ruledim):
    # attach some text labels
    netcount = 0
    for rect in rects:
        height = rect.get_height()
        # Decide where to put the asterisk
        if height > vmax: 
            yax = vmax - .005
        else:
            yax = height + .01
            
        # Slightly move this asterisk since it's in the way
        if ruledim=='sensory' and netkeys[netcount]=='dan': yax -= .0025
            
        # Retrive q-value and assign asterisk accordingly
        q = results_dict_fwe[ruledim][netkeys[netcount]]['P-FWE']
        if q > .05: asterisk=''
        if q < .05: asterisk='*'
        if q < .01: asterisk='**'
        if q < .001: asterisk='***'
            
        # Label bar
        ax.text(rect.get_x() + rect.get_width()/2., yax,
                asterisk, ha='center', va='bottom', fontsize=8)
        # Go to next network
        netcount += 1
        
for ruledim in ruledims:
    autolabel(rects[ruledim],results_dframe_fwe[ruledim],ruledim)
# autolabel(rects2)

plt.tight_layout()
# plt.savefig('FigS1a_NetworkRSA_InformationEstimate.pdf')