In [119]:
import sys
sys.path.append('utils/')
import numpy as np
import loadGlasser as lg
import scripts3_functions as func
import scipy.stats as stats
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import statsmodels.sandbox.stats.multicomp as mc
import multiprocessing as mp
import pandas
%matplotlib inline
import permutationTesting as pt
import os
os.environ['OMP_NUM_THREADS'] = str(1)
In [120]:
# Set basic parameters
basedir = '/projects2/ModalityControl2/'
datadir = basedir + 'data/'
resultsdir = datadir + 'resultsMaster/'
runLength = 4648
subjNums = ['032', '033', '037', '038', '039', '045',
            '013', '014', '016', '017', '018', '021',
            '023', '024', '025', '026', '027', '031',
            '035', '046', '042', '028', '048', '053',
            '040', '049', '057', '062', '050', '030', '047', '034']
glasserparcels = lg.loadGlasserParcels()
networkdef = lg.loadGlasserNetworks()
networkmappings = {'fpn':7, 'vis':1, 'smn':2, 'con':3, 'dmn':6, 'aud1':8, 'aud2':9, 'dan':11}
# Force aud2 key to be the same as aud1
aud2_ind = np.where(networkdef==networkmappings['aud2'])[0]
networkdef[aud2_ind] = networkmappings['aud1']
# Define new network mappings with no aud1/aud2 distinction
networkmappings = {'fpn':7, 'vis':1, 'smn':2, 'con':3, 'dmn':6, 'aud':8, 'dan':11}
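As a quick sanity check on the merge above, one can tally how many regions fall under each network after relabeling. A minimal sketch, assuming networkdef is a 1-D array of network labels per region:
# Sanity check (illustrative): region counts per network after merging aud1/aud2
for name in sorted(networkmappings):
    print name, ':', np.sum(networkdef == networkmappings[name]), 'regions'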
In [121]:
def loadBetas(subj, net='all'):
    """Loads in task betas for a subject (regions x miniblocks)"""
    datafile = resultsdir + 'glmMiniblockBetaSeries/' + subj + '_miniblock_taskBetas_Glasser.csv'
    betas = np.loadtxt(datafile, delimiter=',')
    # Drop the first 17 columns (non-task regressors)
    betas = betas[:,17:]
    if net == 'all':
        return betas
    else:
        net_ind = np.where(networkdef==net)[0]
        # Restrict to the network's regions and transpose to miniblocks x regions
        return betas[net_ind,:].T

def setupMatrix(subj, ruledim, net):
    """
    Sets up the basic sample matrix (miniblocks x regions) and labels
    for classification of a particular rule dimension and network
    """
    betas = loadBetas(subj, net=net)
    rules, rulesmb = func.importRuleTimingsV3(subj, ruledim)

    svm_mat = np.zeros((betas.shape))
    samplecount = 0
    labels = []
    for rule in rulesmb:
        rule_ind = rulesmb[rule].keys()
        sampleend = samplecount + len(rule_ind)
        svm_mat[samplecount:sampleend,:] = betas[rule_ind,:]
        labels.extend(np.ones(len(rule_ind),)*rule)
        samplecount += len(rule_ind)
    labels = np.asarray(labels)
    return svm_mat, labels
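For reference, a hypothetical call showing the expected shapes (commented out, since it requires the data files at the paths above):
# svm_mat, labels = setupMatrix('032', 'logic', networkmappings['fpn'])
# print svm_mat.shape       # (n_miniblocks, n_regions in FPN)
# print np.unique(labels)   # the four rule labels within the 'logic' dimension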
In [122]:
def rsaCV(svm_mat, labels, ruledim):
    """Runs a leave-4-out CV for a 4-way classification"""
    cvfolds = []
    # 32 folds if we leave 4 out of 128 total miniblocks;
    # leave out a single block from each rule on each CV fold
    for rule in np.unique(labels):
        cvfolds.append(np.where(labels==rule)[0])
    cvfolds = np.asarray(cvfolds)
    # Rules are rows, CV folds are columns
    ncvs = cvfolds.shape[1]
    nrules = cvfolds.shape[0]
    # Randomly shuffle miniblocks across cross-validation folds
    for i in range(nrules): np.random.shuffle(cvfolds[i,:])

    corr_rho_cvs = []
    err_rho_cvs = []
    for cv in range(ncvs):
        # Select a test set from the CV fold matrix
        test_ind = cvfolds[:,cv].copy()
        # Delete the held-out CV fold from the train set
        train_ind = np.delete(cvfolds, cv, axis=1)
        # Identify the train and test sets
        svm_train = svm_mat[np.reshape(train_ind,-1),:]
        svm_test = svm_mat[test_ind,:]

        # ## Feature-wise normalization (computed by hand)
        # # Compute mean of train set
        # train_mean = np.mean(svm_train,axis=0)
        # train_mean.shape = (1,len(train_mean))
        # # Compute std of train set
        # train_std = np.std(svm_train,axis=0)
        # train_std.shape = (1,len(train_std))
        # # Normalize train set
        # svm_train = np.divide((svm_train - train_mean),train_std)
        # # Normalize test set with train-set mean and std to avoid circularity
        # svm_test = (svm_test - train_mean)/train_std

        # Construct RSA prototypes (mean training pattern per rule)
        prototype = {}
        for rule in range(nrules):
            prototype_ind = np.reshape(train_ind[rule,:],-1)
            prototype[rule] = np.mean(svm_mat[prototype_ind],axis=0)

        corr_rho = []
        err_rho = []
        for rule1 in range(nrules):
            for rule2 in range(nrules):
                r = stats.spearmanr(prototype[rule1], svm_test[rule2,:])[0]
                # Fisher z-transform before averaging
                r = np.arctanh(r)
                if rule1==rule2:
                    corr_rho.append(r)
                else:
                    err_rho.append(r)
        corr_rho_cvs.append(np.mean(corr_rho))
        err_rho_cvs.append(np.mean(err_rho))

    return np.mean(corr_rho_cvs), np.mean(err_rho_cvs)

def subjRSACV((subj, ruledim, net)):
    """Runs the cross-validated RSA for one subject; takes a tuple for pool.map"""
    svm_mat, labels = setupMatrix(subj, ruledim, net)
    # Demean each sample (row) across features
    svmmean = np.mean(svm_mat, axis=1)
    svmmean.shape = (len(svmmean), 1)
    svm_mat = svm_mat - svmmean
    # svm_mat = preprocessing.scale(svm_mat,axis=0)

    corr_rho, err_rho = rsaCV(svm_mat, labels, ruledim)
    # Information estimate: matched-rule minus mismatched-rule similarity
    diff_rho = corr_rho - err_rho
    # diff_rho = np.arctanh(corr_rho) - np.arctanh(err_rho)
    return corr_rho, err_rho, diff_rho
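The match-vs-mismatch logic in rsaCV can be illustrated on synthetic data: a noisy test pattern should correlate more strongly with its own rule prototype than with another rule's. A minimal sketch (synthetic patterns only, not the task data):
np.random.seed(1)
proto_a = np.random.randn(50)                 # prototype for rule A
proto_b = np.random.randn(50)                 # prototype for rule B
test_a = proto_a + 0.5*np.random.randn(50)    # noisy held-out sample of rule A
match = np.arctanh(stats.spearmanr(proto_a, test_a)[0])
mismatch = np.arctanh(stats.spearmanr(proto_b, test_a)[0])
print 'match - mismatch (expected > 0):', match - mismatch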
In [123]:
netkeys = {0:'fpn', 1:'dan', 2:'con', 3:'dmn', 4:'vis', 5:'aud', 6:'smn'}
In [124]:
ruledims = ['logic','sensory','motor']

corr_rho = {}
err_rho = {}
diff_rho = {}
avg_acc = {}
for ruledim in ruledims:
    avg_acc[ruledim] = {}
    corr_rho[ruledim] = np.zeros((len(netkeys),len(subjNums)))
    err_rho[ruledim] = np.zeros((len(netkeys),len(subjNums)))
    diff_rho[ruledim] = np.zeros((len(netkeys),len(subjNums)))
    print 'Running', ruledim
    for net in netkeys.keys():
        # print 'Running network', net
        # Run all subjects for this network in parallel
        inputs = []
        for subj in subjNums: inputs.append((subj, ruledim, networkmappings[netkeys[net]]))
        pool = mp.Pool(processes=11)
        results = pool.map_async(subjRSACV, inputs).get()
        pool.close()
        pool.join()

        scount = 0
        for result in results:
            tmp_corr, tmp_err, tmp_diff = result
            corr_rho[ruledim][net,scount] = tmp_corr
            err_rho[ruledim][net,scount] = tmp_err
            diff_rho[ruledim][net,scount] = tmp_diff
            scount += 1
        avg_acc[ruledim][net] = np.mean(diff_rho[ruledim][net])
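For debugging, the same computation can be run serially for a single rule dimension and network (a sketch; slower, but it avoids the multiprocessing pool):
# results = [subjRSACV((subj, 'logic', networkmappings['fpn'])) for subj in subjNums]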
In [125]:
# Compute group stats: one-sample t-tests against chance, FDR-corrected across networks
chance = 0.0
results_dict_fdr = {}
for ruledim in ruledims:
    results_dict_fdr[ruledim] = {}
    for net in netkeys.keys(): results_dict_fdr[ruledim][netkeys[net]] = {}

    pvals = []
    for net in netkeys.keys():
        results_dict_fdr[ruledim][netkeys[net]]['Accuracy'] = str(round(np.mean(avg_acc[ruledim][net]),3))
        t, p = stats.ttest_1samp(diff_rho[ruledim][net], chance)
        results_dict_fdr[ruledim][netkeys[net]]['T-stats'] = t
        results_dict_fdr[ruledim][netkeys[net]]['P-values'] = p
        pvals.append(p)

    # FDR-correct the p-values across the seven networks
    qvals = mc.fdrcorrection0(pvals)[1]
    qcount = 0
    for net in netkeys.keys():
        results_dict_fdr[ruledim][netkeys[net]]['Q-values'] = qvals[qcount]
        qcount += 1
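The FDR step maps raw p-values to Benjamini-Hochberg q-values; a toy illustration with made-up p-values:
# fdrcorrection0 returns (reject flags, corrected p-values)
toy_pvals = [0.001, 0.01, 0.03, 0.2, 0.8]
print mc.fdrcorrection0(toy_pvals)[1]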
In [126]:
results_dframe_fdr = {}
for ruledim in ruledims:
    print 'Dataframe for', ruledim, 'classification'
    results_dframe_fdr[ruledim] = pandas.DataFrame(data=results_dict_fdr[ruledim])
    display(results_dframe_fdr[ruledim])
In [127]:
#### Compute statistics for bar plot
bar_avgs = {}
bar_sems = {}
bar_avg_all = {}
for net in netkeys.keys(): bar_avg_all[net] = np.zeros((len(ruledims),len(subjNums)))

rulecount = 0
for ruledim in ruledims:
    bar_avgs[ruledim] = {}
    bar_sems[ruledim] = {}
    for net in netkeys.keys():
        bar_avgs[ruledim][net] = np.mean(diff_rho[ruledim][net])
        bar_sems[ruledim][net] = np.std(diff_rho[ruledim][net])/np.sqrt(len(subjNums))
        bar_avg_all[net][rulecount,:] = diff_rho[ruledim][net]
    rulecount += 1

bar_sem_all = {}
for net in netkeys.keys():
    # Average across rule dimensions within each subject before group stats
    meanacc = np.mean(bar_avg_all[net],axis=0)
    bar_avg_all[net] = np.mean(meanacc)
    bar_sem_all[net] = np.std(meanacc)/np.sqrt(len(subjNums))

##### Generate figures
width = .265
networks = netkeys.keys()
nbars = len(networks)
fig = plt.figure()
ax = fig.add_subplot(111)
rects = {}
widthcount = 0
colors = ['b','g','r']
colorcount = 0
for ruledim in ruledims:
    rects[ruledim] = ax.bar(np.arange(nbars)+widthcount, bar_avgs[ruledim].values(), width, align='center',
                            yerr=bar_sems[ruledim].values(), color=colors[colorcount], error_kw=dict(ecolor='black'))
    widthcount += width
    colorcount += 1

ax.set_title('Network Information Estimation of CPRO rules (FDR)',
             y=1.04, fontsize=16)
ax.set_ylabel('Match vs. Mismatch Difference (Rho)', fontsize=12)
ax.set_xlabel('Rule types by Networks', fontsize=12)
ax.set_xticks(np.arange(nbars)+width)
ax.set_xticklabels(netkeys.values(), rotation=-45)
vmax = 0.07
ax.set_ylim([0,vmax])
plt.legend((rects['logic'], rects['sensory'], rects['motor']),
           ('Logic', 'Sensory', 'Motor'), loc=((1.08,.65)))

## Add asterisks
def autolabel(rects, df, ruledim):
    # Attach significance labels above each bar
    netcount = 0
    for rect in rects:
        height = rect.get_height()
        # Decide where to put the asterisk
        if height > vmax:
            yax = vmax - .005
        else:
            yax = height + .01
        # Slightly move this asterisk since it's in the way
        if ruledim=='sensory' and netkeys[netcount]=='dan': yax -= .0025
        # Retrieve the q-value and assign asterisks accordingly
        q = df[netkeys[netcount]]['Q-values']
        if q < .001:
            asterisk = '***'
        elif q < .01:
            asterisk = '**'
        elif q < .05:
            asterisk = '*'
        else:
            asterisk = ''
        # Label bar
        ax.text(rect.get_x() + rect.get_width()/2., yax,
                asterisk, ha='center', va='bottom', fontsize=8)
        # Go to next network
        netcount += 1

for ruledim in ruledims:
    autolabel(rects[ruledim], results_dframe_fdr[ruledim], ruledim)

plt.tight_layout()
# plt.savefig('FigS1a_NetworkRSA_InformationEstimate.pdf')
In [128]:
# Compute group stats: permutation-based FWE correction across networks
chance = 0.0
results_dict_fwe = {}
for ruledim in ruledims:
    results_dict_fwe[ruledim] = {}
    for net in netkeys.keys(): results_dict_fwe[ruledim][netkeys[net]] = {}

    # Run the permutation test once per rule dimension on the full networks x subjects matrix
    t, p = pt.permutationFWE(diff_rho[ruledim], nullmean=0, permutations=10000, nproc=15)
    for net in netkeys.keys():
        results_dict_fwe[ruledim][netkeys[net]]['Accuracy'] = str(round(np.mean(avg_acc[ruledim][net]),3))
        # t, p = stats.ttest_1samp(diff_rho[ruledim][net], chance)
        results_dict_fwe[ruledim][netkeys[net]]['T-stats'] = t[net]
        results_dict_fwe[ruledim][netkeys[net]]['P-FWE'] = 1.0 - p[net]
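permutationTesting is a project-local module, so its internals aren't shown here. The 1.0 - p[net] conversion above suggests it returns, per network, the proportion of null maxima falling below the observed t-statistic. A rough sketch of a max-statistic (sign-flipping) permutation FWE test under that assumption; the function name and details are illustrative, not the module's actual implementation:
def maxT_fwe_sketch(data, nperms=1000):
    # data: networks x subjects matrix, H0: mean = 0
    t_real = stats.ttest_1samp(data, 0.0, axis=1)[0]
    max_null = np.zeros(nperms)
    for i in range(nperms):
        signs = np.random.choice([-1, 1], size=data.shape[1])  # flip each subject's sign
        t_null = stats.ttest_1samp(data*signs, 0.0, axis=1)[0]
        max_null[i] = np.max(t_null)
    # Proportion of null maxima below each observed t (so FWE p = 1 - this value)
    p = np.array([np.mean(max_null < t) for t in t_real])
    return t_real, p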
In [129]:
results_dframe_fwe = {}
for ruledim in ruledims:
    print 'Dataframe for', ruledim, 'classification'
    results_dframe_fwe[ruledim] = pandas.DataFrame(data=results_dict_fwe[ruledim])
    display(results_dframe_fwe[ruledim])
In [130]:
results_dict_fwe[ruledim]['fpn'].keys()
In [131]:
ie = {}
t_avg = {}
p_avg = {}
for ruledim in ruledims:
    ie[ruledim] = {'sig':[], 'nonsig':[]}
    t_avg[ruledim] = {'sig':[], 'nonsig':[]}
    p_avg[ruledim] = {'sig':[], 'nonsig':[]}
    for net in results_dict_fwe[ruledim].keys():
        if results_dict_fwe[ruledim][net]['P-FWE'] < 0.05:
            ie[ruledim]['sig'].append(float(results_dict_fwe[ruledim][net]['Accuracy']))
            t_avg[ruledim]['sig'].append(float(results_dict_fwe[ruledim][net]['T-stats']))
            p_avg[ruledim]['sig'].append(float(results_dict_fwe[ruledim][net]['P-FWE']))
        else:
            ie[ruledim]['nonsig'].append(float(results_dict_fwe[ruledim][net]['Accuracy']))
            t_avg[ruledim]['nonsig'].append(float(results_dict_fwe[ruledim][net]['T-stats']))
            p_avg[ruledim]['nonsig'].append(float(results_dict_fwe[ruledim][net]['P-FWE']))

    # Read out statistics for manuscript
    print 'Average significant IE for', ruledim, ':', np.mean(ie[ruledim]['sig'])
    print 'Average significant T-stats for', ruledim, ':', np.mean(t_avg[ruledim]['sig'])
    print 'Max significant p-value for', ruledim, ':', np.max(p_avg[ruledim]['sig'])
    print '#################'
    print 'Average nonsignificant IE for', ruledim, ':', np.mean(ie[ruledim]['nonsig'])
    print 'Average nonsignificant T-stats for', ruledim, ':', np.mean(t_avg[ruledim]['nonsig'])
    print 'Min nonsignificant p-value for', ruledim, ':', np.min(p_avg[ruledim]['nonsig'])
    print '\n'
In [132]:
#### Compute statistics for bar plot
bar_avgs = {}
bar_sems = {}
bar_avg_all = {}
for net in netkeys.keys(): bar_avg_all[net] = np.zeros((len(ruledims),len(subjNums)))

rulecount = 0
for ruledim in ruledims:
    bar_avgs[ruledim] = {}
    bar_sems[ruledim] = {}
    for net in netkeys.keys():
        bar_avgs[ruledim][net] = np.mean(diff_rho[ruledim][net])
        bar_sems[ruledim][net] = np.std(diff_rho[ruledim][net])/np.sqrt(len(subjNums))
        bar_avg_all[net][rulecount,:] = diff_rho[ruledim][net]
    rulecount += 1

bar_sem_all = {}
for net in netkeys.keys():
    # Average across rule dimensions within each subject before group stats
    meanacc = np.mean(bar_avg_all[net],axis=0)
    bar_avg_all[net] = np.mean(meanacc)
    bar_sem_all[net] = np.std(meanacc)/np.sqrt(len(subjNums))

##### Generate figures
width = .265
networks = netkeys.keys()
nbars = len(networks)
fig = plt.figure()
ax = fig.add_subplot(111)
rects = {}
widthcount = 0
colors = ['b','g','r']
colorcount = 0
for ruledim in ruledims:
    rects[ruledim] = ax.bar(np.arange(nbars)+widthcount, bar_avgs[ruledim].values(), width, align='center',
                            yerr=bar_sems[ruledim].values(), color=colors[colorcount], error_kw=dict(ecolor='black'))
    widthcount += width
    colorcount += 1

ax.set_title('Network Information Estimation of CPRO rules (FWE)',
             y=1.04, fontsize=16)
ax.set_ylabel('Match vs. Mismatch Difference (Rho)', fontsize=12)
ax.set_xlabel('Rule types by Networks', fontsize=12)
ax.set_xticks(np.arange(nbars)+width)
ax.set_xticklabels(netkeys.values(), rotation=-45)
vmax = 0.07
ax.set_ylim([0,vmax])
plt.legend((rects['logic'], rects['sensory'], rects['motor']),
           ('Logic', 'Sensory', 'Motor'), loc=((1.08,.65)))

## Add asterisks
def autolabel(rects, df, ruledim):
    # Attach significance labels above each bar
    netcount = 0
    for rect in rects:
        height = rect.get_height()
        # Decide where to put the asterisk
        if height > vmax:
            yax = vmax - .005
        else:
            yax = height + .01
        # Slightly move this asterisk since it's in the way
        if ruledim=='sensory' and netkeys[netcount]=='dan': yax -= .0025
        # Retrieve the FWE-corrected p-value and assign asterisks accordingly
        q = df[netkeys[netcount]]['P-FWE']
        if q < .001:
            asterisk = '***'
        elif q < .01:
            asterisk = '**'
        elif q < .05:
            asterisk = '*'
        else:
            asterisk = ''
        # Label bar
        ax.text(rect.get_x() + rect.get_width()/2., yax,
                asterisk, ha='center', va='bottom', fontsize=8)
        # Go to next network
        netcount += 1

for ruledim in ruledims:
    autolabel(rects[ruledim], results_dframe_fwe[ruledim], ruledim)

plt.tight_layout()
# plt.savefig('FigS1a_NetworkRSA_InformationEstimate.pdf')