Identify optimal parameter set after optimisation

The optimisation algorithm used in the optimisation coverges on a local and not necessarily absolute optimum. To avoid such cases, several different start values for the optimisation are chosen and then the best optimal parameter configuration across the different start values is selected. Taking the output from the optimisation procedure the following code identifies the optimal parameter set for each individual subject and stores it an new file.

Real Behavioural Data


In [55]:
import numpy as np


#load data

inputFiles = ['../output/real/M1.csv','../output/real/M21.csv',
                '../output/real/M31.csv','../output/real/M4.csv',
                '../output/real/M22.csv','../output/real/M23.csv',
                '../output/real/M32.csv','../output/real/M33.csv']

#create output files for Simulation
outputFilesS = ['../input/M1.csv','../input/M21.csv',
                '../input/M31.csv','../input/M4.csv',
                '../input/M22.csv','../input/M23.csv',
                '../input/M32.csv','../input/M33.csv']

outputFiles = ['../output/M1.csv','../output/M21.csv',
               '../output/M31.csv','../output/M4.csv',
               '../output/M22.csv','../output/M23.csv',
               '../output/M32.csv','../output/M33.csv']

#number of starting values
nrStartValues = [16,32,32,64,32,32,32,32]

iterator = 0

for (iFile,oFileS,oFile) in zip(inputFiles,outputFilesS,outputFiles):
    
    optData = np.genfromtxt(iFile, delimiter=',') #load data file
    subjects = np.size(optData, axis=1)
    nrParam = (np.size(optData, axis=0)/nrStartValues[iterator])-1
    optParam = np.zeros((subjects, nrParam ))
    optParam2 = np.zeros((subjects, nrParam+1 ))#result array
    #subjects=2
    
    for s in range(subjects):
        
        #identify index of minimum because fminsearch is 
        #minimising a negative number (=maximisation)
        maxIndex = optData[-nrStartValues[iterator]-1:-1,s].argmin(axis=0)
        
        for param in range(nrParam):
            
            #collect optimal parameter set using index
            optParam[s,param]=optData[(nrStartValues[iterator]*param)+maxIndex-1,s]
            #print(nrStartValues[iterator]*param)
            
            
        #add LH value for 
        optParam2[s,0:nrParam]=optParam[s,:]
        optParam2[s,-1]=np.amin(optData[-nrStartValues[iterator]-1:-1,s],axis=0)
        
    np.savetxt(oFileS,optParam, delimiter=',')
    np.savetxt(oFile,optParam2, delimiter=',')
    iterator +=1

Simulated Data

Similar to above, the optimisation algorithm used in the optimisation coverges on a local and not necessarily absolute optimum. Taking the output from the optimisation procedure the following code identifies the optimal parameter set for each individual subject and simulation and stores it an new file.


In [65]:
pwd


Out[65]:
u'/Users/l/Programming/Git/Counterfactual-RL'

In [66]:
import numpy as np


#load data

inputFiles = ['output/simul/Sim_M1.csv','output/simul/Sim_M21.csv',
                'output/simul/Sim_M31.csv','output/simul/Sim_M4.csv',
                'output/simul/Sim_M22.csv','output/simul/Sim_M23.csv',
                'output/simul/Sim_M32.csv','output/simul/Sim_M33.csv']

#create output files
outputFiles = ['output/Simul_M1.csv','output/Simul_M21.csv',
               'output/Simul_M31.csv','output/Simul_M4.csv',
               'output/Simul_M22.csv','output/Simul_M23.csv',
               'output/Simul_M32.csv','output/Simul_M33.csv']

#number of starting values
nrIt = 30
nrStartValues = [16,32,32,64,32,32,32,32]

iterator = 0

for (iFile,oFile) in zip(inputFiles,outputFiles):
    
    optData = np.genfromtxt(iFile, delimiter=',') #load data file
    subjects = np.size(optData, axis=1)
    nrParam = ((np.size(optData, axis=0)/nrStartValues[iterator])/nrIt)
    optParam = np.zeros((subjects*nrIt, nrParam )) #result array
    #subjects=1
    #print(nrParam)
    
    for s in range(subjects):
        
        #identify index of minimum because fminsearch is 
        #minimising a negative number (=maximisation) for each simulation
        count = 1
        for n in range(nrIt):
            oneIt=optData[nrStartValues[iterator]*nrIt*(nrParam-1)+n*nrStartValues[iterator]:
                               nrStartValues[iterator]*nrIt*(nrParam-1)+
                               (n+1)*nrStartValues[iterator],s]
            maxIndex = oneIt.argmin(axis=0)
            
            for param in range(nrParam):

                #collect optimal parameter set using index for each simulation
                optParam[(s*nrIt)+n,param]=optData[(nrStartValues[iterator]*nrIt*param)+
                                                    n*nrStartValues[iterator]+maxIndex,s]
            
    np.savetxt(oFile,optParam, delimiter=',')
    iterator +=1

Calculate Bayesian Information Criterion

Calculate Bayesian Information Criterion as to compare model fit between models with different numbers of parameters.

Real Behavioural Data


In [15]:
import numpy as np

inputFiles = ['output/M1.csv','output/M21.csv',
              'output/M22.csv','output/M23.csv',
               'output/M31.csv','output/M32.csv',
               'output/M33.csv','output/M4.csv']

nrParam = [3,4,4,4,5,5,5,6]
iterator = 0

for iFile in inputFiles:
    
    optData = np.genfromtxt(iFile, delimiter=',') #load data file
    subjects = np.size(optData, axis=0)
    nrTrials = 220
    outputData = np.zeros((subjects,nrParam[iterator]+1)) #result array
    outputData[:,:nrParam[iterator]] = optData[:,:nrParam[iterator]]
    
    #subjects=1
    for s in range(subjects):

        #calulate BIC and add it to the inputfile
        outputData[s,nrParam[iterator]]=-2 * np.log(-optData[s,nrParam[iterator]-1])+(nrParam[iterator]-1)*np.log(nrTrials)        
    np.savetxt(iFile,outputData, delimiter=',')
    iterator +=1

Simulated Data


In [116]:
import numpy as np

inputFiles = ['output/Simul_M1.csv','output/Simul_M21.csv',
              'output/Simul_M22.csv','output/Simul_M23.csv',
               'output/Simul_M31.csv','output/Simul_M32.csv',
               'output/Simul_M33.csv','output/Simul_M4.csv']

nrParam = [3,4,4,4,5,5,5,6]
iterator = 0
nrTrials = 220

for iFile in inputFiles:
    
    optData = np.genfromtxt(iFile, delimiter=',') #load data file
    subjects = np.size(optData, axis=0)
    outputData = np.zeros((subjects,nrParam[iterator]+1)) #result array
    outputData[:,:nrParam[iterator]] = optData[:,:nrParam[iterator]]
    
    #subjects=1
    for s in range(subjects):

        #calulate BIC and add it to the inputfile
        outputData[s,nrParam[iterator]]=-2 * np.log(-optData[s,nrParam[iterator]-1])+(nrParam[iterator]-1)*np.log(nrTrials)        
    np.savetxt(iFile,outputData, delimiter=',')
    iterator +=1

Calculate the most parsimonous model

Behavioural Data

The following code answers which of the 8 models best approximated the original behavioural data for each individual subject and writes two files: one with the optimal parameter combination and one with the name of the optimal model.


In [16]:
import numpy as np
import csv

#load files

inputFiles = ['output/M1.csv','output/M21.csv',
              'output/M22.csv','output/M23.csv',
               'output/M31.csv','output/M32.csv',
               'output/M33.csv','output/M4.csv']
outputFile = 'output/Real_PF.csv'


nrParam = [3,4,4,4,5,5,5,6]
data = np.loadtxt(inputFiles[0],delimiter=',')
subjects = np.size(data, axis = 0)
params = 7 #params + lnlh + BIC

allModels = np.zeros((len(inputFiles),subjects,params))

# first load data an make all models include the same number of parameter 
# by substituting missing parameter with 1
count = 0
for m in range(len(inputFiles)):
    
    help = np.loadtxt(inputFiles[m],delimiter=',')
    allModels[m,:,0:2]=help[:,0:2]
    allModels[m,:,-2:]=help[:,-2:]
    ones = np.ones((np.size(help,axis=0)))
    
    if count == 0 : #model 1
        #allModels[r,:,2:4]=[help[:,2] ones ones]
        allModels[m,:,2]=ones
        allModels[m,:,3]=ones
        allModels[m,:,4]=ones
    elif count == 1: #model 21
        allModels[m,:,2]=help[:,2]
        allModels[m,:,3]=ones
        allModels[m,:,4]=ones
    elif count == 2: #model 22
        allModels[m,:,3]=help[:,2]
        allModels[m,:,2]=ones
        allModels[m,:,4]=ones
    elif count == 3: #model 23
        allModels[m,:,4]=help[:,2]
        allModels[m,:,2]=ones
        allModels[m,:,3]=ones
    elif count == 4: #model 31
        allModels[m,:,2]=help[:,2]
        allModels[m,:,3]=help[:,3]
        allModels[m,:,4]=ones 
    elif count == 5: #model 32
        allModels[m,:,2]=help[:,2]
        allModels[m,:,3]=ones
        allModels[m,:,4]=help[:,3]
    elif count == 6: #model 33
        allModels[m,:,2]=ones
        allModels[m,:,3]=help[:,2]
        allModels[m,:,4]=help[:,3]
    else: #model 4
        allModels[m,:,2:4]=help[:,2:4]
    count += 1
#idetify index of parsimonious model 
outputIndex =np.zeros((subjects))
for s in range(subjects):
    temp = np.array(np.where(allModels[:,s,-1]==min(allModels[:,s,-1])))[0]
    outputIndex[s]=temp[0]

#collate parameter values for parsimonious model and save to file
output = np.zeros((subjects,params))
for s in range(subjects):
    for p in range(params):
        output[s,p]=allModels[outputIndex[s].astype(int)-1,s,p]
            
np.savetxt(outputFile,output,delimiter=',')

#create file with the most parsimonious model names

stringArr = []
outputDic = {'M1':0,'M21':1,'M22':2,'M23':3,'M31':4,'M32':5,'M33':6,'M4':7}
outputMFile= 'output/Real_PF_Model.csv'

for s in range(subjects):
        stringArr.append(outputDic.keys()[outputDic.values().index(outputIndex[s])])

with open(outputMFile, "wb") as f:
    writer = csv.writer(f)
    writer.writerows(stringArr)


[ 0.  7.  0.  1.  4.  1.  7.  0.  6.  1.  7.  3.  3.  3.  5.  1.  1.  7.
  1.  7.  1.  7.  4.  0.  1.  5.  1.  7.  7.  0.  0.  5.  3.  0.  1.  7.
  0.  7.  0.  4.  7.  1.  7.  4.  1.  7.  6.  3.  5.  5.  7.  1.  1.  7.
  0.  1.  5.  4.  5.  0.  4.  7.  5.  0.  5.  1.  5.  7.  5.  3.  3.  5.
  3.  5.  0.  1.  1.  3.  1.  0.]

Simulated Data


In [25]:
import numpy as np
import csv

#load files and simulated data in array

inputFiles = ['output/Simul_M1.csv','output/Simul_M21.csv',
                'output/Simul_M22.csv','output/Simul_M23.csv',
                'output/Simul_M31.csv','output/Simul_M32.csv',
                'output/Simul_M33.csv','output/Simul_M4.csv']
outputFile = 'output/Simul_PFF.csv'

data = np.loadtxt(inputFiles[0],delimiter=',')
runs = 30
subjects = np.size(data, axis = 0)/runs
params = 7 #params + lnlh + BIC

allModels = np.zeros((len(inputFiles),subjects*runs,params))

count = 0
for m in range(len(inputFiles)):
    print(inputFiles[m])
    help = np.loadtxt(inputFiles[m],delimiter=',')
    allModels[m,:,0:2]=help[:,0:2]
    allModels[m,:,-2:]=help[:,-2:]
    ones = np.ones((np.size(help,axis=0)))
    
    if count == 0 : #model 1
        #allModels[r,:,2:4]=[help[:,2] ones ones]
        allModels[m,:,2]=ones
        allModels[m,:,3]=ones
        allModels[m,:,4]=ones
    elif count == 1: #model 21
        allModels[m,:,2]=help[:,2]
        allModels[m,:,3]=ones
        allModels[m,:,4]=ones
    elif count == 2: #model 22
        allModels[m,:,3]=help[:,2]
        allModels[m,:,2]=ones
        allModels[m,:,4]=ones
    elif count == 3: #model 23
        allModels[m,:,4]=help[:,2]
        allModels[m,:,2]=ones
        allModels[m,:,3]=ones
    elif count == 4: #model 31
        allModels[m,:,2]=help[:,2]
        allModels[m,:,3]=help[:,3]
        allModels[m,:,4]=ones 
    elif count == 5: #model 32
        allModels[m,:,2]=help[:,2]
        allModels[m,:,3]=ones
        allModels[m,:,4]=help[:,3]
    elif count == 6: #model 33
        allModels[m,:,2]=ones
        allModels[m,:,3]=help[:,2]
        allModels[m,:,4]=help[:,3]
    else: #model 4
        allModels[m,:,2:4]=help[:,2:4]
    count += 1

#for r in range(len(inputFiles)):
#    print(inputFiles[r])
#    allModels[r,:,:]=np.loadtxt(inputFiles[r],delimiter=',')
    
#identify index of parsimonious model 
outputIndex =np.zeros((runs,subjects))
for s in range(subjects):
    oList = np.zeros((runs))
    for r in range(runs):  
        oList[r]=np.array(np.where(allModels[:,s*runs+r,-1]==
                                    min(allModels[:,s*runs+r,-1])))[0].astype(int)
    outputIndex[:,s]=oList

#collate parameter values for parsimonious model and save to file
output = np.zeros((subjects*runs,params))
for s in range(subjects):
    for r in range(runs):
        for p in range(params):
            output[(s*runs)+r,p]=allModels[outputIndex[r,s].astype(int),(s*runs)+r,p]
            
np.savetxt(outputFile,output,delimiter=',')

#create file with the most parsimonious model names

stringArr = []
outputDic = {'M1':0,'M21':1,'M22':2,'M23':3,'M31':4,'M32':5,'M33':6,'M4':7}
outputMFile= 'output/Simul_PFF_Model.csv'

for s in range(subjects):
    for r in range(runs):
        stringArr.append(outputDic.keys()[outputDic.values().index(outputIndex[r,s])])

with open(outputMFile, "wb") as f:
    writer = csv.writer(f)
    writer.writerows(stringArr)


output/Simul_M1.csv
output/Simul_M21.csv
output/Simul_M22.csv
output/Simul_M23.csv
output/Simul_M31.csv
output/Simul_M32.csv
output/Simul_M33.csv
output/Simul_M4.csv