In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
In [2]:
# load behavioural data (state, reward, response per trial) from the .mat file
import scipy.io as sio
data = sio.loadmat('Tut8_file1.mat')
state = data['state'].squeeze()
reward = data['reward'].squeeze()
response = data['response'].squeeze()
T = len(state)
In [3]:
# Rescorla-Wagner (delta-rule) value updates with learning rate 0.1:
# V_sr tracks the value of response r in state s.
V11 = np.zeros(T+1)
V12 = np.zeros(T+1)
V21 = np.zeros(T+1)
V22 = np.zeros(T+1)
for t in range(1, T+1):
    # carry all values forward, then update the (state, response) pair sampled on trial t-1
    V11[t], V12[t], V21[t], V22[t] = V11[t-1], V12[t-1], V21[t-1], V22[t-1]
    if state[t-1] == 1 and response[t-1] == 1:
        V11[t] = V11[t-1] + 0.1*(reward[t-1] - V11[t-1])
    elif state[t-1] == 1 and response[t-1] == 2:
        V12[t] = V12[t-1] + 0.1*(reward[t-1] - V12[t-1])
    elif state[t-1] == 2 and response[t-1] == 1:
        V21[t] = V21[t-1] + 0.1*(reward[t-1] - V21[t-1])
    elif state[t-1] == 2 and response[t-1] == 2:
        V22[t] = V22[t-1] + 0.1*(reward[t-1] - V22[t-1])
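As a cross-check (not part of the original tutorial), the same delta-rule update can be written once over a single value table indexed by (state, response) instead of four separate arrays. This is a minimal sketch assuming the 1-based state/response codes used above.
In [ ]:
# Sketch: equivalent Q-table formulation of the four V arrays above.
Q = np.zeros((2, 2, T+1))                            # (state, response, trial)
for t in range(1, T+1):
    Q[:, :, t] = Q[:, :, t-1]                        # carry all values forward
    s, r = state[t-1] - 1, response[t-1] - 1         # 1-based codes -> 0-based indices
    Q[s, r, t] += 0.1 * (reward[t-1] - Q[s, r, t-1])
# should reproduce the explicit arrays
assert np.allclose(Q[0, 0], V11) and np.allclose(Q[1, 1], V22)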
In [4]:
plt.plot(V11, label='$V_{11}$')
plt.plot(V12, label='$V_{12}$')
plt.legend()
Out[4]: [figure: learning curves V11 and V12 for state 1]
In [5]:
plt.plot(V21, label='$V_{21}$')
plt.plot(V22, label='$V_{22}$')
plt.plot((68, 68), (0, 1), 'r:')
plt.legend()
Out[5]: [figure: learning curves V21 and V22 for state 2; the dotted red line marks the reversal at trial 68]
In [6]:
# Print trials that contradict the initial rule "reward iff response == state";
# these reveal when the contingency was reversed. (The original second check for
# state 1 / response 2 / reward 1 was a subset of this condition and double-printed.)
for t in range(T):
    if (state[t] != response[t] and reward[t] == 1) or (state[t] == response[t] and reward[t] == 0):
        print(f't = {t+1:3d} for state {state[t]} and response {response[t]} and reward {reward[t]}')
At first, reward was delivered when the response matched the state. For state 1 this contingency held throughout the session, and the rat learned it. For state 2, the meaning of the light was reversed at trial 68, so the rat had to press the other lever to obtain reward, which it also learned.
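The switch point can also be read off the data directly. Below is a minimal sketch, assuming the reward schedule was deterministic: it finds the first state-2 trial whose outcome contradicts the original rule.
In [ ]:
# Sketch: locate the reversal from the data (assumes deterministic rewards).
s2 = np.where(state == 2)[0]                         # indices of state-2 trials
old_rule = (response[s2] == 2).astype(int)           # reward predicted by the original rule
switch = s2[np.argmax(reward[s2] != old_rule)] + 1   # first violation, as a 1-based trial number
print(f'First state-2 trial inconsistent with the original rule: t = {switch}')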
In [11]:
def LL(state, reward, response, a, b):
    # Log-likelihood of the observed choices under a Rescorla-Wagner model
    # with learning rate a and a softmax choice rule with inverse temperature b.
    T = len(state)
    V11 = np.zeros(T)
    V12 = np.zeros(T)
    V21 = np.zeros(T)
    V22 = np.zeros(T)
    ll = -np.log(2)   # first choice: all values are 0, so p = 1/2
    for t in range(1, T):
        # carry values forward, then apply the update from trial t-1
        V11[t], V12[t], V21[t], V22[t] = V11[t-1], V12[t-1], V21[t-1], V22[t-1]
        if state[t-1] == 1 and response[t-1] == 1:
            V11[t] = V11[t-1] + a*(reward[t-1] - V11[t-1])
        elif state[t-1] == 1 and response[t-1] == 2:
            V12[t] = V12[t-1] + a*(reward[t-1] - V12[t-1])
        elif state[t-1] == 2 and response[t-1] == 1:
            V21[t] = V21[t-1] + a*(reward[t-1] - V21[t-1])
        elif state[t-1] == 2 and response[t-1] == 2:
            V22[t] = V22[t-1] + a*(reward[t-1] - V22[t-1])
        # softmax log-probability of the choice actually made on trial t
        # (the original added the numerator for trial t-1, mismatching the denominator)
        if state[t] == 1:
            ll += b * (V11[t] if response[t] == 1 else V12[t])
            ll -= np.log(np.exp(b*V11[t]) + np.exp(b*V12[t]))
        else:
            ll += b * (V21[t] if response[t] == 1 else V22[t])
            ll -= np.log(np.exp(b*V21[t]) + np.exp(b*V22[t]))
    return ll
# grid search over learning rate alpha and inverse temperature beta
# (named ll_grid to avoid shadowing the `data` dict loaded from the .mat file)
alpha = np.linspace(0, 1, 201)
beta = np.linspace(0, 3, 601)
ll_grid = np.array([[LL(state, reward, response, a, b) for b in beta] for a in alpha])
print(ll_grid.shape)
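The grid search is exhaustive but limited to its step size. As a sanity check, the same maximum can be refined with a continuous optimizer; this is a sketch using scipy.optimize.minimize on the negative log-likelihood, with bounds matching the grid.
In [ ]:
# Sketch: refine the maximum-likelihood estimate with a bounded optimizer.
from scipy.optimize import minimize
neg_ll = lambda p: -LL(state, reward, response, p[0], p[1])
res = minimize(neg_ll, x0=[0.1, 1.0], bounds=[(0, 1), (0, 3)], method='L-BFGS-B')
print(f'alpha = {res.x[0]:.3f}, beta = {res.x[1]:.3f}, '
      f'log likelihood = {-res.fun:.2f}')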
In [ ]:
# find the maximum-likelihood parameters on the grid
imax = np.unravel_index(np.argmax(ll_grid), ll_grid.shape)
maxalpha = alpha[imax[0]]
maxbeta = beta[imax[1]]
print(f'The maximum likelihood is at alpha = {maxalpha:.2f} and ' +
      f'beta = {maxbeta:.2f} with a log likelihood of {np.amax(ll_grid):.2f}')
plt.rcParams['figure.figsize'] = [18.0, 6.0]
import matplotlib.cm as cm
plt.imshow(ll_grid, origin='lower', aspect='auto',
           extent=(np.min(beta), np.max(beta), np.min(alpha), np.max(alpha)),
           cmap=cm.gist_rainbow)
plt.xlabel('beta')
plt.ylabel('alpha')
plt.colorbar()
# mark the maximum
plt.scatter([maxbeta], [maxalpha], c='w', s=40)
plt.rcParams['figure.figsize'] = [8.0, 6.0]
In [319]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import LinearLocator, FormatStrFormatter
fig = plt.figure()
ax = fig.add_subplot(projection='3d')   # fig.gca(projection='3d') is deprecated
# Make data.
X, Y = np.meshgrid(beta, alpha)
Z = ll_grid
# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=cm.gist_rainbow,
                       linewidth=0, antialiased=True)
# Customize the z axis.
ax.set_zlim(np.amin(ll_grid), np.amax(ll_grid))
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter('%.2f'))
# Add a color bar which maps values to colors.
fig.colorbar(surf, shrink=0.5, aspect=5)
plt.show()