In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
In [2]:
# load behavioural data (state, reward, response per trial) from the .mat file
import scipy.io as sio
data = sio.loadmat('Tut8_file1.mat')
state = data['state'].squeeze()
reward = data['reward'].squeeze()
response = data['response'].squeeze()
T = len(state)
In [3]:
# Rescorla-Wagner (delta-rule) value updates with learning rate 0.1:
# V_sr tracks the value of response r in state s.
V11 = np.zeros(T+1)
V12 = np.zeros(T+1)
V21 = np.zeros(T+1)
V22 = np.zeros(T+1)
for t in range(1, T+1):
    # carry all values forward, then update the (state, response) pair sampled on trial t-1
    V11[t], V12[t], V21[t], V22[t] = V11[t-1], V12[t-1], V21[t-1], V22[t-1]
    if state[t-1] == 1 and response[t-1] == 1:
        V11[t] = V11[t-1] + 0.1*(reward[t-1] - V11[t-1])
    elif state[t-1] == 1 and response[t-1] == 2:
        V12[t] = V12[t-1] + 0.1*(reward[t-1] - V12[t-1])
    elif state[t-1] == 2 and response[t-1] == 1:
        V21[t] = V21[t-1] + 0.1*(reward[t-1] - V21[t-1])
    elif state[t-1] == 2 and response[t-1] == 2:
        V22[t] = V22[t-1] + 0.1*(reward[t-1] - V22[t-1])
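As a cross-check (not part of the original tutorial), the same delta-rule update can be written once over a single value table indexed by (state, response) instead of four separate arrays. This is a minimal sketch assuming the 1-based state/response codes used above.
In [ ]:
# Sketch: equivalent Q-table formulation of the four V arrays above.
Q = np.zeros((2, 2, T+1))                            # (state, response, trial)
for t in range(1, T+1):
    Q[:, :, t] = Q[:, :, t-1]                        # carry all values forward
    s, r = state[t-1] - 1, response[t-1] - 1         # 1-based codes -> 0-based indices
    Q[s, r, t] += 0.1 * (reward[t-1] - Q[s, r, t-1])
# should reproduce the explicit arrays
assert np.allclose(Q[0, 0], V11) and np.allclose(Q[1, 1], V22)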
In [4]:
plt.plot(V11, label='$V_{11}$')
plt.plot(V12, label='$V_{12}$')
plt.legend()
Out[4]: [figure: learning curves V11 and V12 for state 1]
In [5]:
plt.plot(V21, label='$V_{21}$')
plt.plot(V22, label='$V_{22}$')
plt.plot((68, 68), (0, 1), 'r:')
plt.legend()
Out[5]: [figure: learning curves V21 and V22 for state 2; the dotted red line marks the reversal at trial 68]
In [6]:
# Print trials that contradict the initial rule "reward iff response == state";
# these reveal when the contingency was reversed. (The original second check for
# state 1 / response 2 / reward 1 was a subset of this condition and double-printed.)
for t in range(T):
    if (state[t] != response[t] and reward[t] == 1) or (state[t] == response[t] and reward[t] == 0):
        print(f't = {t+1:3d} for state {state[t]} and response {response[t]} and reward {reward[t]}')
At first, reward was delivered when the response matched the state. For state 1 this contingency held throughout the session, and the rat learned it. For state 2, the meaning of the light was reversed at trial 68, so the rat had to press the other lever to obtain reward, which it also learned.
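The switch point can also be read off the data directly. Below is a minimal sketch, assuming the reward schedule was deterministic: it finds the first state-2 trial whose outcome contradicts the original rule.
In [ ]:
# Sketch: locate the reversal from the data (assumes deterministic rewards).
s2 = np.where(state == 2)[0]                         # indices of state-2 trials
old_rule = (response[s2] == 2).astype(int)           # reward predicted by the original rule
switch = s2[np.argmax(reward[s2] != old_rule)] + 1   # first violation, as a 1-based trial number
print(f'First state-2 trial inconsistent with the original rule: t = {switch}')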
In [11]:
def LL(state, reward, response, a, b):
    # Log-likelihood of the observed choices under a Rescorla-Wagner model
    # with learning rate a and a softmax choice rule with inverse temperature b.
    T = len(state)
    V11 = np.zeros(T)
    V12 = np.zeros(T)
    V21 = np.zeros(T)
    V22 = np.zeros(T)
    ll = -np.log(2)   # first choice: all values are 0, so p = 1/2
    for t in range(1, T):
        # carry values forward, then apply the update from trial t-1
        V11[t], V12[t], V21[t], V22[t] = V11[t-1], V12[t-1], V21[t-1], V22[t-1]
        if state[t-1] == 1 and response[t-1] == 1:
            V11[t] = V11[t-1] + a*(reward[t-1] - V11[t-1])
        elif state[t-1] == 1 and response[t-1] == 2:
            V12[t] = V12[t-1] + a*(reward[t-1] - V12[t-1])
        elif state[t-1] == 2 and response[t-1] == 1:
            V21[t] = V21[t-1] + a*(reward[t-1] - V21[t-1])
        elif state[t-1] == 2 and response[t-1] == 2:
            V22[t] = V22[t-1] + a*(reward[t-1] - V22[t-1])
        # softmax log-probability of the choice actually made on trial t
        # (the original added the numerator for trial t-1, mismatching the denominator)
        if state[t] == 1:
            ll += b * (V11[t] if response[t] == 1 else V12[t])
            ll -= np.log(np.exp(b*V11[t]) + np.exp(b*V12[t]))
        else:
            ll += b * (V21[t] if response[t] == 1 else V22[t])
            ll -= np.log(np.exp(b*V21[t]) + np.exp(b*V22[t]))
    return ll
# grid search over learning rate alpha and inverse temperature beta
# (named ll_grid to avoid shadowing the `data` dict loaded from the .mat file)
alpha = np.linspace(0, 1, 201)
beta = np.linspace(0, 3, 601)
ll_grid = np.array([[LL(state, reward, response, a, b) for b in beta] for a in alpha])
print(ll_grid.shape)
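The grid search is exhaustive but limited to its step size. As a sanity check, the same maximum can be refined with a continuous optimizer; this is a sketch using scipy.optimize.minimize on the negative log-likelihood, with bounds matching the grid.
In [ ]:
# Sketch: refine the maximum-likelihood estimate with a bounded optimizer.
from scipy.optimize import minimize
neg_ll = lambda p: -LL(state, reward, response, p[0], p[1])
res = minimize(neg_ll, x0=[0.1, 1.0], bounds=[(0, 1), (0, 3)], method='L-BFGS-B')
print(f'alpha = {res.x[0]:.3f}, beta = {res.x[1]:.3f}, '
      f'log likelihood = {-res.fun:.2f}')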
In [ ]:
# find the maximum-likelihood parameters on the grid
imax = np.unravel_index(np.argmax(ll_grid), ll_grid.shape)
maxalpha = alpha[imax[0]]
maxbeta = beta[imax[1]]
print(f'The maximum likelihood is at alpha = {maxalpha:.2f} and ' +
      f'beta = {maxbeta:.2f} with a log likelihood of {np.amax(ll_grid):.2f}')
plt.rcParams['figure.figsize'] = [18.0, 6.0]
import matplotlib.cm as cm
plt.imshow(ll_grid, origin='lower', aspect='auto',
           extent=(np.min(beta), np.max(beta), np.min(alpha), np.max(alpha)),
           cmap=cm.gist_rainbow)
plt.xlabel('beta')
plt.ylabel('alpha')
plt.colorbar()
# mark the maximum
plt.scatter([maxbeta], [maxalpha], c='w', s=40)
plt.rcParams['figure.figsize'] = [8.0, 6.0]
In [319]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import LinearLocator, FormatStrFormatter
fig = plt.figure()
ax = fig.add_subplot(projection='3d')   # fig.gca(projection='3d') is deprecated
# Make data.
X, Y = np.meshgrid(beta, alpha)
Z = ll_grid
# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=cm.gist_rainbow,
                       linewidth=0, antialiased=True)
# Customize the z axis.
ax.set_zlim(np.amin(ll_grid), np.amax(ll_grid))
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter('%.2f'))
# Add a color bar which maps values to colors.
fig.colorbar(surf, shrink=0.5, aspect=5)
plt.show()