This notebook provides an example of a simple reinforcement learning algorithm. In this case, the learner has to decide between two actions, which are probabilistically reinforced. We will start with a simple Rescorla-Wagner learner.
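On each trial the learner chooses one of the two responses via a softmax over its current value estimates, observes a reward of 0 or 1, and updates the value of the chosen response with the Rescorla-Wagner (delta rule) update $Q_{t+1}(a) = Q_t(a) + \alpha\,(r_t - Q_t(a))$, where $\alpha$ is the learning rate and $r_t$ is the obtained reward. This is the update applied in the simulation loop below.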
In [1]:
# set up imports
import numpy
import statsmodels.nonparametric.smoothers_lowess
import matplotlib.pyplot as plt
%matplotlib inline
In [131]:
learning_rate=0.001
# p_A is the probability that response 1 is the rewarded response on a given trial;
# response 0 is rewarded on the remaining trials
p_A=0.75
def softmax(q,temp):
    # probability of choosing response 0 under a softmax over the two value estimates
    p=numpy.exp(q[0]/temp)/(numpy.exp(q[0]/temp)+numpy.exp(q[1]/temp))
    if p>numpy.random.rand():
        return 0
    else:
        return 1
def outcome(resp,p_A):
    # response 1 is the rewarded response with probability p_A, otherwise response 0;
    # return 1 if the chosen response was the rewarded one, 0 otherwise
    if numpy.random.rand()<p_A:
        rewarded_outcome=1
    else:
        rewarded_outcome=0
    if resp==rewarded_outcome:
        return 1
    else:
        return 0
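The temperature parameter governs how deterministic the softmax choice is. As a quick illustration (the q_demo values below are arbitrary numbers chosen only for this check), a low temperature yields nearly deterministic choice of the higher-valued response, while a high temperature yields close to 50/50 responding:
In [ ]:
# illustrative softmax check; q_demo is an arbitrary pair of value estimates
q_demo=[0.6,0.4]
choices_cold=[softmax(q_demo,0.01) for _ in range(1000)]
choices_hot=[softmax(q_demo,10.0) for _ in range(1000)]
print('p(choose response 0) at temp=0.01: %0.2f'%(1-numpy.mean(choices_cold)))
print('p(choose response 0) at temp=10.0: %0.2f'%(1-numpy.mean(choices_hot)))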
In [132]:
ntrials=1000
temp=0.01
# value estimates for the two responses
q=[0.0,0.0]
resp=numpy.zeros(ntrials)
reward=numpy.zeros(ntrials)
correct=numpy.zeros(ntrials)
for i in range(ntrials):
    # choose a response, get the outcome, and update the chosen response's value
    # using the Rescorla-Wagner delta rule
    resp[i]=softmax(q,temp)
    reward[i]=outcome(resp[i],p_A)
    correct[i]=resp[i]==1
    q[int(resp[i])]=q[int(resp[i])] + learning_rate*(reward[i]-q[int(resp[i])])
In [133]:
blocksize=20
nblocks=int(ntrials/blocksize)
block_reward=numpy.zeros(nblocks)
for i in range(nblocks):
    # mean proportion of rewarded trials within each block
    block_trials=numpy.arange(i*blocksize,(i+1)*blocksize)
    block_reward[i]=numpy.mean(reward[block_trials])
In [134]:
plt.plot(block_reward)
# overlay a lowess-smoothed reward curve (column 1 of the lowess output holds the fitted values)
smooth=statsmodels.nonparametric.smoothers_lowess.lowess(block_reward,numpy.arange(block_reward.shape[0]),frac=0.25)
plt.plot(smooth[:,1])
plt.xlabel('block')
plt.ylabel('proportion rewarded trials')
Out[134]:
In [136]:
print('mean proportion of rewarded trials: %0.3f'%numpy.mean(reward))