This notebook provides an example of a simple reinforcement learning algorithm. The learner must choose between two actions, each of which is probabilistically reinforced. We will start with a simple Rescorla-Wagner learner.
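
Briefly, the learner maintains a value estimate q for each of the two responses. After each trial, the chosen response's value is updated with the delta rule, q = q + learning_rate * (reward - q), and responses are selected by a softmax over the value estimates, with a temperature parameter controlling how deterministic the choices are.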


In [1]:
# set up imports
import numpy
import statsmodels.nonparametric.smoothers_lowess
import matplotlib.pyplot as plt
%matplotlib inline

In [131]:
# learning rate for the delta-rule update
learning_rate=0.001

# probability that response 1 is rewarded (response 0 is rewarded otherwise)
p_A=0.75

def softmax(q,temp):
    # compute the softmax probability of choosing response 0, then sample a response
    p=numpy.exp(q[0]/temp)/(numpy.exp(q[0]/temp)+numpy.exp(q[1]/temp))
    if p>numpy.random.rand():
        return 0
    else:
        return 1

def outcome(resp,p_A):
    # determine which response is rewarded on this trial
    # (response 1 with probability p_A, response 0 otherwise),
    # and return 1 if the given response was rewarded, 0 otherwise
    if numpy.random.rand()<p_A:
        rewarded_outcome=1
    else:
        rewarded_outcome=0
    if resp==rewarded_outcome:
        return 1
    else:
        return 0

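To get a feel for the softmax choice rule, the short sketch below computes the probability of choosing response 0 at a few temperatures for a hypothetical value difference of 0.1 (q_demo, t, and p0 are just illustrative names); lower temperatures make the choice more deterministic.

In [ ]:
# illustrative sketch: how temperature shapes the softmax choice probability
q_demo=[0.6,0.5]
for t in [0.01,0.1,1.0]:
    p0=numpy.exp(q_demo[0]/t)/(numpy.exp(q_demo[0]/t)+numpy.exp(q_demo[1]/t))
    print('temp=%0.2f: p(choose response 0)=%0.3f'%(t,p0))
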
In [132]:
ntrials=1000
temp=0.01  # softmax temperature: low values make choices nearly deterministic

# initial value estimates for the two responses
q=[0.0,0.0]

# per-trial records of the response, the obtained reward, and whether the
# more frequently rewarded response (1) was chosen
resp=numpy.zeros(ntrials)
reward=numpy.zeros(ntrials)
correct=numpy.zeros(ntrials)


for i in range(ntrials):
    # choose a response via softmax, observe the outcome, and update the
    # chosen response's value estimate with the delta rule
    resp[i]=softmax(q,temp)
    reward[i]=outcome(resp[i],p_A)
    correct[i]=resp[i]==1
    q[int(resp[i])]=q[int(resp[i])] + learning_rate*(reward[i]-q[int(resp[i])])

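As a quick, purely illustrative check (it assumes the simulation cell above has just run), we can inspect the final value estimates: the estimate for the frequently chosen response drifts toward its reward probability, while the rarely chosen response's estimate is barely updated at this low learning rate.

In [ ]:
# inspect the final value estimates after learning
print('final q values: %s'%str(q))
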
In [133]:
# compute the mean reward within consecutive blocks of trials
blocksize=20
nblocks=int(ntrials/blocksize)
block_reward=numpy.zeros(nblocks)
for i in range(nblocks):
    block_trials=numpy.arange(i*blocksize,(i+1)*blocksize)
    block_reward[i]=numpy.mean(reward[block_trials])

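As a side note, the same per-block means can be computed without a loop by reshaping the reward array, assuming ntrials is an exact multiple of blocksize (block_reward_alt below is just an illustrative name).

In [ ]:
# vectorized alternative: group consecutive trials into rows and average each row
block_reward_alt=reward.reshape(nblocks,blocksize).mean(axis=1)
assert numpy.allclose(block_reward,block_reward_alt)
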
In [134]:
# plot the per-block reward rate along with a lowess-smoothed trend line;
# lowess returns an array with the sorted x values in column 0 and the
# smoothed y values in column 1
plt.plot(block_reward)
smooth=statsmodels.nonparametric.smoothers_lowess.lowess(block_reward,numpy.arange(block_reward.shape[0]),frac=0.25)
plt.plot(smooth[:,1])


Out[134]:
[<matplotlib.lines.Line2D at 0x10e42e4e0>]

In [136]:
print('mean proportion rewarded trials: %0.3f'%numpy.mean(reward))


mean proportion rewarded trials: 0.748

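The reward rate conflates the learner's choices with the randomness of the outcomes, so it is also worth checking how often the learner actually chose the more frequently rewarded response, using the correct array recorded above (a quick illustrative follow-up):

In [ ]:
# proportion of trials on which the more frequently rewarded response (1) was chosen
print('proportion of trials choosing response 1: %0.3f'%numpy.mean(correct))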