In [97]:
## Frozen Lake 8x8 environment: reach the goal without falling into a hole
In [98]:
from __future__ import division, print_function
import gym
import numpy as np
import matplotlib as mpl
import matplotlib.pylab as plt
%matplotlib inline
In [102]:
## Use the 8x8 map to match the title of this notebook
env = gym.make('FrozenLake8x8-v0')
In [103]:
def choose_egreedy(q_state, env_action_space, eps):
    '''
    q_state          : Action-value vector for some state
    env_action_space : Action space of the environment, used to sample a random action
    eps              : Probability of choosing a random action
    '''
    ## Taken from https://github.com/ShangtongZhang/reinforcement-learning-an-introduction/blob/master/chapter06/CliffWalking.py
    ## With probability epsilon choose a random action: Binomial distribution,
    ## one trial with success probability eps.
    ## np.random.binomial returns the number of successes over the specified number of trials.
    ## Think of it as tossing a coin whose probability of landing heads is epsilon.
    if np.random.binomial(1, eps) == 1:
        ## Explore: get a random action from the action space
        return env_action_space.sample()
    else:
        ## Exploit: act greedily with respect to the current value estimates
        return np.argmax(q_state)
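In [ ]:
## Quick sanity check of choose_egreedy (an added sketch, not in the original notebook):
## with a toy value vector and eps=0 the greedy action should always be picked,
## while with eps=1 the actions should be sampled uniformly from the action space.
toy_q = np.array([0.1, 0.5, 0.2, 0.0])
greedy_picks = [choose_egreedy(toy_q, env.action_space, 0.0) for _ in range(5)]
random_picks = [choose_egreedy(toy_q, env.action_space, 1.0) for _ in range(5)]
print(greedy_picks)   ## expected: all 1, the argmax of toy_q
print(random_picks)   ## expected: a mix of actions from the action space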
In [123]:
# State-action values, initialised to zero
Q = np.zeros((env.observation_space.n, env.action_space.n))
## Step size
alpha = .5
## Discount parameter
gamma = .99
## Number of episodes to simulate
n_episodes = 30000
## Epsilon
eps = 0.4
steps_episode = np.zeros(n_episodes)
for episode in range(n_episodes):
    ## Get first state
    state = env.reset()
    done = False
    steps = 0
    while not done:
        ## Choose action from S (state) derived from Q (epsilon-greedily)
        action = choose_egreedy(Q[state, :], env.action_space, eps)
        steps += 1
        ## Take action and observe next state and reward
        next_state, reward, done, info = env.step(action)
        ## Q-learning update: bootstrap from the greedy action in the next state
        Q[state, action] += alpha * (reward + gamma * np.max(Q[next_state, :]) - Q[state, action])
        state = next_state
    steps_episode[episode] = steps
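In [ ]:
## Rough evaluation of the learned policy (an added sketch, not in the original notebook):
## run a number of episodes acting greedily with respect to Q and report the fraction
## that reach the goal. FrozenLake gives reward 1 only on reaching the goal, so the
## mean final reward is the empirical success rate.
n_eval = 1000
successes = 0
for _ in range(n_eval):
    state = env.reset()
    done = False
    while not done:
        ## Always take the greedy action during evaluation (no exploration)
        state, reward, done, info = env.step(np.argmax(Q[state, :]))
    successes += reward
print('Greedy-policy success rate:', successes / n_eval)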
In [124]:
plt.plot(steps_episode)
Out[124]:
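In [ ]:
## The raw per-episode step counts are very noisy, so a smoothed view helps (an added
## sketch, not in the original notebook): plot a simple moving average of the steps
## per episode using np.convolve.
window = 500
smoothed = np.convolve(steps_episode, np.ones(window) / window, mode='valid')
plt.plot(smoothed)
plt.xlabel('Episode')
plt.ylabel('Steps per episode ({}-episode moving average)'.format(window))
plt.show()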
In [ ]: