In [10]:
import numpy as np
import gym

In [11]:
env = gym.make('FrozenLake-v0')


[2018-02-21 19:43:50,122] Making new env: FrozenLake-v0

Frozen lake consists of a 4X4 grid with 4 available actions (Up, Down, Left, Right), thus giving us a 16X4 table of Q-values.

Available states are Start, Safe, Danger, End


In [12]:
Q = np.zeros([env.observation_space.n, env.action_space.n])

In [13]:
# Set learning parameters

lr = 0.8 # Learning rate
y = 0.95 # Gamma
num_episodes = 2000

In [14]:
# Create lists to contain total rewards and steps / episode 
r_list = []

for i in range(num_episodes):
    # Reset environment and get first new observation
    s = env.reset()
    rAll = 0
    d = False
    j = 0
    
    # Q-table learning
    while j < 99:
        j+=1
        
        # Choose an action from Q-table with added noise (for exploration)
        a = np.argmax(Q[s,:] + np.random.randn(1, env.action_space.n)*(1./(i + 1)))
        
        # Get new state and reward for the environment 
        s1, r, d, _ = env.step(a)
        
        # Update Q-table with new knowledge
        Q[s, a] = Q[s, a] + lr*(r + y*np.max(Q[s1, :] - Q[s, a]))
        rAll += r
        s = s1
        
        if d == True:
            break
        r_list.append(rAll)

In [15]:
print("Score over time: " + str(sum(r_list)/num_episodes))


Score over time: 0.0

In [16]:
print("Final Q-table values")
print(Q)


Final Q-table values
[[  2.64257274e-03   2.21328463e-02   7.91232546e-01   0.00000000e+00]
 [  0.00000000e+00   2.55858656e-03   2.39192784e-04   7.31359958e-01]
 [  1.15323589e-02   3.94829049e-03   8.36162308e-04   7.13625534e-01]
 [  1.14859667e-04   3.99441304e-04   0.00000000e+00   7.07543166e-01]
 [  8.32813648e-01   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  1.36768828e-04   1.60641658e-04   1.65046271e-01   5.74269007e-04]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   6.78935213e-03   9.59824443e-01]
 [  1.97916420e-03   9.37843392e-01   0.00000000e+00   0.00000000e+00]
 [  6.91546325e-01   1.03629239e-03   0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   5.47888514e-04   9.18021639e-01   2.70721590e-03]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00   1.01882738e+00]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00]]

In [ ]: