In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import gym
import numpy as np
from gym.envs.registration import register
from gym import wrappers
import shutil

In [2]:
# Optional: register a deterministic (non-slippery) variant of FrozenLake,
# which is much easier to solve — useful for debugging the Q-learning code.
# Uncomment this block and the gym.make below to use it instead of the
# standard slippery 'FrozenLake-v0'.
# register(
#     id='FrozenLakeNotSlippery-v0',
#     entry_point='gym.envs.toy_text:FrozenLakeEnv',
#     kwargs={'map_name' : '4x4', 'is_slippery': False},
#     max_episode_steps=100,
#     reward_threshold=0.78, # optimum = .8196
# )

#env = gym.make('FrozenLakeNotSlippery-v0')

In [3]:
# Create the (slippery) FrozenLake environment and wrap it with a Monitor
# that records episode statistics and videos to /tmp/FrozenLake_01.
env = gym.make('FrozenLake-v0')
# Clear any results from a previous run. ignore_errors=True prevents an
# OSError crash on the very first run, when the directory does not exist yet.
shutil.rmtree('/tmp/FrozenLake_01', ignore_errors=True)
env = wrappers.Monitor(env, '/tmp/FrozenLake_01')


[2017-08-01 03:11:05,282] Making new env: FrozenLake-v0
[2017-08-01 03:11:07,029] Creating monitor directory /tmp/FrozenLake_01

In [4]:
# Tabular Q-learning on FrozenLake.
# Initialize the Q-table (one row per state, one column per action) with zeros.
Q = np.zeros((env.observation_space.n, env.action_space.n))
# Set learning parameters
y = 0.95              # discount factor
num_episodes = 2000

rList = []            # total environment reward collected in each episode
# Exponential decay schedule: exp(alpha*i) shrinks from 1.0 at episode 0
# to ~1e-6 at the final episode. The same decayed factor scales both the
# exploration probability and the learning rate.
alpha = np.log(0.000001) / num_episodes
for i in range(num_episodes):
    lr = np.exp(alpha * i)
    # Reset environment and get first new observation
    s = env.reset()
    rAll = 0
    done = False
    # The Q-table learning loop for one episode
    while not done:
        # Epsilon-greedy action selection; exploration decays over training
        if np.random.rand() < lr * 0.1:
            a = np.random.randint(env.action_space.n)
        else:
            a = np.argmax(Q[s, :])
        # Get new state and reward from environment
        s1, reward, done, _ = env.step(a)
        # Reward shaping: +1 for reaching the goal, -1 for ending the
        # episode any other way (hole/timeout), and a small per-step
        # penalty otherwise to encourage short paths.
        if done:
            r = 1.0 if reward > 0.0 else -1.0
        else:
            r = -0.01
        # TD(0) update of the Q-table with the shaped reward
        Q[s, a] = Q[s, a] + lr * (r + y * np.max(Q[s1, :]) - Q[s, a])
        rAll += reward
        s = s1
    rList.append(rAll)
print("Score over time: " + str(sum(rList[-100:]) / 100.0))
# 100-episode moving average of reward. The kernel is normalized by 100 so
# the curve is a success rate in [0, 1], consistent with the printed score
# (the original plotted the un-normalized windowed sum).
plt.plot(np.convolve(rList, np.ones(100) / 100.0, "valid"))


[2017-08-01 03:11:07,107] Starting new video recorder writing to C:\tmp\FrozenLake_01\openaigym.video.0.7812.video000000.json
[2017-08-01 03:11:07,112] Starting new video recorder writing to C:\tmp\FrozenLake_01\openaigym.video.0.7812.video000001.json
[2017-08-01 03:11:07,121] Starting new video recorder writing to C:\tmp\FrozenLake_01\openaigym.video.0.7812.video000008.json
[2017-08-01 03:11:07,141] Starting new video recorder writing to C:\tmp\FrozenLake_01\openaigym.video.0.7812.video000027.json
[2017-08-01 03:11:07,187] Starting new video recorder writing to C:\tmp\FrozenLake_01\openaigym.video.0.7812.video000064.json
[2017-08-01 03:11:07,240] Starting new video recorder writing to C:\tmp\FrozenLake_01\openaigym.video.0.7812.video000125.json
[2017-08-01 03:11:07,332] Starting new video recorder writing to C:\tmp\FrozenLake_01\openaigym.video.0.7812.video000216.json
[2017-08-01 03:11:07,528] Starting new video recorder writing to C:\tmp\FrozenLake_01\openaigym.video.0.7812.video000343.json
[2017-08-01 03:11:07,759] Starting new video recorder writing to C:\tmp\FrozenLake_01\openaigym.video.0.7812.video000512.json
[2017-08-01 03:11:08,003] Starting new video recorder writing to C:\tmp\FrozenLake_01\openaigym.video.0.7812.video000729.json
[2017-08-01 03:11:08,328] Starting new video recorder writing to C:\tmp\FrozenLake_01\openaigym.video.0.7812.video001000.json
Score over time: 0.79
Out[4]:
[<matplotlib.lines.Line2D at 0x93dcfd0>]

In [5]:
# Display the learned Q-table. Single-argument print(...) is valid in both
# Python 2 (parenthesized expression) and Python 3 (function call), unlike
# the original Python-2-only print statement.
print("Final Q-Table Values")
print(Q)


Final Q-Table Values
[[-0.05434152 -0.16966545 -0.15454293 -0.17355605]
 [-0.37224107 -0.54903455 -0.40307729 -0.19496105]
 [-0.71983384 -0.32521472 -0.50019063 -0.58884444]
 [-0.65067512 -0.87505773 -0.65288723 -0.36438045]
 [-0.0249342  -0.4376649  -0.25386935 -0.45939011]
 [ 0.          0.          0.          0.        ]
 [-0.41101014 -0.73545578 -0.779566   -0.98993724]
 [ 0.          0.          0.          0.        ]
 [-0.7598697  -0.73937808 -0.51829132  0.03892334]
 [-0.6218725   0.14982506 -0.46546787 -0.15818039]
 [ 0.12086304 -0.36856093 -0.5076085  -0.83863779]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.27952321 -0.18261356  0.35598277 -0.76409084]
 [ 0.27780403  0.63988891  0.19223119  0.19816148]
 [ 0.          0.          0.          0.        ]]

In [6]:
# Roll out one episode following the greedy (purely exploiting) policy
# derived from the learned Q-table, then render the final state.
s = env.reset()
d = False
n = 0                 # step counter for this episode
while not d:
    n += 1
    a = np.argmax(Q[s, :])    # always pick the best-known action
    s, r, d, x = env.step(a)
env.render()
print(n)              # number of steps the greedy episode took


[2017-08-01 03:11:09,793] Starting new video recorder writing to C:\tmp\FrozenLake_01\openaigym.video.0.7812.video002000.json
  (Down)
SFFF
FHFH
FFFH
HFFG
23

In [7]:
# Finalize the monitor files and upload the recorded results.
env.close()
# NOTE(security): never hardcode an API key in source — the original embedded
# one here. When api_key is omitted, gym.upload reads the key from the
# OPENAI_GYM_API_KEY environment variable instead; set it in your shell.
gym.upload('/tmp/FrozenLake_01')


[2017-08-01 03:11:18,020] Finished writing results. You can upload them to the scoreboard via gym.upload('C:\\tmp\\FrozenLake_01')
[2017-08-01 03:11:18,028] [FrozenLake-v0] Uploading 2001 episodes of training data
[2017-08-01 03:11:20,684] [FrozenLake-v0] Uploading videos of 12 training episodes (1634 bytes)
[2017-08-01 03:11:21,346] [FrozenLake-v0] Creating evaluation object from /tmp/FrozenLake_01 with learning curve and training video
[2017-08-01 03:11:21,773] 
****************************************************
You successfully uploaded your evaluation on FrozenLake-v0 to
OpenAI Gym! You can find it at:

    https://gym.openai.com/evaluations/eval_USL3ofLxRruwDEoZ9QCSPg

****************************************************

In [ ]: