Cartpole Game

Deep-Q-Learning with Keras and Gym, based on Keon Kim's code.

Import dependencies


In [1]:
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import os # for creating directories


Using TensorFlow backend.
/Users/jon/anaconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6
  return f(*args, **kwds)

Set hyperparameters


In [2]:
# Number of complete games (episodes) the agent plays during training (default 1001).
n_episodes = 1001

# Directory that receives the periodic weight checkpoints. The trailing slash is
# required: checkpoint filenames are appended to it by plain string concatenation.
output_dir = 'model_output/cartpole/'

Define class for Deep-Q-Learning agent


In [3]:
class DQNAgent:
    """Deep-Q-Learning agent with experience replay and an epsilon-greedy policy.

    The agent keeps a bounded memory of past transitions, a small fully
    connected Keras network that maps a state to one Q-value per action, and an
    exploration rate (epsilon) that is decayed after every call to ``replay``.

    Args:
        state_size: Number of features in a single observation (network input width).
        action_size: Number of discrete actions (network output width).
        memory_size: Maximum number of transitions kept; the oldest are dropped first.
        gamma: Discount rate applied to the estimated future reward.
        epsilon: Initial exploration rate, i.e. probability of acting randomly.
        epsilon_min: Floor that the decay in ``replay`` never goes below.
        epsilon_decay: Multiplicative factor applied to epsilon after each replay.
        learning_rate: Learning rate handed to the Adam optimizer.
    """

    def __init__(self, state_size, action_size, memory_size=2000, gamma=0.95,
                 epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995,
                 learning_rate=0.001):
        self.state_size = state_size
        self.action_size = action_size
        # Bounded double-ended queue: appending beyond maxlen discards the oldest entry.
        self.memory = deque(maxlen=memory_size)
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network.

        Returns:
            A compiled Keras ``Sequential`` model: two hidden ReLU layers of 24
            units followed by a linear layer with one output per action,
            trained with mean-squared-error loss and the Adam optimizer.
        """
        model = Sequential()

        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))

        # Linear output: Q-values are unbounded real numbers, not probabilities.
        model.add(Dense(self.action_size, activation='linear'))

        model.compile(loss='mse',
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in memory so it can be replayed for training later."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` using the epsilon-greedy policy.

        Args:
            state: Observation in the form accepted by ``model.predict``; the
                training loop in this notebook passes an array of shape
                ``(1, state_size)``.

        Returns:
            Index of the chosen action: a uniformly random action with
            probability epsilon, otherwise the action with the highest
            predicted Q-value.
        """
        if np.random.rand() <= self.epsilon:  # explore
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)  # exploit: one Q-value per action
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Train the network on a random minibatch of remembered transitions.

        For every sampled transition the Q-value of the action that was taken
        is replaced by the target ``reward`` (terminal transition) or
        ``reward + gamma * max_a' Q(next_state, a')`` (non-terminal), and the
        network is fitted for one epoch on that single sample. Afterwards the
        exploration rate is decayed once.

        Args:
            batch_size: Number of transitions to sample without replacement.

        Raises:
            ValueError: If ``batch_size`` exceeds the number of stored
                transitions (raised by ``random.sample``).
        """
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward  # terminal transition: there is no future reward to add
            if not done:
                target = (reward + self.gamma *
                          np.amax(self.model.predict(next_state)[0]))
            # Start from the current predictions so that only the Q-value of the
            # action actually taken contributes to the loss.
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            # Clamp so the decay step can never push epsilon below its floor.
            # An epsilon already at or below the floor is left untouched.
            self.epsilon = max(self.epsilon_min,
                               self.epsilon * self.epsilon_decay)

    def load(self, name):
        """Load network weights from the file path ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save the current network weights to the file path ``name``."""
        self.model.save_weights(name)

Set up the environment and agent (TODO: move tunable values such as `batch_size` to the hyperparameter cell near the top)


In [4]:
batch_size = 32  # number of remembered transitions sampled per agent.replay() call
done = False     # episode-finished flag; reassigned by every env.step() in the training loop

# Build the CartPole environment and read the problem dimensions from its spaces.
env = gym.make('CartPole-v1')
state_size = env.observation_space.shape[0]  # length of one observation vector
action_size = env.action_space.n             # number of discrete actions

# Create a freshly initialised agent sized for this environment.
# To resume from previously saved weights instead, call agent.load(<weights file>) here.
agent = DQNAgent(state_size, action_size)


[2017-12-21 09:23:01,003] Making new env: CartPole-v1

In [5]:
# Create the checkpoint directory (and any missing parents). exist_ok=True makes
# the cell safe to re-run and avoids the check-then-create race of testing
# os.path.exists() first.
os.makedirs(output_dir, exist_ok=True)

In [ ]:
for episode in range(n_episodes):  # one pass per game
    # Start a fresh game; a leading batch axis is added so the state has shape (1, state_size).
    state = np.reshape(env.reset(), [1, state_size])
    for timestep in range(5000):  # hard upper bound on frames per game
        env.render()
        action = agent.act(state)  # epsilon-greedy choice between the available actions
        next_state, reward, done, _ = env.step(action)
        if done:
            # Penalise the transition that ended the game.
            # NOTE(review): this also fires when the environment ends the episode
            # for a reason other than failure (e.g. a step limit) - confirm that is intended.
            reward = -10
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)  # store the transition for replay
        state = next_state
        if not done:
            continue
        # Game over: report how many frames the pole stayed up, then leave the inner loop.
        print("episode: {}/{}, score: {}, e: {:.2}"
              .format(episode, n_episodes, timestep, agent.epsilon))
        break
    # Train once per game, but only after memory holds more than one minibatch.
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)
    # Checkpoint the weights every 50th game (including game 0).
    if not episode % 50:
        agent.save(output_dir + "weights_" + '{:04d}'.format(episode) + ".hdf5")


episode: 0/1001, score: 90, e: 1.0
episode: 1/1001, score: 11, e: 0.99
episode: 2/1001, score: 15, e: 0.99
episode: 3/1001, score: 19, e: 0.99
episode: 4/1001, score: 42, e: 0.98
episode: 5/1001, score: 15, e: 0.98
episode: 6/1001, score: 14, e: 0.97
episode: 7/1001, score: 30, e: 0.97
episode: 8/1001, score: 14, e: 0.96
episode: 9/1001, score: 49, e: 0.96
episode: 10/1001, score: 18, e: 0.95
episode: 11/1001, score: 15, e: 0.95
episode: 12/1001, score: 13, e: 0.94
episode: 13/1001, score: 16, e: 0.94
episode: 14/1001, score: 22, e: 0.93
episode: 15/1001, score: 36, e: 0.93
episode: 16/1001, score: 28, e: 0.92
episode: 17/1001, score: 12, e: 0.92
episode: 18/1001, score: 11, e: 0.91
episode: 19/1001, score: 16, e: 0.91
episode: 20/1001, score: 22, e: 0.9
episode: 21/1001, score: 21, e: 0.9
episode: 22/1001, score: 23, e: 0.9
episode: 23/1001, score: 31, e: 0.89
episode: 24/1001, score: 13, e: 0.89
episode: 25/1001, score: 11, e: 0.88
episode: 26/1001, score: 17, e: 0.88
episode: 27/1001, score: 15, e: 0.87
episode: 28/1001, score: 9, e: 0.87
episode: 29/1001, score: 9, e: 0.86
episode: 30/1001, score: 9, e: 0.86
episode: 31/1001, score: 21, e: 0.86
episode: 32/1001, score: 18, e: 0.85
episode: 33/1001, score: 46, e: 0.85
episode: 34/1001, score: 13, e: 0.84
episode: 35/1001, score: 16, e: 0.84
episode: 36/1001, score: 34, e: 0.83
episode: 37/1001, score: 26, e: 0.83
episode: 38/1001, score: 17, e: 0.83
episode: 39/1001, score: 17, e: 0.82
episode: 40/1001, score: 9, e: 0.82
episode: 41/1001, score: 9, e: 0.81
episode: 42/1001, score: 14, e: 0.81
episode: 43/1001, score: 14, e: 0.81
episode: 44/1001, score: 25, e: 0.8
episode: 45/1001, score: 16, e: 0.8
episode: 46/1001, score: 10, e: 0.79
episode: 47/1001, score: 27, e: 0.79
episode: 48/1001, score: 30, e: 0.79
episode: 49/1001, score: 24, e: 0.78
episode: 50/1001, score: 47, e: 0.78
episode: 51/1001, score: 38, e: 0.77
episode: 52/1001, score: 10, e: 0.77
episode: 53/1001, score: 17, e: 0.77
episode: 54/1001, score: 19, e: 0.76
episode: 55/1001, score: 33, e: 0.76
episode: 56/1001, score: 15, e: 0.76
episode: 57/1001, score: 21, e: 0.75
episode: 58/1001, score: 16, e: 0.75
episode: 59/1001, score: 13, e: 0.74
episode: 60/1001, score: 18, e: 0.74
episode: 61/1001, score: 11, e: 0.74
episode: 62/1001, score: 38, e: 0.73
episode: 63/1001, score: 17, e: 0.73
episode: 64/1001, score: 13, e: 0.73
episode: 65/1001, score: 24, e: 0.72
episode: 66/1001, score: 34, e: 0.72
episode: 67/1001, score: 26, e: 0.71
episode: 68/1001, score: 18, e: 0.71
episode: 69/1001, score: 24, e: 0.71
episode: 70/1001, score: 46, e: 0.7
episode: 71/1001, score: 19, e: 0.7
episode: 72/1001, score: 28, e: 0.7
episode: 73/1001, score: 25, e: 0.69
episode: 74/1001, score: 36, e: 0.69
episode: 75/1001, score: 29, e: 0.69
episode: 76/1001, score: 28, e: 0.68
episode: 77/1001, score: 27, e: 0.68
episode: 78/1001, score: 13, e: 0.68
episode: 79/1001, score: 17, e: 0.67
episode: 80/1001, score: 12, e: 0.67
episode: 81/1001, score: 17, e: 0.67
episode: 82/1001, score: 11, e: 0.66
episode: 83/1001, score: 11, e: 0.66
episode: 84/1001, score: 27, e: 0.66
episode: 85/1001, score: 21, e: 0.65
episode: 86/1001, score: 18, e: 0.65
episode: 87/1001, score: 14, e: 0.65
episode: 88/1001, score: 17, e: 0.64
episode: 89/1001, score: 9, e: 0.64
episode: 90/1001, score: 28, e: 0.64
episode: 91/1001, score: 21, e: 0.63
episode: 92/1001, score: 13, e: 0.63
episode: 93/1001, score: 36, e: 0.63
episode: 94/1001, score: 60, e: 0.62
episode: 95/1001, score: 33, e: 0.62
episode: 96/1001, score: 16, e: 0.62
episode: 97/1001, score: 24, e: 0.61
episode: 98/1001, score: 23, e: 0.61
episode: 99/1001, score: 31, e: 0.61
episode: 100/1001, score: 28, e: 0.61
episode: 101/1001, score: 19, e: 0.6
episode: 102/1001, score: 16, e: 0.6
episode: 103/1001, score: 28, e: 0.6
episode: 104/1001, score: 27, e: 0.59
episode: 105/1001, score: 16, e: 0.59
episode: 106/1001, score: 28, e: 0.59
episode: 107/1001, score: 12, e: 0.58
episode: 108/1001, score: 15, e: 0.58
episode: 109/1001, score: 31, e: 0.58
episode: 110/1001, score: 19, e: 0.58
episode: 111/1001, score: 17, e: 0.57
episode: 112/1001, score: 9, e: 0.57
episode: 113/1001, score: 14, e: 0.57
episode: 114/1001, score: 19, e: 0.56
episode: 115/1001, score: 20, e: 0.56
episode: 116/1001, score: 20, e: 0.56
episode: 117/1001, score: 30, e: 0.56
episode: 118/1001, score: 12, e: 0.55
episode: 119/1001, score: 20, e: 0.55
episode: 120/1001, score: 30, e: 0.55
episode: 121/1001, score: 14, e: 0.55
episode: 122/1001, score: 44, e: 0.54
episode: 123/1001, score: 34, e: 0.54
episode: 124/1001, score: 45, e: 0.54
episode: 125/1001, score: 21, e: 0.53
episode: 126/1001, score: 33, e: 0.53
episode: 127/1001, score: 22, e: 0.53
episode: 128/1001, score: 49, e: 0.53
episode: 129/1001, score: 20, e: 0.52
episode: 130/1001, score: 25, e: 0.52
episode: 131/1001, score: 13, e: 0.52
episode: 132/1001, score: 35, e: 0.52
episode: 133/1001, score: 20, e: 0.51
episode: 134/1001, score: 29, e: 0.51
episode: 135/1001, score: 28, e: 0.51
episode: 136/1001, score: 25, e: 0.51
episode: 137/1001, score: 36, e: 0.5
episode: 138/1001, score: 24, e: 0.5
episode: 139/1001, score: 23, e: 0.5
episode: 140/1001, score: 43, e: 0.5
episode: 141/1001, score: 27, e: 0.49
episode: 142/1001, score: 20, e: 0.49
episode: 143/1001, score: 23, e: 0.49
episode: 144/1001, score: 39, e: 0.49
episode: 145/1001, score: 92, e: 0.48
episode: 146/1001, score: 110, e: 0.48
episode: 147/1001, score: 77, e: 0.48
episode: 148/1001, score: 82, e: 0.48
episode: 149/1001, score: 39, e: 0.47
episode: 150/1001, score: 9, e: 0.47
episode: 151/1001, score: 14, e: 0.47
episode: 152/1001, score: 10, e: 0.47
episode: 153/1001, score: 10, e: 0.46
episode: 154/1001, score: 29, e: 0.46
episode: 155/1001, score: 14, e: 0.46
episode: 156/1001, score: 14, e: 0.46
episode: 157/1001, score: 21, e: 0.46
episode: 158/1001, score: 16, e: 0.45
episode: 159/1001, score: 51, e: 0.45
episode: 160/1001, score: 97, e: 0.45
episode: 161/1001, score: 43, e: 0.45
episode: 162/1001, score: 57, e: 0.44
episode: 163/1001, score: 52, e: 0.44
episode: 164/1001, score: 88, e: 0.44
episode: 165/1001, score: 40, e: 0.44
episode: 166/1001, score: 21, e: 0.44
episode: 167/1001, score: 18, e: 0.43
episode: 168/1001, score: 20, e: 0.43
episode: 169/1001, score: 22, e: 0.43
episode: 170/1001, score: 39, e: 0.43
episode: 171/1001, score: 26, e: 0.42
episode: 172/1001, score: 68, e: 0.42
episode: 173/1001, score: 63, e: 0.42
episode: 174/1001, score: 67, e: 0.42
episode: 175/1001, score: 17, e: 0.42
episode: 176/1001, score: 35, e: 0.41
episode: 177/1001, score: 64, e: 0.41
episode: 178/1001, score: 32, e: 0.41
episode: 179/1001, score: 53, e: 0.41
episode: 180/1001, score: 41, e: 0.41
episode: 181/1001, score: 45, e: 0.4
episode: 182/1001, score: 18, e: 0.4
episode: 183/1001, score: 19, e: 0.4
episode: 184/1001, score: 12, e: 0.4
episode: 185/1001, score: 63, e: 0.4
episode: 186/1001, score: 47, e: 0.39
episode: 187/1001, score: 33, e: 0.39
episode: 188/1001, score: 33, e: 0.39
episode: 189/1001, score: 47, e: 0.39
episode: 190/1001, score: 40, e: 0.39
episode: 191/1001, score: 89, e: 0.38
episode: 192/1001, score: 49, e: 0.38
episode: 193/1001, score: 22, e: 0.38
episode: 194/1001, score: 29, e: 0.38
episode: 195/1001, score: 35, e: 0.38
episode: 196/1001, score: 94, e: 0.37
episode: 197/1001, score: 51, e: 0.37
episode: 198/1001, score: 109, e: 0.37
episode: 199/1001, score: 74, e: 0.37
episode: 200/1001, score: 66, e: 0.37
episode: 201/1001, score: 33, e: 0.37
episode: 202/1001, score: 93, e: 0.36
episode: 203/1001, score: 58, e: 0.36
episode: 204/1001, score: 56, e: 0.36
episode: 205/1001, score: 63, e: 0.36
episode: 206/1001, score: 42, e: 0.36
episode: 207/1001, score: 34, e: 0.35
episode: 208/1001, score: 36, e: 0.35
episode: 209/1001, score: 24, e: 0.35
episode: 210/1001, score: 26, e: 0.35
episode: 211/1001, score: 20, e: 0.35
episode: 212/1001, score: 55, e: 0.35
episode: 213/1001, score: 17, e: 0.34
episode: 214/1001, score: 18, e: 0.34
episode: 215/1001, score: 31, e: 0.34
episode: 216/1001, score: 67, e: 0.34
episode: 217/1001, score: 70, e: 0.34
episode: 218/1001, score: 32, e: 0.34
episode: 219/1001, score: 32, e: 0.33
episode: 220/1001, score: 54, e: 0.33
episode: 221/1001, score: 51, e: 0.33
episode: 222/1001, score: 55, e: 0.33
episode: 223/1001, score: 35, e: 0.33
episode: 224/1001, score: 75, e: 0.33
episode: 225/1001, score: 34, e: 0.32
episode: 226/1001, score: 28, e: 0.32
episode: 227/1001, score: 32, e: 0.32
episode: 228/1001, score: 34, e: 0.32
episode: 229/1001, score: 51, e: 0.32
episode: 230/1001, score: 64, e: 0.32
episode: 231/1001, score: 35, e: 0.31
episode: 232/1001, score: 42, e: 0.31
episode: 233/1001, score: 34, e: 0.31
episode: 234/1001, score: 30, e: 0.31
episode: 235/1001, score: 47, e: 0.31
episode: 236/1001, score: 54, e: 0.31
episode: 237/1001, score: 46, e: 0.3
episode: 238/1001, score: 54, e: 0.3
episode: 239/1001, score: 60, e: 0.3
episode: 240/1001, score: 83, e: 0.3
episode: 241/1001, score: 38, e: 0.3
episode: 242/1001, score: 59, e: 0.3
episode: 243/1001, score: 54, e: 0.3
episode: 244/1001, score: 34, e: 0.29
episode: 245/1001, score: 52, e: 0.29
episode: 246/1001, score: 89, e: 0.29
episode: 247/1001, score: 93, e: 0.29
episode: 248/1001, score: 69, e: 0.29
episode: 249/1001, score: 43, e: 0.29
episode: 250/1001, score: 140, e: 0.29
episode: 251/1001, score: 83, e: 0.28
episode: 252/1001, score: 56, e: 0.28
episode: 253/1001, score: 80, e: 0.28
episode: 254/1001, score: 113, e: 0.28
episode: 255/1001, score: 100, e: 0.28
episode: 256/1001, score: 84, e: 0.28
episode: 257/1001, score: 95, e: 0.28
episode: 258/1001, score: 60, e: 0.27
episode: 259/1001, score: 101, e: 0.27
episode: 260/1001, score: 39, e: 0.27
episode: 261/1001, score: 41, e: 0.27
episode: 262/1001, score: 47, e: 0.27
episode: 263/1001, score: 50, e: 0.27
episode: 264/1001, score: 50, e: 0.27
episode: 265/1001, score: 41, e: 0.26
episode: 266/1001, score: 30, e: 0.26
episode: 267/1001, score: 27, e: 0.26
episode: 268/1001, score: 61, e: 0.26
episode: 269/1001, score: 63, e: 0.26
episode: 270/1001, score: 64, e: 0.26
episode: 271/1001, score: 57, e: 0.26
episode: 272/1001, score: 77, e: 0.26
episode: 273/1001, score: 87, e: 0.25
episode: 274/1001, score: 63, e: 0.25
episode: 275/1001, score: 48, e: 0.25
episode: 276/1001, score: 65, e: 0.25
episode: 277/1001, score: 36, e: 0.25
episode: 278/1001, score: 44, e: 0.25
episode: 279/1001, score: 52, e: 0.25
episode: 280/1001, score: 55, e: 0.25
episode: 281/1001, score: 135, e: 0.24
episode: 282/1001, score: 171, e: 0.24
episode: 283/1001, score: 72, e: 0.24
episode: 284/1001, score: 70, e: 0.24
episode: 285/1001, score: 71, e: 0.24
episode: 286/1001, score: 140, e: 0.24
episode: 287/1001, score: 205, e: 0.24
episode: 288/1001, score: 130, e: 0.24
episode: 289/1001, score: 79, e: 0.23
episode: 290/1001, score: 65, e: 0.23
episode: 291/1001, score: 74, e: 0.23
episode: 292/1001, score: 73, e: 0.23
episode: 293/1001, score: 97, e: 0.23
episode: 294/1001, score: 51, e: 0.23
episode: 295/1001, score: 67, e: 0.23
episode: 296/1001, score: 77, e: 0.23
episode: 297/1001, score: 46, e: 0.23
episode: 298/1001, score: 107, e: 0.22
episode: 299/1001, score: 111, e: 0.22
episode: 300/1001, score: 65, e: 0.22
episode: 301/1001, score: 55, e: 0.22
episode: 302/1001, score: 68, e: 0.22
episode: 303/1001, score: 68, e: 0.22
episode: 304/1001, score: 76, e: 0.22
episode: 305/1001, score: 58, e: 0.22
episode: 306/1001, score: 47, e: 0.22
episode: 307/1001, score: 74, e: 0.21
episode: 308/1001, score: 117, e: 0.21
episode: 309/1001, score: 54, e: 0.21
episode: 310/1001, score: 75, e: 0.21
episode: 311/1001, score: 121, e: 0.21
episode: 312/1001, score: 76, e: 0.21
episode: 313/1001, score: 76, e: 0.21
episode: 314/1001, score: 74, e: 0.21
episode: 315/1001, score: 242, e: 0.21
episode: 316/1001, score: 196, e: 0.21
episode: 317/1001, score: 72, e: 0.2
episode: 318/1001, score: 153, e: 0.2
episode: 319/1001, score: 49, e: 0.2
episode: 320/1001, score: 81, e: 0.2
episode: 321/1001, score: 435, e: 0.2
episode: 322/1001, score: 319, e: 0.2
episode: 323/1001, score: 106, e: 0.2
episode: 324/1001, score: 97, e: 0.2
episode: 325/1001, score: 17, e: 0.2
episode: 326/1001, score: 45, e: 0.2
episode: 327/1001, score: 154, e: 0.19
episode: 328/1001, score: 81, e: 0.19
episode: 329/1001, score: 140, e: 0.19
episode: 330/1001, score: 49, e: 0.19
episode: 331/1001, score: 57, e: 0.19
episode: 332/1001, score: 28, e: 0.19
episode: 333/1001, score: 31, e: 0.19
episode: 334/1001, score: 38, e: 0.19
episode: 335/1001, score: 84, e: 0.19
episode: 336/1001, score: 69, e: 0.19
episode: 337/1001, score: 66, e: 0.18
episode: 338/1001, score: 147, e: 0.18
episode: 339/1001, score: 82, e: 0.18
episode: 340/1001, score: 141, e: 0.18
episode: 341/1001, score: 60, e: 0.18
episode: 342/1001, score: 68, e: 0.18
episode: 343/1001, score: 115, e: 0.18
episode: 344/1001, score: 262, e: 0.18
episode: 345/1001, score: 97, e: 0.18
episode: 346/1001, score: 105, e: 0.18
episode: 347/1001, score: 153, e: 0.18
episode: 348/1001, score: 89, e: 0.17
episode: 349/1001, score: 85, e: 0.17
episode: 350/1001, score: 241, e: 0.17
episode: 351/1001, score: 97, e: 0.17
episode: 352/1001, score: 111, e: 0.17
episode: 353/1001, score: 106, e: 0.17
episode: 354/1001, score: 116, e: 0.17
episode: 355/1001, score: 199, e: 0.17
episode: 356/1001, score: 64, e: 0.17
episode: 357/1001, score: 97, e: 0.17
episode: 358/1001, score: 106, e: 0.17
episode: 359/1001, score: 110, e: 0.17
episode: 360/1001, score: 123, e: 0.16
episode: 361/1001, score: 134, e: 0.16
episode: 362/1001, score: 28, e: 0.16
episode: 363/1001, score: 22, e: 0.16
episode: 364/1001, score: 26, e: 0.16
episode: 365/1001, score: 57, e: 0.16
episode: 366/1001, score: 335, e: 0.16
episode: 367/1001, score: 111, e: 0.16
episode: 368/1001, score: 107, e: 0.16
episode: 369/1001, score: 57, e: 0.16
episode: 370/1001, score: 141, e: 0.16
episode: 371/1001, score: 54, e: 0.16
episode: 372/1001, score: 93, e: 0.15
episode: 373/1001, score: 70, e: 0.15
episode: 374/1001, score: 54, e: 0.15
episode: 375/1001, score: 106, e: 0.15
episode: 376/1001, score: 64, e: 0.15
episode: 377/1001, score: 69, e: 0.15
episode: 378/1001, score: 69, e: 0.15
episode: 379/1001, score: 68, e: 0.15
episode: 380/1001, score: 121, e: 0.15
episode: 381/1001, score: 138, e: 0.15
episode: 382/1001, score: 72, e: 0.15
episode: 383/1001, score: 64, e: 0.15
episode: 384/1001, score: 44, e: 0.15
episode: 385/1001, score: 35, e: 0.15
episode: 386/1001, score: 41, e: 0.14
episode: 387/1001, score: 62, e: 0.14
episode: 388/1001, score: 65, e: 0.14
episode: 389/1001, score: 73, e: 0.14
episode: 390/1001, score: 67, e: 0.14
episode: 391/1001, score: 42, e: 0.14
episode: 392/1001, score: 59, e: 0.14
episode: 393/1001, score: 66, e: 0.14
episode: 394/1001, score: 48, e: 0.14
episode: 395/1001, score: 64, e: 0.14
episode: 396/1001, score: 74, e: 0.14
episode: 397/1001, score: 66, e: 0.14
episode: 398/1001, score: 65, e: 0.14
episode: 399/1001, score: 44, e: 0.14
episode: 400/1001, score: 88, e: 0.13
episode: 401/1001, score: 66, e: 0.13
episode: 402/1001, score: 140, e: 0.13
episode: 403/1001, score: 216, e: 0.13
episode: 404/1001, score: 95, e: 0.13
episode: 405/1001, score: 98, e: 0.13
episode: 406/1001, score: 73, e: 0.13
episode: 407/1001, score: 176, e: 0.13
episode: 408/1001, score: 142, e: 0.13
episode: 409/1001, score: 186, e: 0.13
episode: 410/1001, score: 499, e: 0.13
episode: 411/1001, score: 386, e: 0.13
episode: 412/1001, score: 141, e: 0.13
episode: 413/1001, score: 135, e: 0.13
episode: 414/1001, score: 153, e: 0.13
episode: 415/1001, score: 136, e: 0.12
episode: 416/1001, score: 278, e: 0.12
episode: 417/1001, score: 136, e: 0.12
episode: 418/1001, score: 165, e: 0.12
episode: 419/1001, score: 142, e: 0.12
episode: 420/1001, score: 90, e: 0.12
episode: 421/1001, score: 158, e: 0.12
episode: 422/1001, score: 148, e: 0.12
episode: 423/1001, score: 153, e: 0.12
episode: 424/1001, score: 187, e: 0.12
episode: 425/1001, score: 149, e: 0.12
episode: 426/1001, score: 393, e: 0.12
episode: 427/1001, score: 322, e: 0.12
episode: 428/1001, score: 342, e: 0.12
episode: 429/1001, score: 198, e: 0.12
episode: 430/1001, score: 255, e: 0.12
episode: 431/1001, score: 425, e: 0.12
episode: 432/1001, score: 226, e: 0.11
episode: 433/1001, score: 224, e: 0.11
episode: 434/1001, score: 461, e: 0.11
episode: 435/1001, score: 310, e: 0.11
episode: 436/1001, score: 299, e: 0.11
episode: 437/1001, score: 171, e: 0.11
episode: 438/1001, score: 212, e: 0.11
episode: 439/1001, score: 229, e: 0.11
episode: 440/1001, score: 395, e: 0.11
episode: 441/1001, score: 152, e: 0.11
episode: 442/1001, score: 288, e: 0.11
episode: 443/1001, score: 305, e: 0.11
episode: 444/1001, score: 213, e: 0.11
episode: 445/1001, score: 172, e: 0.11
episode: 446/1001, score: 154, e: 0.11
episode: 447/1001, score: 297, e: 0.11
episode: 448/1001, score: 274, e: 0.11
episode: 449/1001, score: 499, e: 0.11
episode: 450/1001, score: 483, e: 0.1
episode: 451/1001, score: 118, e: 0.1
episode: 452/1001, score: 459, e: 0.1
episode: 453/1001, score: 319, e: 0.1
episode: 454/1001, score: 499, e: 0.1
episode: 455/1001, score: 329, e: 0.1
episode: 456/1001, score: 218, e: 0.1
episode: 457/1001, score: 499, e: 0.1
episode: 458/1001, score: 216, e: 0.1
episode: 459/1001, score: 435, e: 0.1
episode: 460/1001, score: 499, e: 0.1
episode: 461/1001, score: 160, e: 0.099
episode: 462/1001, score: 303, e: 0.099
episode: 463/1001, score: 439, e: 0.098
episode: 464/1001, score: 26, e: 0.098
episode: 465/1001, score: 39, e: 0.097
episode: 466/1001, score: 69, e: 0.097
episode: 467/1001, score: 86, e: 0.096
episode: 468/1001, score: 112, e: 0.096
episode: 469/1001, score: 221, e: 0.095
episode: 470/1001, score: 100, e: 0.095
episode: 471/1001, score: 104, e: 0.094
episode: 472/1001, score: 95, e: 0.094
episode: 473/1001, score: 37, e: 0.093
episode: 474/1001, score: 61, e: 0.093
episode: 475/1001, score: 319, e: 0.092
episode: 476/1001, score: 178, e: 0.092
episode: 477/1001, score: 356, e: 0.092
episode: 478/1001, score: 282, e: 0.091
episode: 479/1001, score: 138, e: 0.091
episode: 480/1001, score: 162, e: 0.09
episode: 481/1001, score: 41, e: 0.09
episode: 482/1001, score: 110, e: 0.089
episode: 483/1001, score: 257, e: 0.089
episode: 484/1001, score: 63, e: 0.088
episode: 485/1001, score: 231, e: 0.088
episode: 486/1001, score: 139, e: 0.088
episode: 487/1001, score: 72, e: 0.087
episode: 488/1001, score: 97, e: 0.087
episode: 489/1001, score: 499, e: 0.086
episode: 490/1001, score: 137, e: 0.086
episode: 491/1001, score: 499, e: 0.085
episode: 492/1001, score: 108, e: 0.085
episode: 493/1001, score: 114, e: 0.084
episode: 494/1001, score: 97, e: 0.084
episode: 495/1001, score: 148, e: 0.084
episode: 496/1001, score: 470, e: 0.083
episode: 497/1001, score: 57, e: 0.083
episode: 498/1001, score: 76, e: 0.082
episode: 499/1001, score: 117, e: 0.082
episode: 500/1001, score: 251, e: 0.082
episode: 501/1001, score: 265, e: 0.081
episode: 502/1001, score: 167, e: 0.081
episode: 503/1001, score: 106, e: 0.08
episode: 504/1001, score: 132, e: 0.08
episode: 505/1001, score: 312, e: 0.08
episode: 506/1001, score: 45, e: 0.079
episode: 507/1001, score: 102, e: 0.079
episode: 508/1001, score: 250, e: 0.078
episode: 509/1001, score: 312, e: 0.078
episode: 510/1001, score: 265, e: 0.078
episode: 511/1001, score: 248, e: 0.077
episode: 512/1001, score: 237, e: 0.077
episode: 513/1001, score: 291, e: 0.076
episode: 514/1001, score: 148, e: 0.076
episode: 515/1001, score: 91, e: 0.076
episode: 516/1001, score: 133, e: 0.075
episode: 517/1001, score: 499, e: 0.075
episode: 518/1001, score: 251, e: 0.075
episode: 519/1001, score: 95, e: 0.074
episode: 520/1001, score: 162, e: 0.074
episode: 521/1001, score: 78, e: 0.073
episode: 522/1001, score: 126, e: 0.073
episode: 523/1001, score: 79, e: 0.073
episode: 524/1001, score: 123, e: 0.072
episode: 525/1001, score: 151, e: 0.072
episode: 526/1001, score: 109, e: 0.072
episode: 527/1001, score: 136, e: 0.071
episode: 528/1001, score: 138, e: 0.071
episode: 529/1001, score: 290, e: 0.071
episode: 530/1001, score: 57, e: 0.07
episode: 531/1001, score: 239, e: 0.07
episode: 532/1001, score: 373, e: 0.069
episode: 533/1001, score: 96, e: 0.069
episode: 534/1001, score: 58, e: 0.069
episode: 535/1001, score: 57, e: 0.068
episode: 536/1001, score: 180, e: 0.068
episode: 537/1001, score: 203, e: 0.068
episode: 538/1001, score: 184, e: 0.067
episode: 539/1001, score: 48, e: 0.067
episode: 540/1001, score: 61, e: 0.067
episode: 541/1001, score: 40, e: 0.066
episode: 542/1001, score: 82, e: 0.066
episode: 543/1001, score: 56, e: 0.066
episode: 544/1001, score: 53, e: 0.065
episode: 545/1001, score: 93, e: 0.065
episode: 546/1001, score: 37, e: 0.065
episode: 547/1001, score: 75, e: 0.064
episode: 548/1001, score: 75, e: 0.064
episode: 549/1001, score: 16, e: 0.064
episode: 550/1001, score: 416, e: 0.063
episode: 551/1001, score: 89, e: 0.063
episode: 552/1001, score: 54, e: 0.063
episode: 553/1001, score: 127, e: 0.063
episode: 554/1001, score: 21, e: 0.062
episode: 555/1001, score: 59, e: 0.062
episode: 556/1001, score: 499, e: 0.062
episode: 557/1001, score: 57, e: 0.061
episode: 558/1001, score: 67, e: 0.061
episode: 559/1001, score: 116, e: 0.061
episode: 560/1001, score: 499, e: 0.06
episode: 561/1001, score: 102, e: 0.06
episode: 562/1001, score: 348, e: 0.06
episode: 563/1001, score: 449, e: 0.059
episode: 564/1001, score: 85, e: 0.059
episode: 565/1001, score: 82, e: 0.059
episode: 566/1001, score: 109, e: 0.059
episode: 567/1001, score: 357, e: 0.058
episode: 568/1001, score: 414, e: 0.058
episode: 569/1001, score: 163, e: 0.058
episode: 570/1001, score: 214, e: 0.057
episode: 571/1001, score: 499, e: 0.057
episode: 572/1001, score: 64, e: 0.057
episode: 573/1001, score: 396, e: 0.057
episode: 574/1001, score: 131, e: 0.056
episode: 575/1001, score: 396, e: 0.056
episode: 576/1001, score: 168, e: 0.056
episode: 577/1001, score: 499, e: 0.055
episode: 578/1001, score: 499, e: 0.055
episode: 579/1001, score: 148, e: 0.055
episode: 580/1001, score: 91, e: 0.055
episode: 581/1001, score: 167, e: 0.054
episode: 582/1001, score: 63, e: 0.054
episode: 583/1001, score: 499, e: 0.054
episode: 584/1001, score: 158, e: 0.054
episode: 585/1001, score: 394, e: 0.053
episode: 586/1001, score: 58, e: 0.053
episode: 587/1001, score: 499, e: 0.053
episode: 588/1001, score: 75, e: 0.052
episode: 589/1001, score: 206, e: 0.052
episode: 590/1001, score: 159, e: 0.052
episode: 591/1001, score: 499, e: 0.052

In [ ]:
# env.render(close=True)

In [ ]: