In [1]:
import numpy as np
import tensorflow as tf
import random
from collections import deque
import dqn
from gym.envs.registration import register
import gym
from gym import wrappers

In [2]:
# Shared environment and hyper-parameters used by every cell below.
env = gym.make('CartPole-v0')
# Overrides a private attribute of the env wrapper.
# NOTE(review): presumably raises the per-episode step cap to 5000 -- confirm
# against the installed gym version, since this is not a public API.
env._max_episode_steps = 5000
# Network dimensions: length of one observation vector / number of discrete actions.
input_size = env.observation_space.shape[0]
output_size = env.action_space.n

# Discount factor applied to the bootstrapped next-state value in replay_train.
dis = 0.9
# Maximum number of transitions kept in the replay buffer in main().
REPLAY_MEMORY = 50000


[2017-08-02 17:16:28,968] Making new env: CartPole-v0

In [ ]:
def replay_train(mainDQN, targetDQN, train_batch, *, discount=None):
    """Fit the main network on a batch of replayed transitions.

    For every transition the main network's prediction for ``state`` is used
    as the regression target, with the entry for the taken ``action``
    replaced by ``reward`` (terminal transition) or by
    ``reward + discount * max(targetDQN.predict(next_state))`` otherwise.

    Args:
        mainDQN: Object exposing ``predict(state)`` and ``update(x, y)``.
            ``predict`` must return an array indexable as ``Q[0, action]``.
        targetDQN: Object exposing ``predict(state)``; only used to
            bootstrap the value of non-terminal next states.
        train_batch: Iterable of
            ``(state, action, reward, next_state, done)`` tuples.
        discount: Discount factor for the bootstrapped value. When ``None``
            (the default) the module-level ``dis`` is used, which matches
            the previous behaviour.

    Returns:
        Whatever ``mainDQN.update(x_stack, y_stack)`` returns.
    """
    if discount is None:
        discount = dis

    states = []
    targets = []
    for state, action, reward, next_state, done in train_batch:
        Q = mainDQN.predict(state)

        if done:
            # Terminal transition: there is no future value to bootstrap.
            Q[0, action] = reward
        else:
            Q[0, action] = reward + discount * np.max(
                targetDQN.predict(next_state))

        states.append(state)
        targets.append(Q)

    if states:
        # Stack once instead of re-copying the growing arrays on every
        # iteration. The cast keeps the float64 dtype that the previous
        # float64 seed arrays forced on the result.
        x_stack = np.vstack(states).astype(np.float64, copy=False)
        y_stack = np.vstack(targets).astype(np.float64, copy=False)
    else:
        # Empty batch: keep the (0, width) shapes produced previously.
        x_stack = np.empty((0, input_size))
        y_stack = np.empty((0, output_size))

    return mainDQN.update(x_stack, y_stack)

def get_copy_var_ops(*, dest_scope_name="target", src_scope_name="main"):
    """Build the ops that copy trainable variables from one scope to another.

    The trainable variables collected under ``src_scope_name`` and
    ``dest_scope_name`` are paired positionally; each destination variable
    gets an assign op fed with the current value of its source partner.
    Because the pairing uses ``zip``, surplus variables in the longer
    collection are ignored.

    Args:
        dest_scope_name: Scope whose variables are overwritten.
        src_scope_name: Scope whose variables are read.

    Returns:
        A list of assign ops, one per paired variable. Nothing is executed
        here; the caller has to run the ops in a session.
    """
    collection_key = tf.GraphKeys.TRAINABLE_VARIABLES
    sources = tf.get_collection(collection_key, scope=src_scope_name)
    destinations = tf.get_collection(collection_key, scope=dest_scope_name)

    return [
        destination.assign(source.value())
        for source, destination in zip(sources, destinations)
    ]


def bot_play(mainDQN, env=env):
    """Play one rendered episode greedily and print the accumulated reward.

    Args:
        mainDQN: Object exposing ``predict(state)``; the index of its
            largest output is used as the action at every step.
        env: Environment to play in. Defaults to the module-level ``env``
            (bound when this function is defined).
    """
    observation = env.reset()
    total_reward = 0
    finished = False

    while not finished:
        env.render()
        greedy_action = np.argmax(mainDQN.predict(observation))
        observation, step_reward, finished, _ = env.step(greedy_action)
        total_reward += step_reward

    print("Total score: {}".format(total_reward))

In [ ]:
def main():
    """Train a DQN on the module-level ``env`` and record evaluation runs.

    Steps:
      1. Build the ``main`` and ``target`` networks and sync target <- main.
      2. Run up to ``max_episodes`` episodes with an epsilon-greedy policy,
         storing every transition in a bounded replay buffer.
      3. On every episode where ``episode % 10 == 1``, train on 50 random
         minibatches and re-sync the target network.
      4. Stop early once the mean step count of the last 100 episodes
         exceeds 4950.
      5. Play 200 greedy episodes inside a ``Monitor`` wrapper writing to
         ``gym-results`` and, if an API key is configured, upload them.

    The upload key is read from the ``OPENAI_GYM_API_KEY`` environment
    variable; the upload is skipped when it is not set.
    """
    # Local import: only needed for the API-key lookup at the end.
    import os

    max_episodes = 5000
    batch_size = 10
    recent_window = 100

    # Bounded deques discard their oldest entry automatically when full,
    # which replaces the manual popleft() bookkeeping.
    replay_buffer = deque(maxlen=REPLAY_MEMORY)
    last_100_game_reward = deque(maxlen=recent_window)

    with tf.Session() as sess:
        mainDQN = dqn.DQN(sess, input_size, output_size, name="main")
        targetDQN = dqn.DQN(sess, input_size, output_size, name="target")
        tf.global_variables_initializer().run()

        # Start with both networks holding identical weights.
        copy_ops = get_copy_var_ops(dest_scope_name="target",
                                    src_scope_name="main")
        sess.run(copy_ops)

        for episode in range(max_episodes):
            # Exploration rate decays from 1.0 as episodes accumulate.
            e = 1. / ((episode / 10) + 1)
            done = False
            step_count = 0
            state = env.reset()

            while not done:
                if np.random.rand() < e:
                    action = env.action_space.sample()
                else:
                    action = np.argmax(mainDQN.predict(state))
                next_state, reward, done, _ = env.step(action)
                if done:
                    # Penalise the transition that ends the episode.
                    # NOTE(review): this also fires if the episode ends
                    # for a reason other than failure -- confirm intended.
                    reward = -100

                replay_buffer.append((state, action, reward, next_state, done))

                state = next_state
                step_count += 1

            print("Episode: {}  steps: {}".format(episode, step_count))

            if episode % 10 == 1:
                for _ in range(50):
                    # Clamp the sample size so a short buffer cannot make
                    # random.sample raise ValueError.
                    minibatch = random.sample(
                        replay_buffer, min(batch_size, len(replay_buffer)))
                    loss, _ = replay_train(mainDQN, targetDQN, minibatch)

                print("Loss: ", loss)
                sess.run(copy_ops)

            last_100_game_reward.append(step_count)

            # Same timing as before: the check first runs on episode index
            # 100, i.e. once more than `recent_window` episodes were played.
            if episode >= recent_window:
                avg_reward = np.mean(last_100_game_reward)

                if avg_reward > 4950:
                    print("Game Cleared in ",episode, "episodes with avg reward ",avg_reward)
                    break

        env2 = wrappers.Monitor(env, 'gym-results', force=True)
        try:
            for _ in range(200):
                bot_play(mainDQN, env=env2)
        finally:
            # Always close the monitor, even if an evaluation episode fails.
            env2.close()

        # Never keep credentials in the notebook. The key that used to be
        # hard-coded here was committed in plain text and should be treated
        # as compromised and revoked.
        api_key = os.environ.get("OPENAI_GYM_API_KEY")
        if api_key:
            gym.upload("gym-results", api_key=api_key)
        else:
            print("OPENAI_GYM_API_KEY is not set; skipping upload of gym-results")


# Run training only when executed as the top-level module (in a notebook
# kernel __name__ is "__main__", so running this cell starts training).
if __name__ == "__main__":
    main()


Episode: 0  steps: 13
Episode: 1  steps: 24
Loss:  1.78031
Episode: 2  steps: 12
Episode: 3  steps: 11
Episode: 4  steps: 14
Episode: 5  steps: 12
Episode: 6  steps: 13
Episode: 7  steps: 12
Episode: 8  steps: 10
Episode: 9  steps: 11
Episode: 10  steps: 10
Episode: 11  steps: 9
Loss:  5.40925
Episode: 12  steps: 9
Episode: 13  steps: 10
Episode: 14  steps: 10
Episode: 15  steps: 16
Episode: 16  steps: 11
Episode: 17  steps: 10
Episode: 18  steps: 14
Episode: 19  steps: 12
Episode: 20  steps: 10
Episode: 21  steps: 8
Loss:  1312.22
Episode: 22  steps: 10
Episode: 23  steps: 12
Episode: 24  steps: 8
Episode: 25  steps: 9
Episode: 26  steps: 9
Episode: 27  steps: 8
Episode: 28  steps: 9
Episode: 29  steps: 11
Episode: 30  steps: 8
Episode: 31  steps: 10
Loss:  365.26
Episode: 32  steps: 11
Episode: 33  steps: 10
Episode: 34  steps: 11
Episode: 35  steps: 10
Episode: 36  steps: 10
Episode: 37  steps: 10
Episode: 38  steps: 9
Episode: 39  steps: 15
Episode: 40  steps: 9
Episode: 41  steps: 10
Loss:  298.223
Episode: 42  steps: 10
Episode: 43  steps: 10
Episode: 44  steps: 11
Episode: 45  steps: 12
Episode: 46  steps: 10
Episode: 47  steps: 9
Episode: 48  steps: 10
Episode: 49  steps: 12
Episode: 50  steps: 10
Episode: 51  steps: 11
Loss:  291.975
Episode: 52  steps: 54
Episode: 53  steps: 57
Episode: 54  steps: 45
Episode: 55  steps: 32
Episode: 56  steps: 58
Episode: 57  steps: 140
Episode: 58  steps: 32
Episode: 59  steps: 43
Episode: 60  steps: 34
Episode: 61  steps: 63
Loss:  5.34222
Episode: 62  steps: 77
Episode: 63  steps: 50
Episode: 64  steps: 48
Episode: 65  steps: 37
Episode: 66  steps: 38
Episode: 67  steps: 51
Episode: 68  steps: 44
Episode: 69  steps: 71
Episode: 70  steps: 34
Episode: 71  steps: 47
Loss:  10.212
Episode: 72  steps: 42
Episode: 73  steps: 123
Episode: 74  steps: 60
Episode: 75  steps: 96
Episode: 76  steps: 163
Episode: 77  steps: 160
Episode: 78  steps: 67
Episode: 79  steps: 106
Episode: 80  steps: 143
Episode: 81  steps: 47
Loss:  417.753
Episode: 82  steps: 136
Episode: 83  steps: 33
Episode: 84  steps: 51
Episode: 85  steps: 147
Episode: 86  steps: 117
Episode: 87  steps: 63
Episode: 88  steps: 48
Episode: 89  steps: 89
Episode: 90  steps: 57
Episode: 91  steps: 103
Loss:  5.03202
Episode: 92  steps: 83
Episode: 93  steps: 169
Episode: 94  steps: 70
Episode: 95  steps: 153
Episode: 96  steps: 191
Episode: 97  steps: 131
Episode: 98  steps: 86
Episode: 99  steps: 122
Episode: 100  steps: 82
Episode: 101  steps: 739
Loss:  4.35775
Episode: 102  steps: 47
Episode: 103  steps: 150
Episode: 104  steps: 77
Episode: 105  steps: 58
Episode: 106  steps: 77
Episode: 107  steps: 131
Episode: 108  steps: 125
Episode: 109  steps: 99
Episode: 110  steps: 45
Episode: 111  steps: 141
Loss:  182.489
Episode: 112  steps: 60
Episode: 113  steps: 58
Episode: 114  steps: 215
Episode: 115  steps: 39
Episode: 116  steps: 126
Episode: 117  steps: 241
Episode: 118  steps: 110
Episode: 119  steps: 39
Episode: 120  steps: 174
Episode: 121  steps: 50
Loss:  0.787672
Episode: 122  steps: 285
Episode: 123  steps: 209
Episode: 124  steps: 235
Episode: 125  steps: 164
Episode: 126  steps: 77
Episode: 127  steps: 83
Episode: 128  steps: 57
Episode: 129  steps: 96
Episode: 130  steps: 83
Episode: 131  steps: 121
Loss:  0.488281
Episode: 132  steps: 111
Episode: 133  steps: 204
Episode: 134  steps: 65
Episode: 135  steps: 69
Episode: 136  steps: 175
Episode: 137  steps: 111
Episode: 138  steps: 112
Episode: 139  steps: 325
Episode: 140  steps: 77
Episode: 141  steps: 254
Loss:  0.708153
Episode: 142  steps: 199
Episode: 143  steps: 72
Episode: 144  steps: 74
Episode: 145  steps: 167
Episode: 146  steps: 237
Episode: 147  steps: 197
Episode: 148  steps: 219
Episode: 149  steps: 100
Episode: 150  steps: 143
Episode: 151  steps: 102
Loss:  1.33653
Episode: 152  steps: 125
Episode: 153  steps: 62
Episode: 154  steps: 48
Episode: 155  steps: 245
Episode: 156  steps: 230
Episode: 157  steps: 57
Episode: 158  steps: 213
Episode: 159  steps: 71
Episode: 160  steps: 131
Episode: 161  steps: 58
Loss:  0.658904
Episode: 162  steps: 64
Episode: 163  steps: 64
Episode: 164  steps: 53
Episode: 165  steps: 97
Episode: 166  steps: 65
Episode: 167  steps: 115
Episode: 168  steps: 380
Episode: 169  steps: 61
Episode: 170  steps: 135
Episode: 171  steps: 252
Loss:  116.689
Episode: 172  steps: 126
Episode: 173  steps: 99
Episode: 174  steps: 116
Episode: 175  steps: 168
Episode: 176  steps: 125
Episode: 177  steps: 50
Episode: 178  steps: 69
Episode: 179  steps: 419
Episode: 180  steps: 67
Episode: 181  steps: 78
Loss:  0.695187
Episode: 182  steps: 66
Episode: 183  steps: 191
Episode: 184  steps: 69
Episode: 185  steps: 63
Episode: 186  steps: 60
Episode: 187  steps: 52
Episode: 188  steps: 97
Episode: 189  steps: 162
Episode: 190  steps: 125
Episode: 191  steps: 167
Loss:  2.36047
Episode: 192  steps: 130
Episode: 193  steps: 82
Episode: 194  steps: 66
Episode: 195  steps: 124
Episode: 196  steps: 69
Episode: 197  steps: 91
Episode: 198  steps: 112
Episode: 199  steps: 211
Episode: 200  steps: 81
Episode: 201  steps: 45
Loss:  1.83495
Episode: 202  steps: 55
Episode: 203  steps: 40
Episode: 204  steps: 171
Episode: 205  steps: 211
Episode: 206  steps: 134
Episode: 207  steps: 154
Episode: 208  steps: 70
Episode: 209  steps: 83
Episode: 210  steps: 52
Episode: 211  steps: 63
Loss:  1.34356
Episode: 212  steps: 107
Episode: 213  steps: 75
Episode: 214  steps: 49
Episode: 215  steps: 67
Episode: 216  steps: 49
Episode: 217  steps: 59
Episode: 218  steps: 83
Episode: 219  steps: 63
Episode: 220  steps: 125
Episode: 221  steps: 67
Loss:  1.16707
Episode: 222  steps: 71
Episode: 223  steps: 66
Episode: 224  steps: 128
Episode: 225  steps: 62
Episode: 226  steps: 198
Episode: 227  steps: 71
Episode: 228  steps: 94
Episode: 229  steps: 173
Episode: 230  steps: 289
Episode: 231  steps: 56
Loss:  1.06043
Episode: 232  steps: 176
Episode: 233  steps: 199
Episode: 234  steps: 109
Episode: 235  steps: 100
Episode: 236  steps: 69
Episode: 237  steps: 44
Episode: 238  steps: 266
Episode: 239  steps: 55
Episode: 240  steps: 80
Episode: 241  steps: 209
Loss:  1.28873
Episode: 242  steps: 69
Episode: 243  steps: 75
Episode: 244  steps: 317
Episode: 245  steps: 72
Episode: 246  steps: 118
Episode: 247  steps: 49
Episode: 248  steps: 184
Episode: 249  steps: 225
Episode: 250  steps: 53
Episode: 251  steps: 69
Loss:  1.4058
Episode: 252  steps: 86
Episode: 253  steps: 76
Episode: 254  steps: 99
Episode: 255  steps: 50
Episode: 256  steps: 65
Episode: 257  steps: 62
Episode: 258  steps: 111
Episode: 259  steps: 304
Episode: 260  steps: 55
Episode: 261  steps: 191
Loss:  2.78974
Episode: 262  steps: 62
Episode: 263  steps: 139
Episode: 264  steps: 70
Episode: 265  steps: 64
Episode: 266  steps: 163
Episode: 267  steps: 61
Episode: 268  steps: 51
Episode: 269  steps: 83
Episode: 270  steps: 64
Episode: 271  steps: 63
Loss:  0.971678
Episode: 272  steps: 51
Episode: 273  steps: 169
Episode: 274  steps: 144
Episode: 275  steps: 258
Episode: 276  steps: 78
Episode: 277  steps: 60
Episode: 278  steps: 203
Episode: 279  steps: 197
Episode: 280  steps: 205
Episode: 281  steps: 210
Loss:  2.11429
Episode: 282  steps: 340
Episode: 283  steps: 216
Episode: 284  steps: 80
Episode: 285  steps: 47
Episode: 286  steps: 61
Episode: 287  steps: 119
Episode: 288  steps: 78
Episode: 289  steps: 74
Episode: 290  steps: 61
Episode: 291  steps: 162
Loss:  1.76493
Episode: 292  steps: 62
Episode: 293  steps: 71
Episode: 294  steps: 76
Episode: 295  steps: 75
Episode: 296  steps: 232
Episode: 297  steps: 97
Episode: 298  steps: 391
Episode: 299  steps: 152
Episode: 300  steps: 56
Episode: 301  steps: 132
Loss:  2.47204
Episode: 302  steps: 202
Episode: 303  steps: 206
Episode: 304  steps: 56
Episode: 305  steps: 142
Episode: 306  steps: 186
Episode: 307  steps: 56
Episode: 308  steps: 52
Episode: 309  steps: 68
Episode: 310  steps: 61
Episode: 311  steps: 139
Loss:  3.92604
Episode: 312  steps: 63
Episode: 313  steps: 46
Episode: 314  steps: 58
Episode: 315  steps: 95
Episode: 316  steps: 74
Episode: 317  steps: 458
Episode: 318  steps: 74
Episode: 319  steps: 174
Episode: 320  steps: 64
Episode: 321  steps: 54
Loss:  0.8199
Episode: 322  steps: 166
Episode: 323  steps: 72
Episode: 324  steps: 56
Episode: 325  steps: 375
Episode: 326  steps: 150
Episode: 327  steps: 59
Episode: 328  steps: 83
Episode: 329  steps: 94
Episode: 330  steps: 103
Episode: 331  steps: 60
Loss:  2.47375
Episode: 332  steps: 58
Episode: 333  steps: 63
Episode: 334  steps: 68
Episode: 335  steps: 227
Episode: 336  steps: 215
Episode: 337  steps: 68
Episode: 338  steps: 172
Episode: 339  steps: 81
Episode: 340  steps: 77
Episode: 341  steps: 72
Loss:  1.1932
Episode: 342  steps: 87
Episode: 343  steps: 188
Episode: 344  steps: 342
Episode: 345  steps: 191

In [ ]:


In [ ]: