Running the DQN model

Load a trained CNTK model from disk and evaluate it greedily on the DriveIt environment, reporting per-episode reward, an approximate lap count, and the number of crashes across runs.

In [2]:
import cntk as C
import numpy as np
from DriveItGym import *

env = DriveItEnv(time_limit=10.0, trail_length=2.2)
random_position = False

def run_model(path, num_episodes=1):
    """Greedily evaluate a saved CNTK model for num_episodes episodes."""
    stateCnt = env.observation_space.shape[0]
    observation = env._reset(random_position)

    model = C.load_model(path)

    done = False
    reward_episode = 0
    reward_sum = 0
    reward_no_crash = 0
    crashes = 0

    for i_episode in range(1, num_episodes + 1):
        while not done:
            # Only render when evaluating a single episode.
            if num_episodes < 2:
                env.render()

            # Greedy action: argmax over the model's action-value outputs.
            action = np.argmax(model.eval(
                observation.reshape(1, stateCnt).astype(np.float32)))

            observation, reward, done, info = env.step(action)
            reward_episode += reward

        if num_episodes <= 10:
            # lap_median_length is provided by the DriveItGym import;
            # reward / lap_median_length is a rough lap count.
            print('Episode %d %s. reward: %f, laps: %f' %
                  (i_episode, info['done'], reward_episode,
                   reward_episode / lap_median_length))
        if info['done'] != 'complete':
            crashes += 1
        else:
            reward_no_crash += reward_episode
        reward_sum += reward_episode
        reward_episode = 0
        observation = env._reset(random_position)
        done = False

    if num_episodes > 1:
        # max() guards against division by zero if every episode crashed.
        print('Average reward: %f with %d crashes (%f excl. crashes)' %
              (reward_sum / num_episodes, crashes,
               reward_no_crash / max(1, num_episodes - crashes)))

In [3]:
modelPath = 'best.mod'

In [2]:
modelPath = 'D:/jc/OneDrive - manoli.net/Courses/CS229/Project/cntk/bigboss/best.mod'

In [18]:
run_model(modelPath, 100)


Average reward: 22.096251 with 0 crashes (22.096251 excl. crashes)

In [19]:
run_model(modelPath, 10)


Episode 1 complete. reward: 22.080785, laps: 4.386076
Episode 2 complete. reward: 22.218435, laps: 4.413418
Episode 3 complete. reward: 22.048827, laps: 4.379728
Episode 4 complete. reward: 22.020093, laps: 4.374020
Episode 5 complete. reward: 22.066715, laps: 4.383281
Episode 6 complete. reward: 22.117833, laps: 4.393435
Episode 7 complete. reward: 22.132000, laps: 4.396249
Episode 8 complete. reward: 22.077661, laps: 4.385455
Episode 9 complete. reward: 22.104040, laps: 4.390695
Episode 10 complete. reward: 22.278536, laps: 4.425357
Average reward: 22.114492 with 0 crashes (22.114492 excl. crashes)

In [4]:
run_model(modelPath, 1)


Episode 1 complete. reward: 21.758745, laps: 4.322106

In [5]:
env.close()

In [26]:
env._reset(random_position=False)


Out[26]:
array([ 0.00200037, -0.03441861,  0.        ,  0.        ])

In [13]:
env.render()

In [20]:
env.step(1)


Out[20]:
(array([ 0.17223266,  0.00231618,  0.2       ,  0.01960784]),
 0.04174948671969142,
 False,
 {'checkpoint': False, 'done': 'unknown', 'lap': False})

In [3]:
# Piecewise-constant learning rate: each entry is used for epoch_size = 5000 samples.
lr = [0.01, 0.005, 0.0025, 0.001]
lr_schedule = C.learning_rate_schedule(lr, C.UnitType.minibatch, epoch_size=5000)

In [12]:
lr_schedule[5000]


Out[12]:
0.005
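
The schedule can be probed at other sample counts the same way; a quick sketch (same cntk indexing as the lookup above, with values read off the lr list and the epoch_size the schedule was built with):

In [ ]:
# Each lr entry should be in effect for one epoch_size (5000 samples) span.
for samples in (0, 5000, 10000, 15000):
    print(samples, lr_schedule[samples])
# Expected: 0.01, 0.005, 0.0025 and 0.001 respectively.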

In [15]:
# Max penalty: a constant per-step penalty of 2.5/60, discounted at 0.98,
# sums to at most (2.5/60) / (1 - 0.98).
1.0 * 2.5 / 60.0 / (1 - 0.98)


Out[15]:
2.0833333333333313
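
Reading this as a discounted geometric series (assuming the 0.98 in the denominator is the discount factor $\gamma$ and $p = 2.5/60$ the per-step penalty):

\[ \sum_{k=0}^{\infty} \gamma^k\,p \;=\; \frac{p}{1-\gamma} \;=\; \frac{2.5/60}{1-0.98} \;\approx\; 2.083, \]

so roughly 2.08 is the largest discounted penalty that can be accumulated from any state.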

In [5]:
checkpoint_median_length


Out[5]:
2.517145867644259
