In [2]:
import numpy as np
import cntk as C
from DriveItGym import *
env = DriveItEnv(time_limit=10.0, trail_length=2.2)
random_position = False  # reset to the fixed start position rather than a random one

def run_model(path, num_episodes=1):
    """Load a CNTK model from path and run it greedily for num_episodes episodes,
    reporting per-episode reward and an overall crash count."""
    stateCnt = env.observation_space.shape[0]
    observation = env._reset(random_position)
    model = C.load_model(path)
    done = False
    reward_episode = 0
    reward_sum = 0
    reward_no_crash = 0
    crashes = 0
    for i_episode in range(1, num_episodes + 1):
        while not done:
            if num_episodes < 2:
                env.render()
            # greedy action: pick the highest-scoring output of the network
            action = np.argmax(model.eval(
                observation.reshape(1, stateCnt).astype(np.float32)))
            observation, reward, done, info = env.step(action)
            reward_episode += reward
        if num_episodes <= 10:
            # reward / lap_median_length approximates laps completed
            # (lap_median_length comes from the DriveItGym import)
            print('Episode %d %s. reward: %f, laps: %f' %
                  (i_episode, info['done'], reward_episode,
                   reward_episode / lap_median_length))
        if info['done'] != 'complete':
            crashes += 1
        else:
            reward_no_crash += reward_episode
        reward_sum += reward_episode
        reward_episode = 0
        observation = env._reset(random_position)
        done = False
    if num_episodes > 1:
        print('Average reward: %f with %d crashes (%f excl. crashes)' %
              (reward_sum / num_episodes, crashes,
               reward_no_crash / (num_episodes - crashes)))
In [3]:
modelPath = 'best.mod'
In [2]:
modelPath = 'D:/jc/OneDrive - manoli.net/Courses/CS229/Project/cntk/bigboss/best.mod'
In [18]:
run_model(modelPath, 100)
In [19]:
run_model(modelPath, 10)
In [4]:
run_model(modelPath, 1)
In [5]:
env.close()
In [26]:
env._reset(random_position=False)
Out[26]:
In [13]:
env.render()
In [20]:
env.step(1)
Out[20]:
In [3]:
lr = [0.01, 0.005, 0.0025, 0.001]
lr_schedule = C.learning_rate_schedule(lr, C.UnitType.minibatch, epoch_size=5000)
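With the list form, each rate is held for epoch_size units before stepping to the next, so the schedule should walk 0.01 -> 0.005 -> 0.0025 -> 0.001 in stages of 5000. A quick sanity check (a sketch only; exactly where each boundary falls depends on CNTK's indexing convention):

# sample the schedule around the expected step points
[lr_schedule[i] for i in (0, 4999, 5000, 10000, 15000)]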
In [12]:
lr_schedule[5000]
Out[12]:
In [15]:
# max discounted penalty: per-step penalty summed with discount factor 0.98
1.0 * 2.5 / 60.0 / (1 - 0.98)
Out[15]:
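The cell above is presumably the geometric-series bound on the discounted penalty: a per-step penalty of 2.5/60, discounted by gamma = 0.98, sums to at most (2.5/60) / (1 - 0.98), roughly 2.08. A finite-horizon check of the same bound (a sketch, assuming that reading of the constants):

# truncating the infinite discounted sum after 2000 steps; converges to about 2.083
sum(0.98 ** t * (2.5 / 60.0) for t in range(2000))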
In [5]:
checkpoint_median_length
Out[5]: