In [1]:
import cntk as C
import numpy as np
from DriveItMultiGym import *
from belief import *
car = Car()
env = DriveItEnv([car], time_limit=10.0, gamma=0.98, noisy=True)
belief = PositionTracking(car)
random_position=False
stateCnt = belief.observation_space.shape[0]
def run_episode(model, render=False):
    # Roll out one episode, acting greedily with respect to the Q-network.
    o = env.reset()
    s = belief.reset(o[car])
    s = s / belief.observation_space.high
    R = 0
    actions = {}
    while True:
        if render: env.render()
        a = np.argmax(model.eval(s.reshape(1, stateCnt).astype(np.float32)))
        actions[car] = a
        o_, r, done, info = env.step(actions)
        s_ = belief.update(o_[car], env.dt)
        s_ = s_ / belief.observation_space.high
        if done:
            s_ = None
        s = s_
        R += r[car]
        if done:
            return R, info

def run_model(path, num_episodes=1):
    # Evaluate a saved model over num_episodes episodes, counting crashes.
    model = C.load_model(path)
    reward_sum = 0
    reward_no_crash = 0
    crashes = 0
    for i_episode in range(1, num_episodes + 1):
        R, info = run_episode(model, num_episodes < 2)
        if num_episodes <= 10:
            print('Episode %d %s. reward: %f, laps: %f' %
                  (i_episode, info['done'], R, R / lap_median_length))
        if info['done'] != 'complete':
            crashes += 1
        else:
            reward_no_crash += R
        reward_sum += R
    if num_episodes > 1:
        print('Average reward: %f with %d crashes (%f excl. crashes)' %
              (reward_sum / num_episodes, crashes,
               reward_no_crash / (num_episodes - crashes)))
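For reference, a quick sanity check of the network input used above; it is only a sketch that reuses the objects already constructed in this cell and assumes the belief state is a flat array of length stateCnt, as run_episode treats it.
In [ ]:
# Sketch: the policy input is the belief state normalized by the
# observation-space upper bounds, fed to model.eval() as a (1, stateCnt)
# float32 batch.
s = belief.reset(env.reset()[car]) / belief.observation_space.high
s.shape, stateCnt, s.dtype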
In [2]:
modelPath = 'last.mod'
In [2]:
modelPath = 'D:/jc/OneDrive - manoli.net/Courses/CS229/Project/cntk/bigboss/best.mod'
In [18]:
run_model(modelPath, 100)
In [19]:
run_model(modelPath, 10)
In [3]:
run_model(modelPath, 1)
In [4]:
env.close()
In [26]:
env._reset(random_position=False)
Out[26]:
In [13]:
env.render()
In [20]:
env.step(1)
Out[20]:
In [3]:
lr = [0.01, 0.005, 0.0025, 0.001]
lr_schedule = C.learning_rate_schedule(lr, C.UnitType.minibatch, epoch_size=5000)
In [12]:
lr_schedule[5000]
Out[12]:
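As a rough illustration of the schedule semantics (a sketch only, reusing lr_schedule from above): with epoch_size=5000, CNTK applies each rate in the list for 5000 samples, holds the last value afterwards, and indexing by cumulative sample count returns the rate in effect at that point.
In [ ]:
# Probe the schedule at a few cumulative sample counts; the rate should step
# down every 5000 samples and then hold at the final value of 0.001.
[lr_schedule[n] for n in (0, 4999, 5000, 10000, 15000, 30000)]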
In [15]:
#max penalty
1.0 * 2.5 / 60.0 / (1 - 0.98)
Out[15]:
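The cell above appears to bound the total discounted penalty by the geometric series r / (1 - gamma). A minimal sketch of that sum, assuming 2.5 / 60 is the largest per-step penalty (e.g. 2.5 m/s over a 1/60 s step) and gamma = 0.98:
In [ ]:
# Truncated geometric series: sum_t r * gamma**t converges to r / (1 - gamma).
r_step, gamma = 2.5 / 60.0, 0.98
sum(r_step * gamma**t for t in range(2000))   # ~2.083, matching the cell above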
In [5]:
checkpoint_median_length
Out[5]: