In [13]:
from DriveItGym import *
import numpy as np
from numpy import pi

num_episodes = 1        # number of episodes to run
time_limit = 10.0       # episode time limit
throttle_limit = 0.75   # maximum throttle
gamma = 0.98            # discount factor
epsilon = 0.1           # dead band on the steering correction
kp, kv = 1.0, 0.3       # proportional and derivative gains on ym
random_position = False

env = DriveItEnv(time_limit, throttle_limit, gamma, show_belief_state=True,
                 noisy=True, trail_length=3.3)
observation = env._reset(random_position)
ym_ = 0.0               # previous ym, for the derivative term
done = False
reward_episode = 0
reward_sum = 0.0
crashes = 0
obs = []                # recorded beliefs
pos = []                # recorded true states
err = []                # recorded belief errors
for i_episode in range(1, num_episodes + 1):
    while not done:
        if num_episodes < 2 and time_limit < 60.0:
            env.render()  # only render short, single-episode runs
        xm, ym, theta, steer, v, bl = observation
        # PD-style correction on ym (dt comes from the DriveItGym star import).
        dym = ym - ym_
        ym_ = ym
        f = steer + kp * ym + kv * dym / dt
        if f > epsilon: action = 2
        elif f < -epsilon: action = 1
        else: action = 0
        if v < 2.5:
            action += 3  # below the speed threshold, also accelerate
        # Record the belief, the true state (in the same layout) and the belief error.
        s = env.state
        sn = np.array((s[5], s[6], s[2], 0, s[4] * v_max, 0.0))
        b = np.array(env.belief)
        e = b - sn  # (xm-sn[0], ym-sn[1], theta-sn[2], 0, v-sn[4])
        obs.append(env.belief)
        pos.append(sn)
        err.append(e)
        observation, reward, done, info = env.step(action)
        reward_episode += reward
        #reward_sum = reward + gamma * reward_sum

    if num_episodes <= 10:
        print('Episode %d %s. reward: %f, laps: %f' %
              (i_episode, info['done'], reward_episode,
               reward_episode / lap_median_length))
    if info['done'] != 'complete':
        crashes += 1
    reward_sum += reward_episode
    reward_episode = 0
    observation = env._reset(random_position)
    done = False

if num_episodes > 1:
    print('Average reward: %f with %d crashes' %
          (reward_sum / num_episodes, crashes))
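The action selection above is a bang-bang rule driven by a PD-style correction on ym (presumably the lateral offset from the track centre line). A standalone sketch mirroring the cell above; which way actions 1 and 2 steer is an assumption about DriveItGym, and dt comes from its star import:

def select_action(steer, ym, dym, v, dt, kp=1.0, kv=0.3, epsilon=0.1):
    # Proportional term on ym, derivative term on its change per step.
    f = steer + kp * ym + kv * dym / dt
    if f > epsilon:
        action = 2      # presumably steer toward one side
    elif f < -epsilon:
        action = 1      # presumably steer toward the other side
    else:
        action = 0      # keep the current steering
    if v < 2.5:
        action += 3     # below the speed threshold, also accelerate
    return action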
In [6]:
env.close()
In [14]:
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
style.use('ggplot')
%matplotlib inline
sns.set()
y = plt.plot([o for o in obs])  # all recorded belief components over time
#plt.xlabel('time');plt.ylabel('error')
In [15]:
i = 1  # state/belief component to inspect (1 = ym)
y = plt.plot([e[i] if abs(e[i]) < 1.0 else 0 for e in err])  # belief error on that component, outliers clipped to 0
#plt.plot([o[i] for o in obs])
In [16]:
y = plt.plot([p[i] for p in pos])
y = plt.plot([o[i] for o in obs])
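A labelled version of the same comparison; the axis and legend names are assumptions, not part of the environment:

y = plt.plot([p[i] for p in pos], label='true state')
y = plt.plot([o[i] for o in obs], label='belief')
plt.xlabel('time step')
plt.ylabel('component %d' % i)
plt.legend()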
In [6]:
env._reset(random_position)
Out[6]:
In [7]:
env.render()
In [22]:
# "out" penalty = maximum discounted reward for laps along the median line at full speed
rmax = throttle_limit * v_max * dt / (1 - gamma)
rmax
Out[22]:
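rmax is the closed form of a geometric series: assuming the per-step reward at full speed is roughly throttle_limit * v_max * dt (the distance covered in one step), the infinite-horizon discounted sum is that per-step reward divided by (1 - gamma). A quick numeric check with placeholder values for v_max and dt (the real ones come from DriveItGym):

v_max_guess, dt_guess = 2.5, 1.0 / 60.0                       # placeholders, not the DriveItGym values
r_step = throttle_limit * v_max_guess * dt_guess              # reward per step at full speed
r_truncated = sum(r_step * gamma ** k for k in range(10000))  # discounted sum, truncated
r_closed = r_step / (1 - gamma)                               # closed form used above
print(r_truncated, r_closed)                                  # the two agree to many decimals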
In [9]:
from filter import *
f = LowPassFilter(np.array([0.8, 0.5, 0.0, 0.3, 0.7]), np.array([1.0, 2, 3, 4, 5]))
f.filter([1,2,3,4,5])
Out[9]:
In [26]:
f.filter([1,2,3,4,5])
Out[26]:
In [27]:
f.filter([1,10,10,4,5])
Out[27]:
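filter is a project-local module, so the behaviour above is inferred rather than documented. Assuming LowPassFilter performs per-channel exponential smoothing, with one feedback gain and one initial value per channel as in the constructor call above, a minimal sketch could look like this:

import numpy as np

class LowPassFilterSketch:
    """Per-channel exponential smoothing: y_n = g * y_{n-1} + (1 - g) * x_n."""

    def __init__(self, feedback_gain, initial_value):
        self.feedback_gain = np.asarray(feedback_gain, dtype=float)
        self.output_gain = 1.0 - self.feedback_gain  # assumes every gain < 1
        # Seed the internal state so the first output blends with initial_value.
        self.feedback_value = np.asarray(initial_value, dtype=float) / self.output_gain

    def filter(self, value):
        self.feedback_value = self.feedback_gain * self.feedback_value + np.asarray(value, dtype=float)
        return self.output_gain * self.feedback_value

Under that assumption the output stays at [1, 2, 3, 4, 5] as long as the input equals the state, and channels with a larger feedback gain react more slowly to the [1, 10, 10, 4, 5] step.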
In [2]:
import numpy as np
g = 2.0
i = np.array([1.0, 2.0])
o = np.ones_like(i)
i * g
Out[2]:
In [7]:
import numpy as np
gain = 0.85
initial_value = 0.0
feedback_gain = np.ones_like(initial_value) * gain
output_gain = 1.0 - feedback_gain
output = initial_value
feedback_value = initial_value / output_gain
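The cell above only prepares the smoothing variables; a single hypothetical update step with them, using the same exponential-smoothing form as the sketch above, would be:

measurement = 1.0
feedback_value = feedback_gain * feedback_value + measurement  # blend the new measurement into the state
output = output_gain * feedback_value
print(output)  # 0.15 * (0.85 * 0.0 + 1.0) ≈ 0.15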
In [1]:
def plop(feedback_gain, initial_value=np.array([0.0])):
    # Broadcast a scalar gain to the shape of the initial value.
    feedback_gain = np.ones_like(initial_value) * feedback_gain
    print(feedback_gain)

plop(0.85)