``````

In [1]:

import numpy as np
from scipy.linalg import *
import time

%matplotlib notebook

``````
``````

In [2]:

epsilon = 1e-5
Ts = 10

Q = np.eye(env.state_dim)*100.
R = np.eye(env.action_dim)*1.

``````

## Finite difference derivatives

``````

In [3]:

def calc_derivatives(x, u):
A = np.zeros((env.state_dim, env.state_dim))
for i in range(env.state_dim):
x_tmp = x.copy()
x_tmp[i] += epsilon
_, f_1 = env.dynamics_func(x_tmp, u)
x_tmp = x.copy()
x_tmp[i] -= epsilon
_, f_2 = env.dynamics_func(x_tmp, u)
fxdx = (f_1 - f_2) / (2*epsilon)
A[:, i] = fxdx

B = np.zeros((env.state_dim, env.action_dim))
for i in range(env.action_dim):
u_tmp = u.copy()
u_tmp[i] += epsilon
_, f_1 = env.dynamics_func(x, u_tmp)
u_tmp = u.copy()
u_tmp[i] -= epsilon
_, f_2 = env.dynamics_func(x, u_tmp)
fxdu = (f_1 - f_2) / (2*epsilon)
B[:, i] = fxdu

return A, B

``````
``````

In [4]:

def run_experiment():
x = env.reset()
env.render()
u = [0.]*env.action_dim

V = 0.
for _ in range(int(Ts/env.dt)):
# Calculate optimal feedback gain K
error = x - env.get_goal()
A, B = calc_derivatives(error, u)

P = solve_continuous_are(A, B, Q, R)
K = np.dot(np.linalg.pinv(R), np.dot(B.T, P))

u = -np.dot(K, error)

x, _, _, _ = env.step(u)

env.render()

V += np.dot(error.T, np.dot(P, error))

print('Episode %s - Total Cost: %s' % (str(i), V))

return env.goal - x

``````
``````

In [5]:

for i in range(5):
error = run_experiment()

``````
``````

Episode 0 - Total Cost: 2256.10063847
Episode 1 - Total Cost: 21754.5691166

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-5-dbee3154e77f> in <module>()
1 for i in range(5):
----> 2     error = run_experiment()

<ipython-input-4-a020fa87ac81> in run_experiment()
11
12         P = solve_continuous_are(A, B, Q, R)
---> 13         K = np.dot(np.linalg.pinv(R), np.dot(B.T, P))
14
15         u = -np.dot(K, error)

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/numpy/linalg/linalg.py in pinv(a, rcond)
1660     _assertNoEmpty2d(a)
1661     a = a.conjugate()
-> 1662     u, s, vt = svd(a, 0)
1663     m = u.shape[0]
1664     n = vt.shape[1]

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/numpy/linalg/linalg.py in svd(a, full_matrices, compute_uv)
1402
1403         signature = 'D->DdD' if isComplexType(t) else 'd->ddd'
-> 1404         u, s, vt = gufunc(a, signature=signature, extobj=extobj)
1405         u = u.astype(result_t, copy=False)
1406         s = s.astype(_realType(result_t), copy=False)

KeyboardInterrupt:

``````
``````

In [ ]:

env.render(close=True)

``````

## Run with changing goal once goal is reached

``````

In [ ]:

num_targets = 5
def run_experiment2():
x = env.reset()
env.render()
u = [0.]*env.action_dim

V_tot = 0.
for i in range(num_targets):
env.set_goal()
goal_reached = False

while not goal_reached:
error = x - env.get_goal()
A, B = calc_derivatives(error, u)

P = solve_continuous_are(A, B, Q, R)
K = np.dot(np.linalg.pinv(R), np.dot(B.T, P))

u = -np.dot(K, error)

x, _, _, _ = env.step(u)

env.render()

V = np.dot(error.T, np.dot(P, error))
V_tot += V
if V < 1e-4:
goal_reached = True

print('Target %s - Total Cost: %s' % (str(i), V_tot))

``````
``````

In [ ]:

run_experiment2()

``````
``````

In [ ]:

env.render(close=True)

``````