In [1]:
import numpy as np
from scipy.linalg import *
from drl.env.arm import TwoLinkArm
import time

%matplotlib notebook

In [2]:
epsilon = 1e-5
Ts = 10
env = TwoLinkArm(g=0.)

Q = np.eye(env.state_dim)*100.
R = np.eye(env.action_dim)*1.

Finite difference derivatives


In [3]:
def calc_derivatives(x, u):
    A = np.zeros((env.state_dim, env.state_dim))
    for i in range(env.state_dim):
        x_tmp = x.copy()
        x_tmp[i] += epsilon
        _, f_1 = env.dynamics_func(x_tmp, u)
        x_tmp = x.copy()
        x_tmp[i] -= epsilon
        _, f_2 = env.dynamics_func(x_tmp, u)
        fxdx = (f_1 - f_2) / (2*epsilon)
        A[:, i] = fxdx
        
    B = np.zeros((env.state_dim, env.action_dim))
    for i in range(env.action_dim):
        u_tmp = u.copy()
        u_tmp[i] += epsilon
        _, f_1 = env.dynamics_func(x, u_tmp)
        u_tmp = u.copy()
        u_tmp[i] -= epsilon
        _, f_2 = env.dynamics_func(x, u_tmp)
        fxdu = (f_1 - f_2) / (2*epsilon)
        B[:, i] = fxdu
        
    return A, B

In [4]:
def run_experiment():
    x = env.reset()
    env.render()
    u = [0.]*env.action_dim
    
    V = 0.
    for _ in range(int(Ts/env.dt)):
        # Calculate optimal feedback gain K
        error = x - env.get_goal()
        A, B = calc_derivatives(error, u)

        P = solve_continuous_are(A, B, Q, R)
        K = np.dot(np.linalg.pinv(R), np.dot(B.T, P))
        
        u = -np.dot(K, error)

        x, _, _, _ = env.step(u)
        
        env.render()
        
        V += np.dot(error.T, np.dot(P, error))
    
    print('Episode %s - Total Cost: %s' % (str(i), V))

    return env.goal - x

In [5]:
for i in range(5):
    error = run_experiment()


Episode 0 - Total Cost: 2256.10063847
Episode 1 - Total Cost: 21754.5691166
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-5-dbee3154e77f> in <module>()
      1 for i in range(5):
----> 2     error = run_experiment()

<ipython-input-4-a020fa87ac81> in run_experiment()
     11 
     12         P = solve_continuous_are(A, B, Q, R)
---> 13         K = np.dot(np.linalg.pinv(R), np.dot(B.T, P))
     14 
     15         u = -np.dot(K, error)

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/numpy/linalg/linalg.py in pinv(a, rcond)
   1660     _assertNoEmpty2d(a)
   1661     a = a.conjugate()
-> 1662     u, s, vt = svd(a, 0)
   1663     m = u.shape[0]
   1664     n = vt.shape[1]

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/numpy/linalg/linalg.py in svd(a, full_matrices, compute_uv)
   1402 
   1403         signature = 'D->DdD' if isComplexType(t) else 'd->ddd'
-> 1404         u, s, vt = gufunc(a, signature=signature, extobj=extobj)
   1405         u = u.astype(result_t, copy=False)
   1406         s = s.astype(_realType(result_t), copy=False)

KeyboardInterrupt: 

In [ ]:
env.render(close=True)

Run with changing goal once goal is reached


In [ ]:
num_targets = 5
def run_experiment2():
    x = env.reset()
    env.render()
    u = [0.]*env.action_dim
    
    V_tot = 0.
    for i in range(num_targets):
        env.set_goal()
        goal_reached = False
        
        while not goal_reached:
            error = x - env.get_goal()
            A, B = calc_derivatives(error, u)

            P = solve_continuous_are(A, B, Q, R)
            K = np.dot(np.linalg.pinv(R), np.dot(B.T, P))

            u = -np.dot(K, error)

            x, _, _, _ = env.step(u)

            env.render()
            
            V = np.dot(error.T, np.dot(P, error))
            V_tot += V
            if V < 1e-4:
                goal_reached = True
                
        print('Target %s - Total Cost: %s' % (str(i), V_tot))

In [ ]:
run_experiment2()

In [ ]:
env.render(close=True)