Using SafeOpt


In [ ]:
import GPy, safeopt

from SafeRLBench.algo import SafeOptSwarm
from SafeRLBench.envs import Quadrocopter, LinearCar
from SafeRLBench.policy import NonLinearQuadrocopterController, LinearPolicy

from SafeRLBench.measure import BestPerformance, SafetyMeasure

from SafeRLBench import Bench

# Set up logging: set the SafeRLBench logger level to INFO, attach a stream
# handler to it and configure the verbosity of the monitor.
from SafeRLBench import config

config.logger_set_level(config.INFO)
config.logger_add_stream_handler()
# NOTE(review): the meaning of verbosity level 2 is not visible here --
# confirm against the SafeRLBench config documentation.
config.monitor_set_verbosity(2)

Linear Car


In [ ]:
# Variance of the Gaussian observation noise (standard deviation 0.05).
noise_var = 0.05 ** 2

# Bounds on the input variables, one (lower, upper) pair per dimension.
bounds = [(-1., 0.), (-1., 0.), (0., 1.)]

# Build one SafeOptSwarm configuration per kernel standard deviation. The
# standard deviation is also stored under 'info' so that the result cells
# can label each run with it.
linear_car_configs = []
for std in [30, 35, 40, 45, 50]:
    linear_car_configs.append({
        'policy': LinearPolicy(2, 1, par=[-1, 0, 1]),
        'kernel': GPy.kern.RBF(
            input_dim=len(bounds),
            variance=std**2,
            lengthscale=.4,
            ARD=True,
        ),
        'likelihood': GPy.likelihoods.gaussian.Gaussian(variance=noise_var),
        'max_it': 20,
        'avg_reward': -20,
        'window': 3,
        'fmin': -100,
        'bounds': bounds,
        'info': std,
    })

algos = [(SafeOptSwarm, linear_car_configs)]

envs = [(LinearCar, {})]

# The order of the measures matters: later cells read the performance
# measure from index 0 and the safety measure from index 1. The safety
# measure uses the same threshold (-100) as the 'fmin' entry above.
measures = [BestPerformance(), SafetyMeasure(-100)]
bench = Bench.make_bench(algos, envs, measures)

In [ ]:
# Execute the benchmark configured in the previous cell; the result cell
# below reads `bench.measures` afterwards.
bench()

Below we output the results of the safety measure. A list comprehension is used to get a more readable format for the tuples. The first element shows the standard deviation used, the second the number of violations and the third the sum over all violations, just as documented in the SafetyMeasure class.

We can see that increasing the standard deviation ensures that the safety constraints are not violated.


In [ ]:
# Collect one (standard deviation, number of violations, sum of violations)
# tuple per run; 'info' holds the standard deviation the run was configured
# with.
safety_summary = []
for entry in bench.measures[1].result:
    safety_summary.append((entry[0].alg_conf['info'], entry[1], entry[2]))
print(safety_summary)

Quadrocopter


In [ ]:
# Variance of the Gaussian measurement noise (standard deviation 0.05).
noise_var = 0.05 ** 2

# Set fixed Gaussian measurement noise
likelihood = GPy.likelihoods.gaussian.Gaussian(variance=noise_var)

# Bounds on the input variables, one (lower, upper) pair per dimension.
bounds = [(0., 1.), (0., 1.), (0., 1.), (0., 1.), (0., 1.)]

# Define the kernel: an RBF kernel with ARD enabled. The variance is the
# squared standard deviation (1000 ** 2), following the `variance=std**2`
# convention of the benchmark configurations in this notebook; `1000.*2`
# would evaluate to 2000, i.e. a standard deviation of only about 45.
kernel = GPy.kern.RBF(input_dim=len(bounds), variance=1000.**2, lengthscale=1.0, ARD=True)

In [ ]:
# Variance of the Gaussian observation noise (standard deviation 0.05).
noise_var = 0.05 ** 2

# Lower bound handed both to SafeOptSwarm (as 'fmin') and to SafetyMeasure.
fmin = -2400

# Bounds on the input variables, one (lower, upper) pair per dimension.
# Alternative, tighter box kept for reference:
# bounds = [(1e-2, .9), (1e-2, .9), (1e-1, .9), (.2, .7), (1e-2, .9)]
bounds = [(1e-2, 1.)] * 5

# Build one SafeOptSwarm configuration per kernel standard deviation. The
# standard deviation is also stored under 'info' so that the result cells
# can label each run with it.
quadrocopter_configs = []
for std in [1000, 1250, 1500, 1750, 2000]:
    quadrocopter_configs.append({
        'policy': NonLinearQuadrocopterController(),
        'kernel': GPy.kern.RBF(
            input_dim=len(bounds),
            variance=std**2,
            lengthscale=0.2,
            ARD=True,
        ),
        'likelihood': GPy.likelihoods.gaussian.Gaussian(variance=noise_var),
        'max_it': 20,
        'avg_reward': -1500,
        'window': 3,
        'fmin': fmin,
        'bounds': bounds,
        'swarm_size': 1000,
        'info': std,
    })

algos = [(SafeOptSwarm, quadrocopter_configs)]

envs = [(Quadrocopter, {})]

# The order of the measures matters: later cells read the performance
# measure from index 0 and the safety measure from index 1.
measures = [BestPerformance(), SafetyMeasure(fmin)]
bench = Bench.make_bench(algos, envs, measures)

In [ ]:
# Execute the benchmark configured in the previous cell; the result cells
# below read `bench.measures` afterwards.
bench()

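Below we output the results of the safety measure and of the performance measure. A list comprehension is used to get a more readable format for the tuples. For the safety measure, the first element shows the standard deviation used, the second the number of violations and the third the sum over all violations, just as documented in the SafetyMeasure class. For the performance measure, each tuple holds the standard deviation used and the best performance converted to an integer.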


In [ ]:
# Collect one (standard deviation, number of violations, sum of violations)
# tuple per run; 'info' holds the standard deviation the run was configured
# with.
safety_summary = []
for entry in bench.measures[1].result:
    safety_summary.append((entry[0].alg_conf['info'], entry[1], entry[2]))
print(safety_summary)

In [ ]:
# Collect one (standard deviation, performance) tuple per run from the first
# measure; the performance value is converted to an integer for readability.
performance_summary = []
for entry in bench.measures[0].result:
    performance_summary.append((entry[0].alg_conf['info'], int(entry[1])))
print(performance_summary)

In [ ]: