In [ ]:
import GPy, safeopt
from SafeRLBench.algo import SafeOptSwarm
from SafeRLBench.envs import Quadrocopter, LinearCar
from SafeRLBench.policy import NonLinearQuadrocopterController, LinearPolicy
from SafeRLBench.measure import BestPerformance, SafetyMeasure
from SafeRLBench import Bench
# set up logging
from SafeRLBench import config
# Configure SafeRLBench logging for this notebook: set the logger level to
# INFO and attach a stream handler (presumably so that log messages show up
# in the cell output -- confirm against SafeRLBench.config).
config.logger_set_level(config.INFO)
config.logger_add_stream_handler()
# NOTE(review): the meaning of monitor verbosity level 2 is defined by
# SafeRLBench.config and is not visible here.
config.monitor_set_verbosity(2)
In [ ]:
# Benchmark definition: SafeOptSwarm on the LinearCar environment, one run per
# kernel standard deviation.
noise_var = 0.05 ** 2
bounds = [(-1., 0.), (-1., 0.), (0., 1.)]
# Threshold handed both to the algorithm ('fmin') and to SafetyMeasure.
fmin = -100


def _linear_car_config(prior_std):
    """Return one SafeOptSwarm configuration for the given kernel std.

    The kernel variance is ``prior_std ** 2``; the std itself is stored under
    ``'info'`` so that it can be read back from the benchmark results.
    A fresh policy, kernel and likelihood are created on every call.
    """
    return {
        'policy': LinearPolicy(2, 1, par=[-1, 0, 1]),
        'kernel': GPy.kern.RBF(
            input_dim=len(bounds),
            variance=prior_std ** 2,
            lengthscale=.4,
            ARD=True,
        ),
        'likelihood': GPy.likelihoods.gaussian.Gaussian(variance=noise_var),
        'max_it': 20,
        'avg_reward': -20,
        'window': 3,
        'fmin': fmin,
        'bounds': bounds,
        'info': prior_std,
    }


algos = [
    (SafeOptSwarm, [_linear_car_config(s) for s in (30, 35, 40, 45, 50)]),
]
envs = [(LinearCar, {})]
# Index 0: best performance, index 1: safety measure.
measures = [BestPerformance(), SafetyMeasure(fmin)]
bench = Bench.make_bench(algos, envs, measures)
In [ ]:
# Invoke the Bench instance built in the previous cell; the cells below read
# the outcome from bench.measures.
bench()
Below we output the results of the safety measure. A list comprehension is used to put the tuples into a more readable
format.
The first element shows the standard deviation used, the second the number of violations, and the last one the sum over
all violations, as documented in the SafetyMeasure
class.
We can see that increasing the standard deviation ensures that the safety constraints are not violated.
In [ ]:
# Safety measure (second entry of bench.measures): one tuple per result made
# of the run's 'info' value, followed by the result's second and third fields.
print([
    (entry[0].alg_conf['info'], entry[1], entry[2])
    for entry in bench.measures[1].result
])
In [ ]:
# Stand-alone GP ingredients: Gaussian noise likelihood, input bounds and an
# ARD RBF kernel over a five-dimensional input.
noise_var = 0.05 ** 2
# Set fixed Gaussian measurement noise
likelihood = GPy.likelihoods.gaussian.Gaussian(variance=noise_var)
# Bounds on the input variables
bounds = [(0., 1.), (0., 1.), (0., 1.), (0., 1.), (0., 1.)]
# Define kernel. The variance is the *squared* prior standard deviation, as in
# the benchmark cells of this notebook (variance=std**2). The previous
# expression `1000.*2` evaluated to 2000 and was most likely a typo for
# `1000.**2`.
prior_std = 1000.
kernel = GPy.kern.RBF(input_dim=len(bounds), variance=prior_std ** 2, lengthscale=1.0, ARD=True)
In [ ]:
# Benchmark definition: SafeOptSwarm on the Quadrocopter environment, one run
# per kernel standard deviation.
noise_var = 0.05 ** 2
# Threshold handed both to the algorithm ('fmin') and to SafetyMeasure.
fmin = -2400
# Bounds on the input variables.
# Alternative bounds kept for reference:
# bounds = [(1e-2, .9), (1e-2, .9), (1e-1, .9), (.2, .7), (1e-2, .9)]
bounds = [(1e-2, 1.)] + [(1e-2, 1.)] + [(1e-2, 1.)] + [(1e-2, 1.)] + [(1e-2, 1.)]


def _quadrocopter_config(prior_std):
    """Return one SafeOptSwarm configuration for the given kernel std.

    The kernel variance is ``prior_std ** 2``; the std itself is stored under
    ``'info'`` so that it can be read back from the benchmark results.
    A fresh policy, kernel and likelihood are created on every call.
    """
    return {
        'policy': NonLinearQuadrocopterController(),
        'kernel': GPy.kern.RBF(
            input_dim=len(bounds),
            variance=prior_std ** 2,
            lengthscale=0.2,
            ARD=True,
        ),
        'likelihood': GPy.likelihoods.gaussian.Gaussian(variance=noise_var),
        'max_it': 20,
        'avg_reward': -1500,
        'window': 3,
        'fmin': fmin,
        'bounds': bounds,
        'swarm_size': 1000,
        'info': prior_std,
    }


algos = [
    (SafeOptSwarm, [_quadrocopter_config(s) for s in (1000, 1250, 1500, 1750, 2000)]),
]
envs = [(Quadrocopter, {})]
# Index 0: best performance, index 1: safety measure.
measures = [BestPerformance(), SafetyMeasure(fmin)]
bench = Bench.make_bench(algos, envs, measures)
In [ ]:
# Invoke the Bench instance built in the previous cell; the cells below read
# the outcome from bench.measures.
bench()
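Below we output the results of the safety measure and of the performance measure. A list comprehension is used to put the tuples into a more readable format.
For the safety measure, the first element shows the standard deviation used, the second the number of violations, and the last one the sum over
all violations, as documented in the SafetyMeasure
class.
For the performance measure, each tuple holds the standard deviation used and the best performance converted to an integer.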
In [ ]:
# Safety measure (second entry of bench.measures): one tuple per result made
# of the run's 'info' value, followed by the result's second and third fields.
print([
    (entry[0].alg_conf['info'], entry[1], entry[2])
    for entry in bench.measures[1].result
])
In [ ]:
# Performance measure (first entry of bench.measures): one tuple per result
# made of the run's 'info' value and the result's second field cast to int.
print([
    (entry[0].alg_conf['info'], int(entry[1]))
    for entry in bench.measures[0].result
])
In [ ]: