In [1]:
import gym as gym
%pylab inline
In [2]:
# Roll out a uniformly random policy in MountainCar-v0 and print the empirical
# per-dimension mean and variance of the observations it visits, followed by a
# mean/variance pair derived from the declared observation-space bounds.
# No seed is set in this cell, so the empirical numbers are not pinned to a
# particular random stream.
env = gym.make('MountainCar-v0')

states = []
while len(states) < 1e5:
    # The observation returned by reset() is a visited state too; record it so
    # that episode starts are represented in the statistics.
    states.append(env.reset())
    done = False
    while not done:
        state, _, done, _ = env.step(env.action_space.sample())
        states.append(state)

print(len(states))
print(np.mean(states, axis=0))
print(np.var(states, axis=0))

high = env.observation_space.high
low = env.observation_space.low

# Bounds-based statistics: the midpoint of [low, high] is used as the mean and
# the squared half-range as the variance. Halving each bound before
# subtracting keeps the subtraction itself from overflowing for very large
# finite bounds. Overflow / invalid-value warnings are expected for unbounded
# dimensions and are handled explicitly right below, so they are silenced here.
with np.errstate(over='ignore', invalid='ignore'):
    normalization_var = ((high/2 - low/2)**2).astype(np.float32)
    normalization_mean = ((high + low)/2.0).astype(np.float32)

# Dimensions whose range is not finite get an identity transform
# (mean 0, variance 1). isfinite rejects both inf and NaN, so a bound that is
# infinite on only one side is covered as well.
normalization_var[~np.isfinite(normalization_var)] = 1.0
normalization_mean[~np.isfinite(normalization_mean)] = 0.0

print(normalization_mean)
print(normalization_var)
In [3]:
# Roll out a uniformly random policy in Acrobot-v0 and print the empirical
# per-dimension mean and variance of the observations it visits, followed by a
# mean/variance pair derived from the declared observation-space bounds.
# No seed is set in this cell, so the empirical numbers are not pinned to a
# particular random stream.
env = gym.make('Acrobot-v0')

states = []
while len(states) < 1e5:
    # The observation returned by reset() is a visited state too; record it so
    # that episode starts are represented in the statistics.
    states.append(env.reset())
    done = False
    while not done:
        state, _, done, _ = env.step(env.action_space.sample())
        states.append(state)

print(len(states))
print(np.mean(states, axis=0))
print(np.var(states, axis=0))

high = env.observation_space.high
low = env.observation_space.low

# Bounds-based statistics: the midpoint of [low, high] is used as the mean and
# the squared half-range as the variance. Halving each bound before
# subtracting keeps the subtraction itself from overflowing for very large
# finite bounds. Overflow / invalid-value warnings are expected for unbounded
# dimensions and are handled explicitly right below, so they are silenced here.
with np.errstate(over='ignore', invalid='ignore'):
    normalization_var = ((high/2 - low/2)**2).astype(np.float32)
    normalization_mean = ((high + low)/2.0).astype(np.float32)

# Dimensions whose range is not finite get an identity transform
# (mean 0, variance 1). isfinite rejects both inf and NaN, so a bound that is
# infinite on only one side is covered as well.
normalization_var[~np.isfinite(normalization_var)] = 1.0
normalization_mean[~np.isfinite(normalization_mean)] = 0.0

print(normalization_mean)
print(normalization_var)
In [3]:
# Roll out a uniformly random policy in CartPole-v0 and print the empirical
# per-dimension mean and variance of the observations it visits, followed by a
# mean/variance pair derived from the declared observation-space bounds.
# No seed is set in this cell, so the empirical numbers are not pinned to a
# particular random stream.
env = gym.make('CartPole-v0')
print(env.observation_space)
print(env.action_space)

states = []
while len(states) < 1e5:
    # The observation returned by reset() is a visited state too; record it so
    # that episode starts are represented in the statistics.
    states.append(env.reset())
    done = False
    while not done:
        state, _, done, _ = env.step(env.action_space.sample())
        states.append(state)

print(len(states))
print(np.mean(states, axis=0))
print(np.var(states, axis=0))

high = env.observation_space.high
low = env.observation_space.low

# Bounds-based statistics: the midpoint of [low, high] is used as the mean and
# the squared half-range as the variance. Halving each bound before
# subtracting keeps the subtraction itself from overflowing for very large
# finite bounds. Overflow / invalid-value warnings are expected for unbounded
# dimensions and are handled explicitly right below, so they are silenced here.
with np.errstate(over='ignore', invalid='ignore'):
    normalization_var = ((high/2 - low/2)**2).astype(np.float32)
    normalization_mean = ((high + low)/2.0).astype(np.float32)

# Dimensions whose range is not finite get an identity transform
# (mean 0, variance 1). isfinite rejects both inf and NaN, so a bound that is
# infinite on only one side is covered as well.
normalization_var[~np.isfinite(normalization_var)] = 1.0
normalization_mean[~np.isfinite(normalization_mean)] = 0.0

print(normalization_mean)
print(normalization_var)
In [ ]: