In [7]:
import gym
In [19]:
env = gym.make('AcrobotContinuous-v0')
# Inspect the continuous action space and the observation space bounds
print('action space', env.action_space)
print('action low', env.action_space.low)
print('action high', env.action_space.high)
print('observations', env.observation_space)
print('observation low', env.observation_space.low)
print('observation high', env.observation_space.high)
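Because the action space is a continuous Box, a policy's raw output can fall outside the valid range and should be clipped before stepping. A minimal sketch, assuming the env object from the cell above; raw_action is a hypothetical policy output used only for illustration:

In [ ]:
import numpy as np

# Hypothetical raw policy output; clip it into the Box bounds before stepping
raw_action = np.array([1.7], dtype=np.float32)
clipped = np.clip(raw_action, env.action_space.low, env.action_space.high)
state, reward, done, info = env.step(clipped)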
In [1]:
state = env.reset()  # reset before stepping; stepping a fresh env is undefined
done = False
while not done:
    # Roll out one episode under a uniformly random policy and render it
    state, _, done, _ = env.step(env.action_space.sample())
    env.render()
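The same step loop can also tally episode return, which gives a quick baseline for how the random policy performs. A minimal sketch under the same assumptions as above; episode_return is an illustrative name:

In [ ]:
state = env.reset()
done = False
episode_return = 0.0
while not done:
    state, reward, done, _ = env.step(env.action_space.sample())
    episode_return += reward  # accumulate the random policy's return
print('random-policy episode return:', episode_return)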
In [20]:
import numpy as np
# STATE NORMALIZATION: estimate observation statistics by Monte Carlo sampling
print('Calculating normalization by random action sampling...')
states = []
while len(states) < 1e5:
    env.reset()  # reset at the start of every episode, not just the first
    done = False
    while not done:
        state, _, done, _ = env.step(env.action_space.sample())
        states.append(state.squeeze())
normalization_mean = np.mean(states, axis=0).astype(np.float32)
normalization_var = np.var(states, axis=0).astype(np.float32)
print('montecarlo mean:', normalization_mean)
print('montecarlo var:', normalization_var)
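With the Monte Carlo mean and variance in hand, each observation can be standardized before being fed to a learner. A minimal sketch, assuming the normalization_mean and normalization_var computed above; normalize_state is an illustrative helper, and eps guards against division by zero for near-constant state dimensions:

In [ ]:
def normalize_state(state, eps=1e-8):
    # Standardize an observation with the Monte Carlo statistics above
    return (state - normalization_mean) / np.sqrt(normalization_var + eps)

normalized = normalize_state(env.reset())
print('normalized initial state:', normalized)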