notebook.community

Edit and run



In [7]:

    
import gym









    



[2016-07-18 12:45:02,471] Site environment registry incorrect: Scoreboard did not register all envs: set(['AcrobotContinuous-v0'])



In [19]:

    
env = gym.make('AcrobotContinuous-v0')
print('action space', env.action_space)

print('action low', env.action_space.low)
print('action high', env.action_space.high)


print('observations', env.observation_space)
print(env.observation_space.low)
print(env.observation_space.high)









    



[2016-07-18 16:10:56,198] Making new env: AcrobotContinuous-v0






    



('action space', Box(1,))
('action low', array([-1.]))
('action high', array([ 1.]))
('observations', Box(4,))
[ -3.14159265  -3.14159265 -12.56637061 -28.27433388]
[  3.14159265   3.14159265  12.56637061  28.27433388]



In [1]:

    
done = False
while not done:
    state, _, done, _ = env.step(env.action_space.sample())
    env.render()









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-c4e2d21a0b17> in <module>()
      1 done = False
      2 while not done:
----> 3     state, _, done, _ = env.step(env.action_space.sample())
      4     env.render()

NameError: name 'env' is not defined



In [20]:

    
import numpy as np

#STATE NORMALIZATION
print('Calculating normalization by random action sampling...')
states = []

env.reset()

while len(states) < 1e5:
    
    done = False
    while not done:
        state, _, done, _ = env.step(env.action_space.sample())
        # state, _, done, _ = self.env.step(self.ou_process.ou_step())
        states.append(state.squeeze())

    # self.env.render()

#     print('done, lastest {}'.format(len(states)))


normalization_mean = np.mean(states, axis=(0)).astype(np.float32)
normalization_var = np.var(states, axis=(0)).astype(np.float32)

print('montecarlo mean:', normalization_mean)
print('montecarlo var:', normalization_var)









    



Calculating normalization by random action sampling...
('montecarlo mean:', array([-0.00020907, -0.00273588,  0.00071914, -0.01408309], dtype=float32))
('montecarlo var:', array([  0.79269701,   2.74056292,   3.55256128,  10.79969215], dtype=float32))



In [ ]: