In [1]:
import gym as gym
%pylab inline


[2016-07-19 10:04:56,893] Site environment registry incorrect: Scoreboard did not register all envs: set(['AcrobotContinuous-v0'])
Populating the interactive namespace from numpy and matplotlib

In [2]:
env = gym.make('MountainCar-v0')

states = []

while len(states) < 1e5:
  env.reset()
  done = False
  while not done:
    state, _, done, _ = env.step(env.action_space.sample())
    states.append(state)
    
print(len(states))
print(np.mean(states, axis=(0)))
print(np.var(states, axis=(0)))

high = env.observation_space.high
low = env.observation_space.low
normalization_var = ((high/2 - low/2)**2).astype(np.float32)
normalization_var[np.isinf(normalization_var)] = 1.0
normalization_mean = ((high + low)/2.0).astype(np.float32)
normalization_mean[np.isnan(normalization_mean)] = 0.0

print(normalization_mean)
print(normalization_var)


[2016-07-07 11:11:06,714] Making new env: MountainCar-v0
132319
[ -5.14191222e-01   5.58844290e-05]
[ 0.11860186  0.00060793]
[-0.30000001  0.        ]
[ 0.81    0.0049]

In [3]:
env = gym.make('Acrobot-v0')

states = []

while len(states) < 1e5:
  env.reset()
  done = False
  while not done:
    state, _, done, _ = env.step(env.action_space.sample())
    states.append(state)
    
print(len(states))
print(np.mean(states, axis=(0)))
print(np.var(states, axis=(0)))

high = env.observation_space.high
low = env.observation_space.low
normalization_var = ((high/2 - low/2)**2).astype(np.float32)
normalization_var[np.isinf(normalization_var)] = 1.0
normalization_mean = ((high + low)/2.0).astype(np.float32)
normalization_mean[np.isnan(normalization_mean)] = 0.0

print(normalization_mean)
print(normalization_var)


[2016-07-07 11:11:08,684] Making new env: Acrobot-v0
100014
[ 0.0004221   0.00629445 -0.00317496  0.02609741]
[ 0.53635338  2.11967184  2.74984364  8.1708995 ]
[ 0.  0.  0.  0.]
[   9.86960411    9.86960411  157.91366577  799.43792725]

In [3]:
env = gym.make('CartPole-v0')

print(env.observation_space)
print(env.action_space)

states = []

while len(states) < 1e5:
  env.reset()
  done = False
  while not done:
    state, _, done, _ = env.step(env.action_space.sample())
    states.append(state)
    
print(len(states))
print(np.mean(states, axis=(0)))
print(np.var(states, axis=(0)))

high = env.observation_space.high
low = env.observation_space.low
normalization_var = ((high/2 - low/2)**2).astype(np.float32)
normalization_var[np.isinf(normalization_var)] = 1.0
normalization_mean = ((high + low)/2.0).astype(np.float32)
normalization_mean[np.isnan(normalization_mean)] = 0.0

print(normalization_mean)
print(normalization_var)


[2016-07-19 10:05:18,233] Making new env: CartPole-v0
Box(4,)
Discrete(2)
100017
[ 0.00226849  0.00498453  0.00190249  0.00525343]
[ 0.0098635   0.31733479  0.01052998  0.72302333]
[ 0.  0.  0.  0.]
[ 5.76000023  1.          0.17545964  1.        ]
/home/ga24pug/mypython-deeprl/lib/python2.7/site-packages/ipykernel/__main__.py:23: RuntimeWarning: invalid value encountered in add

In [ ]: