In [2]:
# gym オープンソースライブラリの読み込み
import gym
In [3]:
# Create the environment
env = gym.make('CartPole-v0') # 'CartPole-v0' is the environment ID
#env = gym.make('MountainCar-v0') # 'MountainCar-v0' is a different environment
#env = gym.make('MsPacman-v0') # 'MsPacman-v0' is a different environment
# Seed the environment with 42
env.seed(42)
[2017-01-18 23:15:36,839] Making new env: CartPole-v0
Out[3]:
[0L]
In [3]:
# Initialise the environment (this yields the first observation)
env.reset()
Out[3]:
array([-0.00808208, -0.02773516, 0.02830284, -0.03498637])
In [4]:
# Render the environment
env.render()
In [5]:
# Choose an action manually
action = 0 # 0: Left, 1: Right
# Execute the chosen action in the environment
# (wrap the call in print() to display it)
env.step(action)
# Render the environment
env.render()
In [6]:
# Close the rendering window
env.render(close=True)
In [7]:
import time
In [8]:
# gym オープンソースライブラリの読み込み
import gym
In [9]:
# Create the environment
env = gym.make('CartPole-v0') # 'CartPole-v0' is the environment ID
#env = gym.make('MountainCar-v0') # 'MountainCar-v0' is a different environment
#env = gym.make('MsPacman-v0') # 'MsPacman-v0' is a different environment
[2017-01-18 13:50:34,453] Making new env: CartPole-v0
In [10]:
# Pick a random action
env.action_space.sample()
Out[10]:
0
In [11]:
# Action space (the space that defines which actions the agent can choose)
env.action_space
Out[11]:
Discrete(2)
In [12]:
# Initialise the environment (this yields the first observation)
env.reset()
Out[12]:
array([-0.04154346, -0.00285351, -0.03092677, 0.0460129 ])
In [13]:
# Run 100 steps with a random policy, printing each action next to what step() returns.
# NOTE: this loop does not check the `done` flag, so it keeps calling step() after the
# episode has ended; gym logs a warning when that happens (stepping after done = True
# is undefined behavior).
for _ in range(100):
    time.sleep(0.1)  # sleep 0.1 s to slow the rendering down
    env.render()  # draw the current state
    action = env.action_space.sample()  # pick a random action
    step_result = env.step(action)  # execute the chosen action
    # Emit action and result on one line. The previous `print(action),` relied on the
    # Python 2 trailing-comma form; one formatted string prints the same text on both
    # Python 2 and Python 3.
    print("{} {}".format(action, step_result))
1 (array([-0.04160053, 0.19269795, -0.03000651, -0.25626504]), 1.0, False, {})
1 (array([-0.03774657, 0.38823518, -0.03513181, -0.55825944]), 1.0, False, {})
0 (array([-0.02998187, 0.19362354, -0.046297 , -0.27684874]), 1.0, False, {})
1 (array([-0.0261094 , 0.38937437, -0.05183397, -0.58376682]), 1.0, False, {})
1 (array([-0.01832191, 0.58518268, -0.06350931, -0.89231717]), 1.0, False, {})
1 (array([-0.00661826, 0.78110596, -0.08135565, -1.20426879]), 1.0, False, {})
1 (array([ 0.00900386, 0.97717979, -0.10544103, -1.52129894]), 1.0, False, {})
1 (array([ 0.02854746, 1.17340607, -0.13586701, -1.84494592]), 1.0, False, {})
1 (array([ 0.05201558, 1.36973929, -0.17276593, -2.17655384]), 1.0, False, {})
1 (array([ 0.07941037, 1.56607088, -0.216297 , -2.51720794]), 1.0, True, {})
[2017-01-18 13:50:35,651] You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.
0 (array([ 0.11073178, 1.37327814, -0.26664116, -2.2978539 ]), 0.0, True, {})
0 (array([ 0.13819735, 1.18141885, -0.31259824, -2.09770191]), 0.0, True, {})
1 (array([ 0.16182572, 1.37805651, -0.35455228, -2.46877854]), 0.0, True, {})
0 (array([ 0.18938685, 1.18722244, -0.40392785, -2.30239981]), 0.0, True, {})
0 (array([ 0.2131313 , 0.99739649, -0.44997585, -2.15612718]), 0.0, True, {})
0 (array([ 0.23307923, 0.80853294, -0.49309839, -2.02890523]), 0.0, True, {})
0 (array([ 0.24924989, 0.62057153, -0.53367649, -1.91971783]), 0.0, True, {})
0 (array([ 0.26166132, 0.43344484, -0.57207085, -1.82761821]), 0.0, True, {})
1 (array([ 0.27033022, 0.6291339 , -0.60862321, -2.23357993]), 0.0, True, {})
0 (array([ 0.2829129 , 0.44242225, -0.65329481, -2.1718936 ]), 0.0, True, {})
1 (array([ 0.29176134, 0.6364248 , -0.69673268, -2.5816705 ]), 0.0, True, {})
1 (array([ 0.30448984, 0.82864053, -0.74836609, -2.99146255]), 0.0, True, {})
0 (array([ 0.32106265, 0.6410831 , -0.80819535, -2.98534901]), 0.0, True, {})
0 (array([ 0.33388431, 0.45399431, -0.86790233, -3.00406063]), 0.0, True, {})
1 (array([ 0.3429642 , 0.64148597, -0.92798354, -3.41017508]), 0.0, True, {})
1 (array([ 0.35579392, 0.82576909, -0.99618704, -3.811199 ]), 0.0, True, {})
1 (array([ 0.3723093 , 1.00623466, -1.07241102, -4.2051104 ]), 0.0, True, {})
1 (array([ 0.39243399, 1.18228649, -1.15651323, -4.58957817]), 0.0, True, {})
0 (array([ 0.41607972, 0.98569126, -1.24830479, -4.74000295]), 0.0, True, {})
1 (array([ 0.43579355, 1.15330217, -1.34310485, -5.09852842]), 0.0, True, {})
0 (array([ 0.45885959, 0.95069694, -1.44507542, -5.31633944]), 0.0, True, {})
1 (array([ 0.47787353, 1.10885584, -1.55140221, -5.63776636]), 0.0, True, {})
0 (array([ 0.50005065, 0.89840189, -1.66415753, -5.92558911]), 0.0, True, {})
1 (array([ 0.51801868, 1.04728635, -1.78266932, -6.19748896]), 0.0, True, {})
1 (array([ 0.53896441, 1.19265907, -1.9066191 , -6.43905881]), 0.0, True, {})
0 (array([ 0.56281759, 0.9694434 , -2.03540027, -6.8269756 ]), 0.0, True, {})
1 (array([ 0.58220646, 1.10995261, -2.17193978, -6.99537465]), 0.0, True, {})
1 (array([ 0.60440551, 1.25194689, -2.31184728, -7.11736842]), 0.0, True, {})
0 (array([ 0.62944445, 1.02236464, -2.45419465, -7.56674365]), 0.0, True, {})
0 (array([ 0.64989174, 0.79156453, -2.60552952, -8.02087295]), 0.0, True, {})
1 (array([ 0.66572303, 0.94539497, -2.76594698, -7.97265695]), 0.0, True, {})
0 (array([ 0.68463093, 0.72480004, -2.92540012, -8.38833727]), 0.0, True, {})
1 (array([ 0.69912693, 0.90159607, -3.09316686, -8.19238323]), 0.0, True, {})
1 (array([ 0.71715885, 1.09282223, -3.25701453, -7.92011187]), 0.0, True, {})
1 (array([ 0.7390153 , 1.29643517, -3.41541676, -7.58286589]), 0.0, True, {})
1 (array([ 0.764944 , 1.50932131, -3.56707408, -7.19593164]), 0.0, True, {})
1 (array([ 0.79513043, 1.72796143, -3.71099272, -6.77586112]), 0.0, True, {})
0 (array([ 0.82968966, 1.56692514, -3.84650994, -6.8208009 ]), 0.0, True, {})
1 (array([ 0.86102816, 1.79163219, -3.98292596, -6.37357017]), 0.0, True, {})
0 (array([ 0.8968608 , 1.63937504, -4.11039736, -6.30659575]), 0.0, True, {})
1 (array([ 0.9296483 , 1.86210161, -4.23652927, -5.87508807]), 0.0, True, {})
1 (array([ 0.96689034, 2.08037733, -4.35403104, -5.46376262]), 0.0, True, {})
1 (array([ 1.00849788, 2.29378967, -4.46330629, -5.07616186]), 0.0, True, {})
1 (array([ 1.05437368, 2.50236675, -4.56482953, -4.71410892]), 0.0, True, {})
0 (array([ 1.10442101, 2.34223902, -4.6591117 , -4.45861793]), 0.0, True, {})
1 (array([ 1.15126579, 2.54285305, -4.74828406, -4.14901041]), 0.0, True, {})
0 (array([ 1.20212285, 2.37618022, -4.83126427, -3.84622762]), 0.0, True, {})
0 (array([ 1.24964646, 2.20597884, -4.90818882, -3.5240248 ]), 0.0, True, {})
1 (array([ 1.29376603, 2.39681332, -4.97866932, -3.29133305]), 0.0, True, {})
1 (array([ 1.3417023 , 2.58563132, -5.04449598, -3.08222435]), 0.0, True, {})
0 (array([ 1.39341493, 2.40656089, -5.10614047, -2.71671425]), 0.0, True, {})
1 (array([ 1.44154615, 2.59169852, -5.16047475, -2.55175583]), 0.0, True, {})
0 (array([ 1.49338012, 2.40764187, -5.21150987, -2.16716872]), 0.0, True, {})
1 (array([ 1.54153295, 2.59044826, -5.25485324, -2.04028712]), 0.0, True, {})
0 (array([ 1.59334192, 2.40254822, -5.29565898, -1.64298947]), 0.0, True, {})
0 (array([ 1.64139288, 2.21270888, -5.32851877, -1.24076482]), 0.0, True, {})
0 (array([ 1.68564706, 2.0213738 , -5.35333407, -0.83497188]), 0.0, True, {})
0 (array([ 1.72607454, 1.82896821, -5.37003351, -0.42674746]), 0.0, True, {})
0 (array([ 1.7626539 , 1.63589771, -5.37856846, -0.01704302]), 0.0, True, {})
1 (array([ 1.79537185, 1.81591071, -5.37890932, 0.04722787]), 0.0, True, {})
1 (array([ 1.83169007, 1.99592821, -5.37796476, 0.11136031]), 0.0, True, {})
0 (array([ 1.87160863, 1.8026017 , -5.37573755, 0.52168155]), 0.0, True, {})
0 (array([ 1.90766067, 1.60950278, -5.36530392, 0.93168789]), 0.0, True, {})
0 (array([ 1.93985072, 1.41701926, -5.34667017, 1.34061875]), 0.0, True, {})
1 (array([ 1.96819111, 1.59811055, -5.31985779, 1.41646344]), 0.0, True, {})
1 (array([ 2.00015332, 1.77918556, -5.29152852, 1.50283153]), 0.0, True, {})
0 (array([ 2.03573703, 1.58908191, -5.26147189, 1.90495683]), 0.0, True, {})
1 (array([ 2.06751867, 1.77114604, -5.22337275, 2.01320955]), 0.0, True, {})
0 (array([ 2.10294159, 1.58378616, -5.18310856, 2.40709359]), 0.0, True, {})
0 (array([ 2.13461731, 1.39866468, -5.13496669, 2.79505537]), 0.0, True, {})
1 (array([ 2.16259061, 1.58408813, -5.07906558, 2.94912682]), 0.0, True, {})
0 (array([ 2.19427237, 1.40359669, -5.02008305, 3.32064619]), 0.0, True, {})
1 (array([ 2.2223443 , 1.59229119, -4.95367012, 3.51511579]), 0.0, True, {})
1 (array([ 2.25419013, 1.78265712, -4.88336781, 3.73236838]), 0.0, True, {})
1 (array([ 2.28984327, 1.97509402, -4.80872044, 3.97296764]), 0.0, True, {})
1 (array([ 2.32934515, 2.17003876, -4.72926109, 4.23747915]), 0.0, True, {})
1 (array([ 2.37274592, 2.36795688, -4.64451151, 4.5264286 ]), 0.0, True, {})
1 (array([ 2.42010506, 2.56932559, -4.55398293, 4.84023844]), 0.0, True, {})
0 (array([ 2.47149157, 2.41035079, -4.45717816, 5.09294147]), 0.0, True, {})
1 (array([ 2.51969859, 2.61915695, -4.35531934, 5.45648832]), 0.0, True, {})
1 (array([ 2.57208173, 2.83248791, -4.24618957, 5.84379272]), 0.0, True, {})
0 (array([ 2.62873149, 2.68169052, -4.12931371, 6.00474405]), 0.0, True, {})
0 (array([ 2.6823653 , 2.53024736, -4.00921883, 6.12509195]), 0.0, True, {})
1 (array([ 2.73297024, 2.75096585, -3.88671699, 6.56344181]), 0.0, True, {})
0 (array([ 2.78798956, 2.59667952, -3.75544816, 6.59269088]), 0.0, True, {})
0 (array([ 2.83992315, 2.43662172, -3.62359434, 6.5657869 ]), 0.0, True, {})
1 (array([ 2.88865559, 2.65371708, -3.4922786 , 6.99061383]), 0.0, True, {})
1 (array([ 2.94172993, 2.8679946 , -3.35246633, 7.39346922]), 0.0, True, {})
0 (array([ 2.99908982, 2.68754926, -3.20459694, 7.19033533]), 0.0, True, {})
1 (array([ 3.0528408 , 2.88669046, -3.06079024, 7.50696546]), 0.0, True, {})
In [14]:
env.render(close=True) # close the rendering window
In [15]:
#!python keyboard_agent.py CartPole-v1
#!python keyboard_agent.py LunarLander-v2
#!python keyboard_agent.py MountainCar-v0
#!python keyboard_agent.py SpaceInvaders-v0
#!python keyboard_agent.py Breakout-v0
#!python keyboard_agent.py Acrobot-v1
done==Trueとなったときにエピソードを終了するのであれば、以下のようなコードになる。
In [16]:
import numpy as np
np.set_printoptions(suppress=True) # do not use scientific notation (e.g. 1.0e-05)
In [17]:
import gym

# Roll out a few episodes with a random policy and record every observation.
N_EPISODES = 5    # number of episodes to run
MAX_STEPS = 100   # upper limit on steps within one episode

all_obs = []      # observations of all episodes, appended in order
env = gym.make('CartPole-v0')

for i_episode in range(N_EPISODES):
    # Initialise the environment and get the first observation.
    observation = env.reset()
    all_obs.append(observation)
    for t in range(MAX_STEPS):
        env.render()
        print(observation)
        action = env.action_space.sample()  # random policy
        observation, reward, done, info = env.step(action)  # execute the chosen action
        all_obs.append(observation)
        if not done:
            continue
        # The episode is over: report its length and move on to the next one.
        print("Episode finished after {} timesteps\n".format(t+1))
        break
[2017-01-18 13:50:44,875] Making new env: CartPole-v0
[ 0.02459009 0.02267429 0.00733899 0.03650138]
[ 0.02504358 -0.17255213 0.00806902 0.33149077]
[ 0.02159254 -0.367788 0.01469883 0.62670731]
[ 0.01423678 -0.17287427 0.02723298 0.3386896 ]
[ 0.01077929 -0.36837294 0.03400677 0.63983436]
[ 0.00341183 -0.5639521 0.04680346 0.94302955]
[-0.00786721 -0.75967236 0.06566405 1.25004364]
[-0.02306066 -0.56545057 0.09066492 0.97863018]
[-0.03436967 -0.37165342 0.11023752 0.7157474 ]
[-0.04180274 -0.56811459 0.12455247 1.0409946 ]
[-0.05316503 -0.3748474 0.14537236 0.78986339]
[-0.06066198 -0.57163462 0.16116963 1.12451976]
[-0.07209467 -0.76845932 0.18366003 1.46310961]
Episode finished after 13 timesteps
[ 0.01031005 -0.01777533 -0.02391008 0.0479302 ]
[ 0.00995455 -0.21254641 -0.02295148 0.33297442]
[ 0.00570362 -0.40733429 -0.01629199 0.61833215]
[-0.00244307 -0.2119886 -0.00392535 0.32056287]
[-0.00668284 -0.40705443 0.00248591 0.61200531]
[-0.01482393 -0.21196731 0.01472602 0.32010639]
[-0.01906327 -0.40729586 0.02112814 0.61739673]
[-0.02720919 -0.21247533 0.03347608 0.33144237]
[-0.0314587 -0.01784548 0.04010493 0.0495013 ]
[-0.03181561 0.17667914 0.04109495 -0.23026332]
[-0.02828202 0.37119049 0.03648969 -0.5097055 ]
[-0.02085821 0.56577989 0.02629558 -0.79066971]
[-0.00954262 0.37030692 0.01048218 -0.48983161]
[-0.00213648 0.56527944 0.00068555 -0.77919262]
[ 0.00916911 0.76039196 -0.0148983 -1.07165977]
[ 0.02437695 0.95570768 -0.0363315 -1.36898078]
[ 0.0434911 1.15126492 -0.06371111 -1.67280218]
[ 0.0665164 0.95693804 -0.09716716 -1.40062164]
[ 0.08565516 1.15312397 -0.12517959 -1.72203464]
[ 0.10871764 1.34943736 -0.15962028 -2.05090733]
[ 0.13570639 1.15627074 -0.20063843 -1.81157012]
Episode finished after 21 timesteps
[ 0.04223869 0.02704968 0.00565435 -0.0105924 ]
[ 0.04277969 0.22209008 0.0054425 -0.30148596]
[ 0.04722149 0.02689098 -0.00058722 -0.00709157]
[ 0.04775931 -0.16822254 -0.00072905 0.28540603]
[ 0.04439486 -0.36333409 0.00497907 0.57785893]
[ 0.03712818 -0.55852547 0.01653625 0.87210621]
[ 0.02595767 -0.36363226 0.03397837 0.5846678 ]
[ 0.01868502 -0.16900234 0.04567173 0.30287915]
[ 0.01530498 -0.36474444 0.05172931 0.60960895]
[ 0.00801009 -0.56054989 0.06392149 0.91812588]
[-0.00320091 -0.36634762 0.08228401 0.64619752]
[-0.01052786 -0.56251387 0.09520796 0.96361555]
[-0.02177814 -0.36879109 0.11448027 0.70229626]
[-0.02915396 -0.1754263 0.1285262 0.44773137]
[-0.03266249 0.01766559 0.13748082 0.19816526]
[-0.03230918 0.21058074 0.14144413 -0.0481849 ]
[-0.02809756 0.01374377 0.14048043 0.28556802]
[-0.02782269 -0.18307299 0.14619179 0.61905039]
[-0.03148415 -0.37990173 0.1585728 0.95397158]
[-0.03908218 -0.18722735 0.17765223 0.71501132]
[-0.04282673 -0.38430534 0.19195246 1.05793114]
Episode finished after 21 timesteps
[ 0.04541968 -0.04680079 0.04973245 0.0389427 ]
[ 0.04448366 0.14757403 0.0505113 -0.23764372]
[ 0.04743514 0.34193933 0.04575843 -0.51397582]
[ 0.05427393 0.14620381 0.03547891 -0.20723157]
[ 0.05719801 0.34080094 0.03133428 -0.48851496]
[ 0.06401403 0.14525124 0.02156398 -0.18612363]
[ 0.06691905 -0.0501725 0.01784151 0.11328314]
[ 0.0659156 0.14468932 0.02010717 -0.17371794]
[ 0.06880939 -0.05071455 0.01663281 0.12523972]
[ 0.0677951 0.14416522 0.01913761 -0.16214967]
[ 0.0706784 0.33900804 0.01589461 -0.44873428]
[ 0.07745856 0.14366491 0.00691993 -0.15108377]
[ 0.08033186 -0.05155544 0.00389825 0.14377419]
[ 0.07930075 0.14351047 0.00677374 -0.14767637]
[ 0.08217096 -0.05170783 0.00382021 0.14713582]
[ 0.0811368 0.14335921 0.00676293 -0.14433946]
[ 0.08400399 -0.05185894 0.00387614 0.15046935]
[ 0.08296681 0.1432073 0.00688552 -0.14098822]
[ 0.08583095 -0.05201259 0.00406576 0.153859 ]
[ 0.0847907 0.14305091 0.00714294 -0.13753851]
[ 0.08765172 -0.05217262 0.00439217 0.15738932]
[ 0.08660827 0.14288618 0.00753996 -0.13390475]
[ 0.08946599 -0.05234296 0.00486186 0.16114735]
[ 0.08841913 -0.24753418 0.00808481 0.4553601 ]
[ 0.08346845 -0.4427695 0.01719201 0.75058042]
[ 0.07461306 -0.24788883 0.03220362 0.46335681]
[ 0.06965528 -0.44345073 0.04147075 0.76601379]
[ 0.06078627 -0.24892358 0.05679103 0.48666285]
[ 0.0558078 -0.44479895 0.06652429 0.79668983]
[ 0.04691182 -0.25064986 0.08245808 0.52565407]
[ 0.04189882 -0.44682944 0.09297117 0.84313881]
[ 0.03296223 -0.64308891 0.10983394 1.16355079]
[ 0.02010045 -0.83945598 0.13310496 1.48855283]
[ 0.00331133 -1.0359243 0.16287601 1.81966596]
[-0.01740715 -1.23243911 0.19926933 2.15821096]
Episode finished after 35 timesteps
[-0.03284825 0.01277057 0.04278306 0.03211212]
[-0.03259284 -0.18293796 0.04342531 0.33798068]
[-0.0362516 -0.37865009 0.05018492 0.64403514]
[-0.0438246 -0.18426215 0.06306562 0.36756851]
[-0.04750985 -0.38022089 0.07041699 0.67945129]
[-0.05511426 -0.5762467 0.08400602 0.99344679]
[-0.0666392 -0.77238593 0.10387495 1.31128685]
[-0.08208692 -0.57872141 0.13010069 1.05284025]
[-0.09366134 -0.77530582 0.1511575 1.38336659]
[-0.10916746 -0.9719553 0.17882483 1.71924863]
Episode finished after 10 timesteps
In [18]:
# Close the rendering window
env.render(close=True)
In [19]:
%matplotlib inline
import matplotlib.pyplot as plt
In [20]:
# Plot every recorded observation (all episodes back to back), one line per state variable.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(np.array(all_obs))  # rows: recorded observations, columns: the 4 observation components
# Label the axes and title the figure so it can be read on its own.
ax.set_xlabel('recorded observation index (episodes concatenated)')
ax.set_ylabel('observation value')
ax.set_title('CartPole-v0 observations under a random policy')
ax.legend(['x', 'x_dot', 'theta', 'theta_dot'])
Out[20]:
<matplotlib.legend.Legend at 0x10ca8de90>
可能な行動や観測は `Space` オブジェクトで記述されている。
In [21]:
import gym
# Create the environment
env = gym.make('CartPole-v0') # 'CartPole-v0' is the environment ID
#env = gym.make('MountainCar-v0') # 'MountainCar-v0' is a different environment
#env = gym.make('MsPacman-v0') # 'MsPacman-v0' is a different environment
[2017-01-18 13:50:47,968] Making new env: CartPole-v0
In [22]:
# Print the environment's action space
print(env.action_space)
Discrete(2)
In [23]:
# Print the environment's observation space
print(env.observation_space)
Box(4,)
In [24]:
# Size of the discrete action space
env.action_space.n
Out[24]:
2
In [25]:
# `high` array of the observation space (one entry per observation component)
env.observation_space.high
Out[25]:
array([ 4.80000000e+00, 3.40282347e+38, 4.18879020e-01,
3.40282347e+38])
In [26]:
# `low` array of the observation space (one entry per observation component)
env.observation_space.low
Out[26]:
array([ -4.80000000e+00, -3.40282347e+38, -4.18879020e-01,
-3.40282347e+38])
スペースからサンプリングすることも、ある値がスペースに含まれているか調べることもできる。
In [27]:
from gym import spaces
In [28]:
# Build a standalone discrete space to experiment with
space = spaces.Discrete(8) # {0, 1, 2, ..., 7}
In [29]:
# Draw a sample from the space and display it
x = space.sample()
x
Out[29]:
2
In [30]:
# The sampled value must be a member of the space
assert space.contains(x)
In [31]:
# Discrete(8) reports n == 8
assert space.n == 8
`gym` の主な役割は、強化学習で使える多様な環境を提供すること。
In [32]:
from gym import envs
In [33]:
# List the available environments
envs.registry.all()
Out[33]:
[EnvSpec(PredictActionsCartpole-v0),
EnvSpec(Asteroids-ramDeterministic-v0),
EnvSpec(Asteroids-ramDeterministic-v3),
EnvSpec(Gopher-ramDeterministic-v3),
EnvSpec(Gopher-ramDeterministic-v0),
EnvSpec(DoubleDunk-ramDeterministic-v3),
EnvSpec(DoubleDunk-ramDeterministic-v0),
EnvSpec(Tennis-ramNoFrameskip-v3),
EnvSpec(RoadRunner-ramDeterministic-v0),
EnvSpec(Robotank-ram-v3),
EnvSpec(CartPole-v0),
EnvSpec(CartPole-v1),
EnvSpec(Gopher-ram-v3),
EnvSpec(Gopher-ram-v0),
EnvSpec(Pooyan-ram-v0),
EnvSpec(Pooyan-ram-v3),
EnvSpec(SpaceInvaders-ram-v3),
EnvSpec(CarRacing-v0),
EnvSpec(SpaceInvaders-ram-v0),
EnvSpec(YarsRevenge-ramDeterministic-v0),
EnvSpec(SpaceInvadersDeterministic-v0),
EnvSpec(DoubleDunk-ram-v3),
EnvSpec(DoubleDunk-ram-v0),
EnvSpec(SpaceInvadersDeterministic-v3),
EnvSpec(Centipede-v3),
EnvSpec(Centipede-v0),
EnvSpec(Pitfall-ramNoFrameskip-v3),
EnvSpec(Pitfall-ramNoFrameskip-v0),
EnvSpec(Frostbite-ramNoFrameskip-v0),
EnvSpec(Phoenix-ram-v3),
EnvSpec(AmidarNoFrameskip-v3),
EnvSpec(SkiingNoFrameskip-v0),
EnvSpec(SkiingNoFrameskip-v3),
EnvSpec(HotterColder-v0),
EnvSpec(RoadRunner-ramDeterministic-v3),
EnvSpec(Phoenix-ram-v0),
EnvSpec(Tennis-ramNoFrameskip-v0),
EnvSpec(Berzerk-ramNoFrameskip-v3),
EnvSpec(Berzerk-ramNoFrameskip-v0),
EnvSpec(AirRaidDeterministic-v3),
EnvSpec(AirRaidDeterministic-v0),
EnvSpec(ChopperCommandDeterministic-v3),
EnvSpec(AirRaidNoFrameskip-v0),
EnvSpec(AirRaidNoFrameskip-v3),
EnvSpec(ChopperCommandDeterministic-v0),
EnvSpec(Asteroids-ram-v0),
EnvSpec(Asteroids-ram-v3),
EnvSpec(KrullDeterministic-v0),
EnvSpec(Atlantis-ramDeterministic-v3),
EnvSpec(Atlantis-ramDeterministic-v0),
EnvSpec(KrullDeterministic-v3),
EnvSpec(OffSwitchCartpoleProb-v0),
EnvSpec(TimePilot-v3),
EnvSpec(Go19x19-v0),
EnvSpec(TimePilot-v0),
EnvSpec(Solaris-ram-v0),
EnvSpec(Solaris-ram-v3),
EnvSpec(VentureDeterministic-v3),
EnvSpec(FishingDerbyNoFrameskip-v3),
EnvSpec(FishingDerbyNoFrameskip-v0),
EnvSpec(Robotank-ram-v0),
EnvSpec(Qbert-v3),
EnvSpec(ReversedAddition-v0),
EnvSpec(Qbert-v0),
EnvSpec(Pitfall-v0),
EnvSpec(Pitfall-v3),
EnvSpec(RiverraidNoFrameskip-v0),
EnvSpec(RiverraidNoFrameskip-v3),
EnvSpec(BipedalWalkerHardcore-v2),
EnvSpec(Venture-ram-v3),
EnvSpec(Venture-ram-v0),
EnvSpec(Tennis-v0),
EnvSpec(Tennis-v3),
EnvSpec(MontezumaRevenge-ramNoFrameskip-v0),
EnvSpec(MontezumaRevenge-ramNoFrameskip-v3),
EnvSpec(Go9x9-v0),
EnvSpec(MountainCarContinuous-v0),
EnvSpec(SemisuperPendulumNoise-v0),
EnvSpec(Reacher-v1),
EnvSpec(ChopperCommand-ramNoFrameskip-v0),
EnvSpec(Taxi-v2),
EnvSpec(Pong-v3),
EnvSpec(Pong-v0),
EnvSpec(UpNDownDeterministic-v0),
EnvSpec(UpNDownDeterministic-v3),
EnvSpec(Enduro-v0),
EnvSpec(Enduro-v3),
EnvSpec(Zaxxon-ramDeterministic-v3),
EnvSpec(Krull-ramNoFrameskip-v0),
EnvSpec(Krull-ramNoFrameskip-v3),
EnvSpec(ElevatorAction-ramNoFrameskip-v3),
EnvSpec(ElevatorAction-ramNoFrameskip-v0),
EnvSpec(Venture-ramNoFrameskip-v3),
EnvSpec(QbertNoFrameskip-v3),
EnvSpec(Venture-ramNoFrameskip-v0),
EnvSpec(StarGunner-ramNoFrameskip-v3),
EnvSpec(StarGunner-ramNoFrameskip-v0),
EnvSpec(NameThisGame-ram-v3),
EnvSpec(YarsRevenge-ramDeterministic-v3),
EnvSpec(Breakout-ram-v0),
EnvSpec(Breakout-ram-v3),
EnvSpec(PrivateEye-ramNoFrameskip-v3),
EnvSpec(Bowling-v0),
EnvSpec(Bowling-v3),
EnvSpec(PrivateEye-ramNoFrameskip-v0),
EnvSpec(BattleZoneDeterministic-v3),
EnvSpec(BattleZoneDeterministic-v0),
EnvSpec(PitfallNoFrameskip-v0),
EnvSpec(PitfallNoFrameskip-v3),
EnvSpec(AirRaid-ramDeterministic-v0),
EnvSpec(AirRaid-ramDeterministic-v3),
EnvSpec(CentipedeNoFrameskip-v0),
EnvSpec(Skiing-ram-v0),
EnvSpec(CentipedeNoFrameskip-v3),
EnvSpec(EnduroDeterministic-v3),
EnvSpec(VentureNoFrameskip-v0),
EnvSpec(SpaceInvaders-ramNoFrameskip-v3),
EnvSpec(Freeway-ram-v3),
EnvSpec(Skiing-ram-v3),
EnvSpec(ConvergenceControl-v0),
EnvSpec(Riverraid-ramNoFrameskip-v3),
EnvSpec(Riverraid-ramNoFrameskip-v0),
EnvSpec(ChopperCommand-v3),
EnvSpec(ChopperCommand-v0),
EnvSpec(Pooyan-v0),
EnvSpec(Pooyan-v3),
EnvSpec(BattleZoneNoFrameskip-v0),
EnvSpec(PrivateEye-v3),
EnvSpec(PrivateEye-v0),
EnvSpec(BattleZoneNoFrameskip-v3),
EnvSpec(FrozenLake8x8-v0),
EnvSpec(Alien-ramNoFrameskip-v0),
EnvSpec(Alien-ramNoFrameskip-v3),
EnvSpec(WizardOfWor-ramDeterministic-v0),
EnvSpec(TutankhamDeterministic-v0),
EnvSpec(TutankhamDeterministic-v3),
EnvSpec(LunarLanderContinuous-v2),
EnvSpec(UpNDown-ramDeterministic-v3),
EnvSpec(UpNDown-ramDeterministic-v0),
EnvSpec(Phoenix-ramNoFrameskip-v3),
EnvSpec(Phoenix-ramNoFrameskip-v0),
EnvSpec(Asterix-ram-v3),
EnvSpec(Asterix-ram-v0),
EnvSpec(Jamesbond-ramNoFrameskip-v3),
EnvSpec(Jamesbond-ramNoFrameskip-v0),
EnvSpec(JourneyEscape-v0),
EnvSpec(JourneyEscape-v3),
EnvSpec(BipedalWalker-v2),
EnvSpec(CrazyClimberDeterministic-v3),
EnvSpec(CrazyClimberDeterministic-v0),
EnvSpec(FishingDerby-ramDeterministic-v3),
EnvSpec(QbertDeterministic-v0),
EnvSpec(SpaceInvaders-ramDeterministic-v3),
EnvSpec(QbertDeterministic-v3),
EnvSpec(SolarisDeterministic-v0),
EnvSpec(SolarisDeterministic-v3),
EnvSpec(YarsRevengeDeterministic-v0),
EnvSpec(YarsRevengeDeterministic-v3),
EnvSpec(SpaceInvaders-ramDeterministic-v0),
EnvSpec(TwoRoundDeterministicReward-v0),
EnvSpec(Bowling-ramNoFrameskip-v3),
EnvSpec(Bowling-ramNoFrameskip-v0),
EnvSpec(JourneyEscapeDeterministic-v3),
EnvSpec(NameThisGame-ramNoFrameskip-v0),
EnvSpec(TwoRoundNondeterministicReward-v0),
EnvSpec(AmidarNoFrameskip-v0),
EnvSpec(TimePilotNoFrameskip-v0),
EnvSpec(MsPacmanDeterministic-v0),
EnvSpec(MsPacmanDeterministic-v3),
EnvSpec(Pooyan-ramDeterministic-v0),
EnvSpec(Frostbite-ramNoFrameskip-v3),
EnvSpec(PhoenixDeterministic-v3),
EnvSpec(PhoenixDeterministic-v0),
EnvSpec(CrazyClimber-ram-v0),
EnvSpec(MontezumaRevenge-ram-v0),
EnvSpec(MontezumaRevenge-ram-v3),
EnvSpec(CrazyClimber-ram-v3),
EnvSpec(StarGunner-ramDeterministic-v0),
EnvSpec(StarGunner-ramDeterministic-v3),
EnvSpec(Centipede-ramNoFrameskip-v3),
EnvSpec(Centipede-ramNoFrameskip-v0),
EnvSpec(BeamRider-ramDeterministic-v0),
EnvSpec(BeamRider-ramDeterministic-v3),
EnvSpec(KungFuMaster-v0),
EnvSpec(KungFuMaster-v3),
EnvSpec(Jamesbond-ramDeterministic-v0),
EnvSpec(BreakoutDeterministic-v3),
EnvSpec(BreakoutDeterministic-v0),
EnvSpec(Jamesbond-ramDeterministic-v3),
EnvSpec(IceHockey-v0),
EnvSpec(IceHockey-v3),
EnvSpec(Venture-ramDeterministic-v0),
EnvSpec(Carnival-ram-v0),
EnvSpec(Venture-ramDeterministic-v3),
EnvSpec(PongDeterministic-v3),
EnvSpec(RobotankDeterministic-v0),
EnvSpec(RobotankDeterministic-v3),
EnvSpec(PongDeterministic-v0),
EnvSpec(Pong-ramDeterministic-v0),
EnvSpec(Pong-ramDeterministic-v3),
EnvSpec(NameThisGame-ramNoFrameskip-v3),
EnvSpec(Berzerk-v3),
EnvSpec(Berzerk-v0),
EnvSpec(SemisuperPendulumDecay-v0),
EnvSpec(MontezumaRevenge-v0),
EnvSpec(JourneyEscape-ramDeterministic-v3),
EnvSpec(JourneyEscape-ramDeterministic-v0),
EnvSpec(MontezumaRevenge-v3),
EnvSpec(AirRaid-ram-v0),
EnvSpec(AirRaid-ram-v3),
EnvSpec(Zaxxon-v0),
EnvSpec(BeamRiderDeterministic-v3),
EnvSpec(YarsRevenge-ramNoFrameskip-v0),
EnvSpec(YarsRevenge-ramNoFrameskip-v3),
EnvSpec(BeamRiderDeterministic-v0),
EnvSpec(RoadRunner-ramNoFrameskip-v0),
EnvSpec(TimePilotNoFrameskip-v3),
EnvSpec(Phoenix-ramDeterministic-v0),
EnvSpec(StarGunner-ram-v0),
EnvSpec(Phoenix-ramDeterministic-v3),
EnvSpec(Hex9x9-v0),
EnvSpec(Skiing-v0),
EnvSpec(Skiing-v3),
EnvSpec(StarGunner-ram-v3),
EnvSpec(Boxing-ramDeterministic-v0),
EnvSpec(Boxing-ramDeterministic-v3),
EnvSpec(AsteroidsDeterministic-v3),
EnvSpec(PrivateEye-ram-v3),
EnvSpec(Pooyan-ramDeterministic-v3),
EnvSpec(Centipede-ramDeterministic-v0),
EnvSpec(Centipede-ramDeterministic-v3),
EnvSpec(JourneyEscapeNoFrameskip-v3),
EnvSpec(JourneyEscapeNoFrameskip-v0),
EnvSpec(BattleZone-ramDeterministic-v0),
EnvSpec(BattleZone-ramDeterministic-v3),
EnvSpec(NameThisGameNoFrameskip-v0),
EnvSpec(NameThisGameNoFrameskip-v3),
EnvSpec(Seaquest-ram-v3),
EnvSpec(Seaquest-ram-v0),
EnvSpec(AsteroidsDeterministic-v0),
EnvSpec(ElevatorAction-ram-v3),
EnvSpec(ElevatorAction-ram-v0),
EnvSpec(ChopperCommand-ramDeterministic-v0),
EnvSpec(Zaxxon-ramNoFrameskip-v3),
EnvSpec(ChopperCommand-ramDeterministic-v3),
EnvSpec(Krull-ramDeterministic-v3),
EnvSpec(Krull-ramDeterministic-v0),
EnvSpec(BankHeistDeterministic-v0),
EnvSpec(BankHeistDeterministic-v3),
EnvSpec(VideoPinballDeterministic-v3),
EnvSpec(Reverse-v0),
EnvSpec(Zaxxon-ramNoFrameskip-v0),
EnvSpec(SeaquestDeterministic-v0),
EnvSpec(SeaquestDeterministic-v3),
EnvSpec(JourneyEscape-ram-v0),
EnvSpec(JourneyEscape-ram-v3),
EnvSpec(BerzerkDeterministic-v3),
EnvSpec(BerzerkDeterministic-v0),
EnvSpec(AssaultNoFrameskip-v0),
EnvSpec(Enduro-ramDeterministic-v0),
EnvSpec(Enduro-ramDeterministic-v3),
EnvSpec(AssaultNoFrameskip-v3),
EnvSpec(QbertNoFrameskip-v0),
EnvSpec(Gopher-ramNoFrameskip-v0),
EnvSpec(IceHockey-ramDeterministic-v0),
EnvSpec(IceHockey-ramDeterministic-v3),
EnvSpec(Gopher-ramNoFrameskip-v3),
EnvSpec(PhoenixNoFrameskip-v0),
EnvSpec(PhoenixNoFrameskip-v3),
EnvSpec(Humanoid-v1),
EnvSpec(NameThisGame-ram-v0),
EnvSpec(Tutankham-ramNoFrameskip-v0),
EnvSpec(Tutankham-ramNoFrameskip-v3),
EnvSpec(MsPacman-ramNoFrameskip-v0),
EnvSpec(ReversedAddition3-v0),
EnvSpec(Assault-ramDeterministic-v0),
EnvSpec(Atlantis-ramNoFrameskip-v0),
EnvSpec(Atlantis-ramNoFrameskip-v3),
EnvSpec(Assault-ramDeterministic-v3),
EnvSpec(Skiing-ramNoFrameskip-v0),
EnvSpec(BreakoutNoFrameskip-v0),
EnvSpec(BreakoutNoFrameskip-v3),
EnvSpec(KungFuMaster-ram-v0),
EnvSpec(KungFuMaster-ram-v3),
EnvSpec(OneRoundNondeterministicReward-v0),
EnvSpec(NameThisGame-ramDeterministic-v0),
EnvSpec(NameThisGame-ramDeterministic-v3),
EnvSpec(RoadRunner-ramNoFrameskip-v3),
EnvSpec(Frostbite-ramDeterministic-v3),
EnvSpec(Frostbite-ramDeterministic-v0),
EnvSpec(BankHeist-ramNoFrameskip-v0),
EnvSpec(BankHeist-ramNoFrameskip-v3),
EnvSpec(Qbert-ramNoFrameskip-v0),
EnvSpec(Ant-v1),
EnvSpec(Qbert-ramNoFrameskip-v3),
EnvSpec(Skiing-ramNoFrameskip-v3),
EnvSpec(YarsRevenge-ram-v0),
EnvSpec(YarsRevenge-ram-v3),
EnvSpec(FrostbiteNoFrameskip-v3),
EnvSpec(FishingDerby-ram-v0),
EnvSpec(FishingDerby-ram-v3),
EnvSpec(FrostbiteNoFrameskip-v0),
EnvSpec(BeamRiderNoFrameskip-v0),
EnvSpec(Enduro-ramNoFrameskip-v3),
EnvSpec(Enduro-ramNoFrameskip-v0),
EnvSpec(BeamRiderNoFrameskip-v3),
EnvSpec(CentipedeDeterministic-v3),
EnvSpec(Gravitar-ramNoFrameskip-v0),
EnvSpec(Gravitar-ramNoFrameskip-v3),
EnvSpec(CentipedeDeterministic-v0),
EnvSpec(Kangaroo-ram-v3),
EnvSpec(Alien-ram-v3),
EnvSpec(Kangaroo-ram-v0),
EnvSpec(VideoPinball-ramNoFrameskip-v0),
EnvSpec(VideoPinball-ramNoFrameskip-v3),
EnvSpec(StarGunnerDeterministic-v3),
EnvSpec(StarGunnerDeterministic-v0),
EnvSpec(PongNoFrameskip-v0),
EnvSpec(PongNoFrameskip-v3),
EnvSpec(TimePilotDeterministic-v3),
EnvSpec(TimePilotDeterministic-v0),
EnvSpec(CNNClassifierTraining-v0),
EnvSpec(Boxing-ram-v0),
EnvSpec(Boxing-ram-v3),
EnvSpec(Tennis-ramDeterministic-v0),
EnvSpec(StarGunner-v0),
EnvSpec(StarGunner-v3),
EnvSpec(Tennis-ramDeterministic-v3),
EnvSpec(DemonAttackNoFrameskip-v0),
EnvSpec(DemonAttackNoFrameskip-v3),
EnvSpec(PitfallDeterministic-v3),
EnvSpec(Assault-ram-v3),
EnvSpec(PooyanDeterministic-v0),
EnvSpec(PooyanDeterministic-v3),
EnvSpec(Assault-ram-v0),
EnvSpec(Amidar-ram-v3),
EnvSpec(PitfallDeterministic-v0),
EnvSpec(Amidar-ram-v0),
EnvSpec(ChopperCommandNoFrameskip-v0),
EnvSpec(ChopperCommandNoFrameskip-v3),
EnvSpec(Tutankham-ramDeterministic-v0),
EnvSpec(VentureDeterministic-v0),
EnvSpec(ElevatorActionDeterministic-v3),
EnvSpec(Solaris-ramDeterministic-v3),
EnvSpec(Solaris-ramDeterministic-v0),
EnvSpec(ElevatorActionDeterministic-v0),
EnvSpec(Riverraid-ram-v0),
EnvSpec(Riverraid-ram-v3),
EnvSpec(Solaris-v0),
EnvSpec(KungFuMasterNoFrameskip-v3),
EnvSpec(BattleZone-v3),
EnvSpec(BattleZone-v0),
EnvSpec(KungFuMasterNoFrameskip-v0),
EnvSpec(MsPacmanNoFrameskip-v3),
EnvSpec(MsPacmanNoFrameskip-v0),
EnvSpec(VideoPinballNoFrameskip-v3),
EnvSpec(Breakout-ramDeterministic-v0),
EnvSpec(Breakout-ramDeterministic-v3),
EnvSpec(VideoPinballNoFrameskip-v0),
EnvSpec(PrivateEye-ramDeterministic-v0),
EnvSpec(WizardOfWor-ram-v3),
EnvSpec(WizardOfWor-ram-v0),
EnvSpec(PrivateEye-ramDeterministic-v3),
EnvSpec(Gravitar-v0),
EnvSpec(RoadRunner-v3),
EnvSpec(RoadRunner-v0),
EnvSpec(Gravitar-v3),
EnvSpec(RoadRunner-ram-v3),
EnvSpec(Jamesbond-ram-v3),
EnvSpec(RoadRunner-ram-v0),
EnvSpec(MsPacman-ram-v0),
EnvSpec(MsPacman-ram-v3),
EnvSpec(Riverraid-ramDeterministic-v0),
EnvSpec(Riverraid-ramDeterministic-v3),
EnvSpec(Jamesbond-ram-v0),
EnvSpec(UpNDownNoFrameskip-v3),
EnvSpec(VideoPinball-ram-v3),
EnvSpec(VideoPinball-ram-v0),
EnvSpec(UpNDownNoFrameskip-v0),
EnvSpec(OffSwitchCartpole-v0),
EnvSpec(WizardOfWorNoFrameskip-v3),
EnvSpec(WizardOfWorNoFrameskip-v0),
EnvSpec(FreewayNoFrameskip-v3),
EnvSpec(FreewayNoFrameskip-v0),
EnvSpec(WizardOfWor-ramDeterministic-v3),
EnvSpec(Asterix-ramNoFrameskip-v0),
EnvSpec(Asterix-ramNoFrameskip-v3),
EnvSpec(AlienNoFrameskip-v3),
EnvSpec(AlienNoFrameskip-v0),
EnvSpec(BankHeist-ramDeterministic-v3),
EnvSpec(BankHeist-ramDeterministic-v0),
EnvSpec(InvertedDoublePendulum-v1),
EnvSpec(Asterix-v3),
EnvSpec(WizardOfWor-ramNoFrameskip-v0),
EnvSpec(Asterix-v0),
EnvSpec(AsteroidsNoFrameskip-v0),
EnvSpec(AsteroidsNoFrameskip-v3),
EnvSpec(Pong-ramNoFrameskip-v3),
EnvSpec(JamesbondDeterministic-v3),
EnvSpec(WizardOfWor-ramNoFrameskip-v3),
EnvSpec(ZaxxonDeterministic-v3),
EnvSpec(ZaxxonDeterministic-v0),
EnvSpec(Pong-ramNoFrameskip-v0),
EnvSpec(ChopperCommand-ram-v3),
EnvSpec(ChopperCommand-ram-v0),
EnvSpec(SpaceInvaders-ramNoFrameskip-v0),
EnvSpec(SeaquestNoFrameskip-v3),
EnvSpec(JamesbondDeterministic-v0),
EnvSpec(BowlingDeterministic-v3),
EnvSpec(BowlingDeterministic-v0),
EnvSpec(SemisuperPendulumRandom-v0),
EnvSpec(BankHeist-v0),
EnvSpec(BankHeist-v3),
EnvSpec(TimePilot-ramDeterministic-v0),
EnvSpec(TimePilot-ramDeterministic-v3),
EnvSpec(NChain-v0),
EnvSpec(FishingDerby-ramDeterministic-v0),
EnvSpec(SeaquestNoFrameskip-v0),
EnvSpec(StarGunnerNoFrameskip-v0),
EnvSpec(Seaquest-v3),
EnvSpec(CrazyClimber-ramNoFrameskip-v3),
EnvSpec(CrazyClimber-ramNoFrameskip-v0),
EnvSpec(Seaquest-v0),
EnvSpec(CrazyClimber-v0),
EnvSpec(CrazyClimber-v3),
EnvSpec(MsPacman-ramDeterministic-v3),
EnvSpec(Pitfall-ramDeterministic-v0),
EnvSpec(Pitfall-ramDeterministic-v3),
EnvSpec(Enduro-ram-v0),
EnvSpec(MsPacman-ramDeterministic-v0),
EnvSpec(Enduro-ram-v3),
EnvSpec(GravitarDeterministic-v0),
EnvSpec(GravitarDeterministic-v3),
EnvSpec(Breakout-ramNoFrameskip-v3),
EnvSpec(Swimmer-v1),
EnvSpec(Alien-ram-v0),
EnvSpec(Breakout-ramNoFrameskip-v0),
EnvSpec(GravitarNoFrameskip-v0),
EnvSpec(VideoPinballDeterministic-v0),
EnvSpec(AsterixDeterministic-v0),
EnvSpec(AsterixDeterministic-v3),
EnvSpec(AlienDeterministic-v0),
EnvSpec(AlienDeterministic-v3),
EnvSpec(RoadRunnerDeterministic-v3),
EnvSpec(RoadRunnerDeterministic-v0),
EnvSpec(RepeatCopy-v0),
EnvSpec(FrostbiteDeterministic-v0),
EnvSpec(Bowling-ramDeterministic-v0),
EnvSpec(Bowling-ramDeterministic-v3),
EnvSpec(Carnival-ramDeterministic-v0),
EnvSpec(EnduroNoFrameskip-v0),
EnvSpec(EnduroNoFrameskip-v3),
EnvSpec(Carnival-ramDeterministic-v3),
EnvSpec(FrostbiteDeterministic-v3),
EnvSpec(Asteroids-ramNoFrameskip-v3),
EnvSpec(Asteroids-ramNoFrameskip-v0),
EnvSpec(TennisNoFrameskip-v0),
EnvSpec(DemonAttackDeterministic-v3),
EnvSpec(Pitfall-ram-v0),
EnvSpec(DemonAttackDeterministic-v0),
EnvSpec(TennisNoFrameskip-v3),
EnvSpec(DemonAttack-ram-v3),
EnvSpec(DemonAttack-ram-v0),
EnvSpec(UpNDown-v0),
EnvSpec(BankHeistNoFrameskip-v3),
EnvSpec(BankHeistNoFrameskip-v0),
EnvSpec(UpNDown-v3),
EnvSpec(Pitfall-ram-v3),
EnvSpec(Kangaroo-ramDeterministic-v0),
EnvSpec(Kangaroo-ramDeterministic-v3),
EnvSpec(RobotankNoFrameskip-v3),
EnvSpec(RobotankNoFrameskip-v0),
EnvSpec(WizardOfWor-v3),
EnvSpec(WizardOfWor-v0),
EnvSpec(Hopper-v1),
EnvSpec(Asterix-ramDeterministic-v3),
EnvSpec(Robotank-v0),
EnvSpec(BattleZone-ramNoFrameskip-v0),
EnvSpec(PrivateEyeDeterministic-v3),
EnvSpec(Pooyan-ramNoFrameskip-v0),
EnvSpec(Pooyan-ramNoFrameskip-v3),
EnvSpec(PrivateEyeDeterministic-v0),
EnvSpec(ElevatorActionNoFrameskip-v0),
EnvSpec(ElevatorActionNoFrameskip-v3),
EnvSpec(TutankhamNoFrameskip-v0),
EnvSpec(Zaxxon-ramDeterministic-v0),
EnvSpec(Robotank-v3),
EnvSpec(JamesbondNoFrameskip-v0),
EnvSpec(JamesbondNoFrameskip-v3),
EnvSpec(HumanoidStandup-v1),
EnvSpec(KungFuMaster-ramDeterministic-v3),
EnvSpec(KungFuMaster-ramDeterministic-v0),
EnvSpec(Amidar-v3),
EnvSpec(Amidar-v0),
EnvSpec(BattleZone-ramNoFrameskip-v3),
EnvSpec(BerzerkNoFrameskip-v0),
EnvSpec(BerzerkNoFrameskip-v3),
EnvSpec(Amidar-ramNoFrameskip-v3),
EnvSpec(Amidar-ramNoFrameskip-v0),
EnvSpec(Gravitar-ramDeterministic-v3),
EnvSpec(Gravitar-ramDeterministic-v0),
EnvSpec(Asterix-ramDeterministic-v0),
EnvSpec(BattleZone-ram-v3),
EnvSpec(BattleZone-ram-v0),
EnvSpec(IceHockey-ram-v0),
EnvSpec(IceHockey-ram-v3),
EnvSpec(ChopperCommand-ramNoFrameskip-v3),
EnvSpec(MountainCar-v0),
EnvSpec(Qbert-ramDeterministic-v3),
EnvSpec(Qbert-ramDeterministic-v0),
EnvSpec(BeamRider-ramNoFrameskip-v3),
EnvSpec(Carnival-ram-v3),
EnvSpec(Carnival-v0),
EnvSpec(FrozenLake-v0),
EnvSpec(IceHockeyNoFrameskip-v0),
EnvSpec(IceHockeyNoFrameskip-v3),
EnvSpec(NameThisGameDeterministic-v3),
EnvSpec(NameThisGameDeterministic-v0),
EnvSpec(BeamRider-ramNoFrameskip-v0),
EnvSpec(DoubleDunk-ramNoFrameskip-v0),
EnvSpec(Tutankham-ram-v3),
EnvSpec(DoubleDunk-ramNoFrameskip-v3),
EnvSpec(YarsRevenge-v0),
EnvSpec(IceHockey-ramNoFrameskip-v3),
EnvSpec(IceHockey-ramNoFrameskip-v0),
EnvSpec(YarsRevenge-v3),
EnvSpec(MsPacman-v0),
EnvSpec(Solaris-ramNoFrameskip-v0),
EnvSpec(Solaris-ramNoFrameskip-v3),
EnvSpec(MsPacman-v3),
EnvSpec(Gopher-v3),
EnvSpec(Walker2d-v1),
EnvSpec(Gopher-v0),
EnvSpec(Zaxxon-ram-v3),
EnvSpec(Zaxxon-ram-v0),
EnvSpec(DoubleDunkDeterministic-v0),
EnvSpec(DoubleDunkDeterministic-v3),
EnvSpec(PooyanNoFrameskip-v3),
EnvSpec(PooyanNoFrameskip-v0),
EnvSpec(Seaquest-ramNoFrameskip-v0),
EnvSpec(Seaquest-ramNoFrameskip-v3),
EnvSpec(FreewayDeterministic-v0),
EnvSpec(FreewayDeterministic-v3),
EnvSpec(Blackjack-v0),
EnvSpec(TennisDeterministic-v3),
EnvSpec(TennisDeterministic-v0),
EnvSpec(Atlantis-v0),
EnvSpec(Atlantis-v3),
EnvSpec(EnduroDeterministic-v0),
EnvSpec(GuessingGame-v0),
EnvSpec(Copy-v0),
EnvSpec(CrazyClimber-ramDeterministic-v0),
EnvSpec(CrazyClimber-ramDeterministic-v3),
EnvSpec(Phoenix-v3),
EnvSpec(Phoenix-v0),
EnvSpec(Alien-ramDeterministic-v3),
EnvSpec(FishingDerbyDeterministic-v3),
EnvSpec(CarnivalDeterministic-v3),
EnvSpec(Asteroids-v0),
EnvSpec(Asteroids-v3),
EnvSpec(CarnivalDeterministic-v0),
EnvSpec(Tutankham-ramDeterministic-v3),
EnvSpec(Robotank-ramDeterministic-v3),
EnvSpec(Robotank-ramDeterministic-v0),
EnvSpec(IceHockeyDeterministic-v3),
EnvSpec(IceHockeyDeterministic-v0),
EnvSpec(Centipede-ram-v3),
EnvSpec(FishingDerby-ramNoFrameskip-v0),
EnvSpec(FishingDerby-ramNoFrameskip-v3),
EnvSpec(Centipede-ram-v0),
EnvSpec(Solaris-v3),
EnvSpec(Tennis-ram-v0),
EnvSpec(Assault-v3),
EnvSpec(Assault-v0),
EnvSpec(Tennis-ram-v3),
EnvSpec(HalfCheetah-v1),
EnvSpec(GopherNoFrameskip-v3),
EnvSpec(GopherNoFrameskip-v0),
EnvSpec(WizardOfWorDeterministic-v0),
EnvSpec(WizardOfWorDeterministic-v3),
EnvSpec(TimePilot-ram-v3),
EnvSpec(DoubleDunk-v3),
EnvSpec(DoubleDunk-v0),
EnvSpec(Tutankham-v0),
EnvSpec(LunarLander-v2),
EnvSpec(Tutankham-v3),
EnvSpec(BeamRider-v3),
EnvSpec(BeamRider-v0),
EnvSpec(CarnivalNoFrameskip-v0),
EnvSpec(BoxingDeterministic-v3),
EnvSpec(BoxingDeterministic-v0),
EnvSpec(CarnivalNoFrameskip-v3),
EnvSpec(Alien-v0),
EnvSpec(Alien-v3),
EnvSpec(Berzerk-ram-v3),
EnvSpec(Berzerk-ram-v0),
EnvSpec(PredictObsCartpole-v0),
EnvSpec(AmidarDeterministic-v3),
EnvSpec(AmidarDeterministic-v0),
EnvSpec(SolarisNoFrameskip-v0),
EnvSpec(GravitarNoFrameskip-v3),
EnvSpec(AssaultDeterministic-v3),
EnvSpec(Gravitar-ram-v0),
EnvSpec(Gravitar-ram-v3),
EnvSpec(AssaultDeterministic-v0),
EnvSpec(Frostbite-v3),
EnvSpec(Venture-v3),
EnvSpec(Venture-v0),
EnvSpec(Frostbite-v0),
EnvSpec(Acrobot-v1),
EnvSpec(Boxing-v0),
EnvSpec(Boxing-v3),
EnvSpec(ZaxxonNoFrameskip-v0),
EnvSpec(DemonAttack-v3),
EnvSpec(DemonAttack-v0),
EnvSpec(ZaxxonNoFrameskip-v3),
EnvSpec(Freeway-v0),
EnvSpec(NameThisGame-v0),
EnvSpec(NameThisGame-v3),
EnvSpec(Freeway-v3),
EnvSpec(KungFuMasterDeterministic-v0),
EnvSpec(KungFuMasterDeterministic-v3),
EnvSpec(RoadRunnerNoFrameskip-v0),
EnvSpec(RoadRunnerNoFrameskip-v3),
EnvSpec(Bowling-ram-v0),
EnvSpec(Bowling-ram-v3),
EnvSpec(Seaquest-ramDeterministic-v3),
EnvSpec(Krull-ram-v3),
EnvSpec(Krull-ram-v0),
EnvSpec(Seaquest-ramDeterministic-v0),
EnvSpec(TimePilot-ramNoFrameskip-v3),
EnvSpec(TimePilot-ramNoFrameskip-v0),
EnvSpec(BowlingNoFrameskip-v0),
EnvSpec(BowlingNoFrameskip-v3),
EnvSpec(UpNDown-ramNoFrameskip-v0),
EnvSpec(UpNDown-ramNoFrameskip-v3),
EnvSpec(Assault-ramNoFrameskip-v3),
EnvSpec(Assault-ramNoFrameskip-v0),
EnvSpec(KungFuMaster-ramNoFrameskip-v0),
EnvSpec(KungFuMaster-ramNoFrameskip-v3),
EnvSpec(PrivateEye-ram-v0),
EnvSpec(BankHeist-ram-v0),
EnvSpec(BankHeist-ram-v3),
EnvSpec(YarsRevengeNoFrameskip-v3),
EnvSpec(YarsRevengeNoFrameskip-v0),
EnvSpec(ElevatorAction-v3),
EnvSpec(ElevatorAction-v0),
EnvSpec(DemonAttack-ramDeterministic-v0),
EnvSpec(DemonAttack-ramDeterministic-v3),
EnvSpec(Carnival-ramNoFrameskip-v3),
EnvSpec(Carnival-ramNoFrameskip-v0),
EnvSpec(FishingDerby-v0),
EnvSpec(FishingDerby-v3),
EnvSpec(MontezumaRevengeDeterministic-v0),
EnvSpec(MontezumaRevengeDeterministic-v3),
EnvSpec(UpNDown-ram-v3),
EnvSpec(DoubleDunkNoFrameskip-v3),
EnvSpec(DoubleDunkNoFrameskip-v0),
EnvSpec(AsterixNoFrameskip-v3),
EnvSpec(AsterixNoFrameskip-v0),
EnvSpec(SolarisNoFrameskip-v3),
EnvSpec(Amidar-ramDeterministic-v0),
EnvSpec(Amidar-ramDeterministic-v3),
EnvSpec(Pong-ram-v3),
EnvSpec(Pong-ram-v0),
EnvSpec(ElevatorAction-ramDeterministic-v0),
EnvSpec(JourneyEscapeDeterministic-v0),
EnvSpec(Pendulum-v0),
EnvSpec(ElevatorAction-ramDeterministic-v3),
EnvSpec(Freeway-ramDeterministic-v3),
EnvSpec(Freeway-ramDeterministic-v0),
EnvSpec(Breakout-v0),
EnvSpec(Breakout-v3),
EnvSpec(BeamRider-ram-v3),
EnvSpec(Zaxxon-v3),
EnvSpec(InvertedPendulum-v1),
EnvSpec(BeamRider-ram-v0),
EnvSpec(VideoPinball-v3),
EnvSpec(VideoPinball-v0),
EnvSpec(MsPacman-ramNoFrameskip-v3),
EnvSpec(PrivateEyeNoFrameskip-v0),
EnvSpec(PrivateEyeNoFrameskip-v3),
EnvSpec(AtlantisDeterministic-v0),
EnvSpec(AtlantisDeterministic-v3),
EnvSpec(Berzerk-ramDeterministic-v0),
EnvSpec(Berzerk-ramDeterministic-v3),
EnvSpec(AirRaid-ramNoFrameskip-v3),
EnvSpec(AirRaid-ramNoFrameskip-v0),
EnvSpec(Roulette-v0),
EnvSpec(Atlantis-ram-v0),
EnvSpec(Atlantis-ram-v3),
EnvSpec(Freeway-ramNoFrameskip-v0),
EnvSpec(Freeway-ramNoFrameskip-v3),
EnvSpec(Boxing-ramNoFrameskip-v3),
EnvSpec(Boxing-ramNoFrameskip-v0),
EnvSpec(Jamesbond-v3),
EnvSpec(Jamesbond-v0),
EnvSpec(Skiing-ramDeterministic-v0),
EnvSpec(SpaceInvaders-v3),
EnvSpec(Skiing-ramDeterministic-v3),
EnvSpec(CrazyClimberNoFrameskip-v0),
EnvSpec(CrazyClimberNoFrameskip-v3),
EnvSpec(KangarooNoFrameskip-v0),
EnvSpec(AtlantisNoFrameskip-v3),
EnvSpec(AtlantisNoFrameskip-v0),
EnvSpec(KangarooNoFrameskip-v3),
EnvSpec(SpaceInvaders-v0),
EnvSpec(SpaceInvadersNoFrameskip-v3),
EnvSpec(StarGunnerNoFrameskip-v3),
EnvSpec(Kangaroo-ramNoFrameskip-v3),
EnvSpec(Kangaroo-ramNoFrameskip-v0),
EnvSpec(FishingDerbyDeterministic-v0),
EnvSpec(SpaceInvadersNoFrameskip-v0),
EnvSpec(DuplicatedInput-v0),
EnvSpec(Robotank-ramNoFrameskip-v0),
EnvSpec(Robotank-ramNoFrameskip-v3),
EnvSpec(Qbert-ram-v3),
EnvSpec(DemonAttack-ramNoFrameskip-v3),
EnvSpec(DemonAttack-ramNoFrameskip-v0),
EnvSpec(Frostbite-ram-v3),
EnvSpec(GopherDeterministic-v0),
EnvSpec(GopherDeterministic-v3),
EnvSpec(Frostbite-ram-v0),
EnvSpec(Alien-ramDeterministic-v0),
EnvSpec(VideoPinball-ramDeterministic-v3),
EnvSpec(OneRoundDeterministicReward-v0),
EnvSpec(VideoPinball-ramDeterministic-v0),
EnvSpec(Qbert-ram-v0),
EnvSpec(Tutankham-ram-v0),
EnvSpec(TutankhamNoFrameskip-v3),
EnvSpec(SkiingDeterministic-v3),
EnvSpec(Freeway-ram-v0),
EnvSpec(KangarooDeterministic-v3),
EnvSpec(AirRaid-v0),
EnvSpec(AirRaid-v3),
EnvSpec(KangarooDeterministic-v0),
EnvSpec(VentureNoFrameskip-v3),
EnvSpec(Krull-v3),
EnvSpec(JourneyEscape-ramNoFrameskip-v0),
EnvSpec(JourneyEscape-ramNoFrameskip-v3),
EnvSpec(Krull-v0),
EnvSpec(KrullNoFrameskip-v3),
EnvSpec(Riverraid-v0),
EnvSpec(Riverraid-v3),
EnvSpec(KrullNoFrameskip-v0),
EnvSpec(MontezumaRevenge-ramDeterministic-v3),
EnvSpec(MontezumaRevenge-ramDeterministic-v0),
EnvSpec(RiverraidDeterministic-v3),
EnvSpec(RiverraidDeterministic-v0),
EnvSpec(Carnival-v3),
EnvSpec(TimePilot-ram-v0),
EnvSpec(MontezumaRevengeNoFrameskip-v3),
EnvSpec(BoxingNoFrameskip-v0),
EnvSpec(BoxingNoFrameskip-v3),
EnvSpec(MontezumaRevengeNoFrameskip-v0),
EnvSpec(SkiingDeterministic-v0),
EnvSpec(UpNDown-ram-v0),
EnvSpec(Kangaroo-v3),
EnvSpec(Kangaroo-v0)]
上記の一覧にある環境のほかに、自分で環境を作ることも可能。
ある環境でのパフォーマンスを計測し、同時にビデオに記録するには、環境(env)をMonitorでラッピングすれば良い。
In [34]:
import gym
from gym import wrappers # ラッパの呼び出し
# Create the CartPole environment and wrap it in a Monitor so that the run is
# recorded (statistics and videos) into the given directory.
# force=True clears the monitor files left there by a previous run.
env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, './cartpole-v0-experiment-1', force=True)

try:
    for i_episode in range(10):
        # reset() starts a new episode and returns its first observation.
        observation = env.reset()
        for t in range(100):
            env.render()
            print(observation)
            action = env.action_space.sample()  # random action selection
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                break
finally:
    # Always release the viewer window and close the Monitor, even when the
    # loop is interrupted. Closing the Monitor finalizes the recorded results;
    # gym.upload() expects the monitor to be closed before uploading.
    env.render(close=True)
    env.close()
[2017-01-18 13:50:48,084] Making new env: CartPole-v0
[2017-01-18 13:50:48,091] DEPRECATION WARNING: env.spec.timestep_limit has been deprecated. Replace your call to `env.spec.timestep_limit` with `env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps')`. This change was made 12/28/2016 and is included in version 0.7.0
[2017-01-18 13:50:48,092] Clearing 8 monitor files from previous run (because force=True was provided)
[2017-01-18 13:50:48,094] Starting new video recorder writing to /Users/otsuka/git/pydata.okinawa/meetup021_main/3_openai_gym/cartpole-v0-experiment-1/openaigym.video.0.17434.video000000.mp4
[ 0.01426023 -0.01140579 0.04470146 -0.04862813]
[ 0.01403212 0.18304762 0.0437289 -0.32687902]
[ 0.01769307 -0.01266874 0.03719132 -0.02073292]
[ 0.01743969 -0.20830378 0.03677666 0.28344843]
[ 0.01327362 -0.01372515 0.04244563 0.00258774]
[ 0.01299912 0.18076318 0.04249738 -0.27640672]
[ 0.01661438 -0.0149385 0.03696925 0.02937122]
[ 0.01631561 -0.21057057 0.03755667 0.33348531]
[ 0.0121042 -0.40620638 0.04422638 0.63777115]
[ 0.00398007 -0.21172814 0.0569818 0.35933736]
[-0.00025449 -0.01746056 0.06416855 0.08515252]
[-0.0006037 -0.21344084 0.0658716 0.39737053]
[-0.00487252 -0.40943248 0.07381901 0.71007265]
[2017-01-18 13:50:48,917] Starting new video recorder writing to /Users/otsuka/git/pydata.okinawa/meetup021_main/3_openai_gym/cartpole-v0-experiment-1/openaigym.video.0.17434.video000001.mp4
[-0.01306117 -0.60549495 0.08802046 1.02504851]
[-0.02517107 -0.80167169 0.10852143 1.34401905]
[-0.0412045 -0.99797843 0.13540181 1.66858966]
[-0.06116407 -0.80466577 0.16877361 1.42096131]
[-0.07725739 -1.00142538 0.19719283 1.76129145]
Episode finished after 18 timesteps
[-0.04992798 0.03723891 0.03524338 -0.00555281]
[-0.0491832 -0.1583703 0.03513232 0.2980382 ]
[-0.0523506 -0.35397498 0.04109309 0.60159095]
[-0.0594301 -0.54964693 0.05312491 0.90692907]
[-0.07042304 -0.35528293 0.07126349 0.63140575]
[-0.0775287 -0.16122384 0.0838916 0.36198968]
[-0.08075318 0.03261175 0.0911314 0.09689404]
[-0.08010094 -0.16369014 0.09306928 0.41688058]
[-0.08337474 -0.35999933 0.10140689 0.73739286]
[-0.09057473 -0.16641321 0.11615475 0.478268 ]
[-0.093903 0.02689374 0.12572011 0.22433419]
[-0.09336512 0.2200156 0.13020679 -0.0261979 ]
[-0.08896481 0.4130532 0.12968283 -0.2751305 ]
[-0.08070374 0.21634245 0.12418022 0.05547792]
[-0.0763769 0.01967896 0.12528978 0.38461677]
[-0.07598332 -0.17697835 0.13298211 0.71402939]
[-0.07952288 0.01607646 0.1472627 0.46598553]
[-0.07920135 0.20884426 0.15658241 0.22310241]
[-0.07502447 0.01187128 0.16104446 0.56079458]
[-0.07478704 0.20441031 0.17226035 0.32286579]
[-0.07069884 0.00730765 0.17871767 0.66453851]
[-0.07055268 -0.18979081 0.19200844 1.00774099]
Episode finished after 22 timesteps
[-0.03497605 -0.0335939 -0.04518329 -0.00131335]
[-0.03564792 -0.22803971 -0.04520956 0.27677833]
[-0.04020872 -0.0323029 -0.03967399 -0.02981397]
[-0.04085478 0.16336488 -0.04027027 -0.33474577]
[-0.03758748 0.35903613 -0.04696519 -0.63985096]
[-0.03040676 0.55478032 -0.05976221 -0.94694617]
[-0.01931115 0.36051187 -0.07870113 -0.67362334]
[-0.01210091 0.16656686 -0.0921736 -0.40672058]
[-0.00876958 -0.02713544 -0.10030801 -0.1444612 ]
[-0.00931228 0.1692693 -0.10319723 -0.46702855]
[-0.0059269 0.36568633 -0.11253781 -0.79037281]
[ 0.00138683 0.5621589 -0.12834526 -1.11623375]
[ 0.01263001 0.36893349 -0.15066994 -0.86640955]
[ 0.02000868 0.56574933 -0.16799813 -1.20241819]
[ 0.03132366 0.76259698 -0.19204649 -1.54269212]
Episode finished after 15 timesteps
[-0.0310065 -0.01032053 0.01595647 -0.01433684]
[-0.03121291 0.184569 0.01566974 -0.30194291]
[-0.02752153 -0.01077274 0.00963088 -0.00435956]
[-0.02773698 -0.20603148 0.00954369 0.2913464 ]
[-0.03185761 -0.0110469 0.01537062 0.00168865]
[-0.03207855 0.18385128 0.01540439 -0.2861053 ]
[-0.02840152 -0.01148694 0.00968228 0.01139597]
[-0.02863126 -0.2067464 0.0099102 0.30711798]
[-0.03276619 -0.40200815 0.01605256 0.60290978]
[-0.04080635 -0.20711436 0.02811076 0.31532601]
[-0.04494864 -0.01240387 0.03441728 0.03163915]
[-0.04519672 -0.20800205 0.03505006 0.33497934]
[-0.04935676 -0.01339601 0.04174965 0.05355217]
[-0.04962468 0.18110321 0.04282069 -0.22567161]
[-0.04600261 0.37558785 0.03830726 -0.50454571]
[-0.03849086 0.57014959 0.02821635 -0.78491463]
[-0.02708787 0.76487271 0.01251805 -1.06858854]
[-0.01179041 0.56958744 -0.00885372 -0.77200337]
[-0.00039866 0.37458842 -0.02429379 -0.48211927]
[ 0.00709311 0.17981764 -0.03393617 -0.19719099]
[ 0.01068946 0.37540815 -0.03787999 -0.50038314]
[ 0.01819762 0.1808401 -0.04788765 -0.21987449]
[ 0.02181442 0.3766127 -0.05228514 -0.52727033]
[ 0.02934668 0.1822639 -0.06283055 -0.25151033]
[ 0.03299196 0.37822416 -0.06786076 -0.56333075]
[ 0.04055644 0.57422941 -0.07912737 -0.87659767]
[ 0.05204103 0.38026694 -0.09665933 -0.6098035 ]
[ 0.05964637 0.18661957 -0.1088554 -0.34906193]
[ 0.06337876 -0.00679941 -0.11583663 -0.09259102]
[ 0.06324277 0.18977588 -0.11768845 -0.41945778]
[ 0.06703829 -0.00349897 -0.12607761 -0.1660715 ]
[ 0.06696831 -0.19661185 -0.12939904 0.08432994]
[ 0.06303607 0.00010466 -0.12771244 -0.24621513]
[ 0.06303816 0.19679734 -0.13263674 -0.57629678]
[ 0.06697411 0.39350469 -0.14416268 -0.90764713]
[ 0.0748442 0.59025302 -0.16231562 -1.24194536]
[ 0.08664926 0.39754331 -0.18715453 -1.00419186]
[ 0.09460013 0.59460485 -0.20723837 -1.34932522]
Episode finished after 38 timesteps
[ 0.04466735 -0.01779831 -0.04298495 0.0288087 ]
[ 0.04431139 -0.21227831 -0.04240877 0.30762555]
[ 0.04006582 -0.40677112 -0.03625626 0.58663802]
[ 0.0319304 -0.60136702 -0.0245235 0.86768303]
[ 0.01990306 -0.40592012 -0.00716984 0.56739164]
[ 0.01178465 -0.21069833 0.00417799 0.27245857]
[ 0.00757069 -0.01563625 0.00962716 -0.01890368]
[ 0.00725796 -0.21089493 0.00924909 0.27680111]
[ 0.00304006 -0.01590614 0.01478511 -0.01295036]
[ 0.00272194 -0.21123698 0.0145261 0.28436053]
[-0.0015028 -0.40656306 0.02021331 0.58158928]
[-0.00963406 -0.21173008 0.0318451 0.2953418 ]
[-0.01386866 -0.01707625 0.03775194 0.01286998]
[-0.01421019 0.17748452 0.03800934 -0.2676668 ]
[-0.0106605 0.37204399 0.032656 -0.54812316]
[-0.00321962 0.56669232 0.02169354 -0.83034084]
[ 0.00811423 0.76151114 0.00508672 -1.1161229 ]
[ 0.02334445 0.56632279 -0.01723574 -0.82184868]
[ 0.03467091 0.37144085 -0.03367271 -0.53463625]
[ 0.04209973 0.56701975 -0.04436544 -0.8377362 ]
[ 0.05344012 0.7627186 -0.06112016 -1.14403478]
[ 0.06869449 0.95858345 -0.08400086 -1.45524161]
[ 0.08786616 1.15463011 -0.11310569 -1.77294193]
[ 0.11095876 1.35083137 -0.14856453 -2.09854556]
[ 0.13797539 1.54710213 -0.19053544 -2.43322617]
Episode finished after 25 timesteps
[-0.01865691 0.01393139 0.014746 0.02192529]
[-0.01837828 0.20883879 0.01518451 -0.26606887]
[-0.0142015 0.40374077 0.00986313 -0.55392406]
[-0.00612669 0.59872284 -0.00121535 -0.84348323]
[ 0.00584777 0.4036175 -0.01808502 -0.55118274]
[ 0.01392012 0.20875416 -0.02910867 -0.26425224]
[ 0.0180952 0.01405952 -0.03439372 0.01910926]
[ 0.01837639 -0.18055274 -0.03401153 0.30074525]
[ 0.01476534 0.01503706 -0.02799663 -0.00246724]
[ 0.01506608 0.21054911 -0.02804597 -0.30385031]
[ 0.01927706 0.40605929 -0.03412298 -0.60524469]
[ 0.02739825 0.21143072 -0.04622787 -0.32350199]
[ 0.03162686 0.40717939 -0.05269791 -0.63039748]
[ 0.03977045 0.60299554 -0.06530586 -0.93919996]
[ 0.05183036 0.79893419 -0.08408986 -1.2516677 ]
[ 0.06780904 0.99502684 -0.10912321 -1.56946064]
[ 0.08770958 1.19126936 -0.14051243 -1.89409212]
[ 0.11153497 1.38760807 -0.17839427 -2.22687247]
Episode finished after 18 timesteps
[ 0.04923225 -0.00464438 0.02190189 0.03952069]
[ 0.04913936 -0.20007344 0.02269231 0.33903262]
[ 0.04513789 -0.39551082 0.02947296 0.63878417]
[ 0.03722767 -0.59103105 0.04224864 0.94060095]
[ 0.02540705 -0.7866962 0.06106066 1.24625417]
[ 0.00967313 -0.59240824 0.08598574 0.97330604]
[-0.00217504 -0.39853863 0.10545187 0.70882467]
[-0.01014581 -0.59495092 0.11962836 1.03275194]
[-0.02204483 -0.4016056 0.1402834 0.77989362]
[-0.03007694 -0.59834871 0.15588127 1.11321738]
[-0.04204391 -0.79513543 0.17814562 1.45046214]
[-0.05794662 -0.60259332 0.20715486 1.21831796]
Episode finished after 12 timesteps
[-0.04965213 -0.04317467 -0.01058068 -0.0446757 ]
[-0.05051562 0.15209739 -0.0114742 -0.34067806]
[-0.04747367 -0.04285944 -0.01828776 -0.05163541]
[-0.04833086 0.1525199 -0.01932047 -0.35003171]
[-0.04528046 -0.04232203 -0.0263211 -0.06350323]
[-0.0461269 -0.2370569 -0.02759117 0.22076039]
[-0.05086804 -0.43177383 -0.02317596 0.50461384]
[-0.05950352 -0.62656162 -0.01308368 0.78990394]
[-0.07203475 -0.43126246 0.0027144 0.49313379]
[-0.08066 -0.2361789 0.01257707 0.20130755]
[-0.08538358 -0.04123906 0.01660322 -0.08738151]
[-0.08620836 -0.23659502 0.01485559 0.21049316]
[-0.09094026 -0.0416886 0.01906546 -0.07746684]
[-0.09177403 -0.2370786 0.01751612 0.22116981]
[-0.0965156 -0.04221134 0.02193952 -0.06593676]
[-0.09735983 -0.23764086 0.02062078 0.23358667]
[-0.10211265 -0.43305129 0.02529251 0.53270207]
[-0.11077367 -0.62851966 0.03594656 0.83324606]
[-0.12334407 -0.82411387 0.05261148 1.13701385]
[-0.13982635 -0.62971806 0.07535175 0.86128425]
[-0.15242071 -0.43569865 0.09257744 0.59321341]
[-0.16113468 -0.24198622 0.10444171 0.33106795]
[-0.1659744 -0.43842778 0.11106307 0.65477472]
[-0.17474296 -0.24501306 0.12415856 0.39902559]
[-0.17964322 -0.44165748 0.13213907 0.72813054]
[-0.18847637 -0.63833455 0.14670168 1.05931022]
[-0.20124306 -0.44542803 0.16788789 0.81603434]
[-0.21015162 -0.25295354 0.18420857 0.5805094 ]
[-0.21521069 -0.06082549 0.19581876 0.35104465]
[-0.2164272 -0.25811463 0.20283966 0.69852618]
Episode finished after 30 timesteps
[2017-01-18 13:50:51,966] Starting new video recorder writing to /Users/otsuka/git/pydata.okinawa/meetup021_main/3_openai_gym/cartpole-v0-experiment-1/openaigym.video.0.17434.video000008.mp4
[-0.01078491 -0.01191037 -0.04722849 -0.01581334]
[-0.01102311 0.18385596 -0.04754476 -0.32301542]
[-0.00734599 -0.01055786 -0.05400507 -0.04569712]
[-0.00755715 0.18529523 -0.05491901 -0.35491823]
[-0.00385125 -0.00900459 -0.06201737 -0.08004597]
[-0.00403134 0.18694904 -0.06361829 -0.39163277]
[-0.00029236 0.38291344 -0.07145095 -0.70367589]
[ 0.00736591 0.1888506 -0.08552447 -0.43431308]
[ 0.01114292 -0.00496302 -0.09421073 -0.16976878]
[ 0.01104366 0.19137229 -0.0976061 -0.49062276]
[ 0.01487111 0.38772586 -0.10741856 -0.81240188]
[ 0.02262563 0.19422633 -0.1236666 -0.5553459 ]
[ 0.02651015 0.39084779 -0.13477351 -0.88429108]
[ 0.03432711 0.58751696 -0.15245933 -1.21612324]
[ 0.04607745 0.78424055 -0.1767818 -1.55243541]
[ 0.06176226 0.98098658 -0.20783051 -1.89465851]
Episode finished after 16 timesteps
[ 0.01674005 -0.02430572 -0.04798115 -0.02958559]
[ 0.01625393 0.1714703 -0.04857286 -0.33701269]
[ 0.01968334 -0.02292798 -0.05531311 -0.06003399]
[ 0.01922478 -0.21721503 -0.05651379 0.2146971 ]
[ 0.01488048 -0.02133258 -0.05221985 -0.0952637 ]
[ 0.01445383 -0.2156687 -0.05412512 0.18049747]
[ 0.01014045 -0.01981569 -0.05051517 -0.12875685]
[ 0.00974414 -0.21417897 -0.05309031 0.14757102]
[ 0.00546056 -0.40850205 -0.05013889 0.42304372]
[-0.00270948 -0.21270697 -0.04167802 0.11498553]
[-0.00696362 -0.01701338 -0.0393783 -0.19054973]
[-0.00730389 0.17864914 -0.0431893 -0.49539022]
[-0.00373091 0.37435269 -0.0530971 -0.80136551]
[ 0.00375615 0.57016109 -0.06912441 -1.11026739]
[ 0.01515937 0.37601204 -0.09132976 -0.84004569]
[ 0.02267961 0.57225429 -0.10813068 -1.1599959 ]
[ 0.0341247 0.76860593 -0.13133059 -1.4845317 ]
[ 0.04949682 0.9650617 -0.16102123 -1.81517799]
[ 0.06879805 0.77205696 -0.19732479 -1.57655185]
Episode finished after 19 timesteps
In [35]:
# Disabled shell escape: uncomment to run `open .`, which on macOS opens the
# current working directory in Finder (e.g. to inspect the recorded videos).
#!open .
In [36]:
# How to upload the recorded results to the OpenAI Gym server.
import gym
# NOTE(review): the Monitor cell above writes to './cartpole-v0-experiment-1',
# while this example points at '/tmp/cartpole-v0-experiment-1' -- adjust the
# path before uncommenting. Replace 'YOUR_API_KEY' with your own key; prefer
# reading it from an environment variable over hardcoding it in the notebook.
#gym.upload('/tmp/cartpole-v0-experiment-1', api_key='YOUR_API_KEY')
Content source: PyDataOkinawa/meetup021
Similar notebooks: