In [1]:
import tensorflow as tf
In [2]:
from drl.ddpg import DDPG
from drl.exploration import OrnSteinUhlenbeckNoise, WhiteNoise, LinearDecay
from drl.utilities import Statistics
from drl.env.arm import TwoLinkArm
In [3]:
# Experiment labels; both are handed to Statistics in the cells below.
ENV_NAME = "TwoLinkArm"
ALGO_NAME = "DDPG"

# Forwarded as the `save` argument of Statistics.
SAVE = False

# Hyperparameters. The mapping is passed to Statistics as `settings` and is
# also expanded into DDPG(...) with **, so each key is used as a DDPG keyword
# argument name.
SETTINGS = dict(
    learning_rate_actor=1e-4,
    learning_rate_critic=1e-3,
    gamma=0.95,
    tau=0.001,
    hidden_nodes=[500, 500],
    batch_norm=False,
    batch_size=32,
    buffer_size=10000,
    num_updates_iter=1,
)
In [4]:
# Open an interactive TensorFlow session and bind it to `sess`.
# NOTE(review): the training cell further down rebinds `sess` through its own
# `with tf.Session() as sess:` block, so this session is not the one handed to
# DDPG there, and nothing visible in this notebook closes it. Confirm whether
# this cell is still needed.
sess = tf.InteractiveSession()
In [5]:
# Build a Statistics object on the current `sess` for this environment /
# algorithm pair, using DDPG's summary tags and the SETTINGS hyperparameters.
# NOTE(review): the training cell constructs a second Statistics on its own
# session and rebinds `stat`, so this instance appears to go unused — confirm.
stat = Statistics(sess, ENV_NAME, ALGO_NAME, DDPG.get_summary_tags(), settings=SETTINGS, save=SAVE)
In [ ]:
# Train DDPG on the TwoLinkArm environment.
# The `with` block owns the TensorFlow session and closes it on exit, so no
# explicit sess.close() is needed afterwards.
with tf.Session() as sess:
    env = TwoLinkArm(g=0.)
    stat = Statistics(sess, ENV_NAME, ALGO_NAME, DDPG.get_summary_tags(),
                      settings=SETTINGS, save=SAVE)

    # Exploration: white noise wrapped in a LinearDecay schedule.
    # NOTE(review): the positional arguments (0., 0.1) and (100, 125) are not
    # explained here — presumably noise mean/scale and the decay schedule
    # bounds; confirm against drl.exploration.
    # Alternative noise process kept for reference:
    #   noise = OrnSteinUhlenbeckNoise(action_dim=env.action_dim,
    #                                  mu=0., theta=0.05, sigma=0.05)
    noise = WhiteNoise(env.action_dim, 0., 0.1)
    noise = LinearDecay(noise, 100, 125)

    # SETTINGS is expanded into keyword arguments, so its keys are used as
    # DDPG parameter names.
    ddpg = DDPG(sess=sess,
                env=env,
                stat=stat,
                exploration=noise,
                **SETTINGS)

    # Training runs inside the session context because the agent was built on
    # `sess`.
    ddpg.train(num_episodes=10000,
               max_steps=200,
               render_env=False)