In [1]:
from ddpg3 import *
%matplotlib inline
[2016-07-24 13:13:46,563] Site environment registry incorrect: Scoreboard did not register all envs: set(['AcrobotContinuous-v0'])
In [2]:
# Build the ddpg object (name comes from the star import of ddpg3) for the
# 'Reacher-v1' environment. Every hyperparameter is passed by keyword.
# NOTE(review): the meaning/order of the `learning_rates` pair and of `ql2`
# is defined in ddpg3.py, which is not visible here -- confirm there.
car = ddpg(
    environment='Reacher-v1',
    noise_scale=1.0,
    maxstep=2e5,
    ql2=0.01,
    learning_rates=(1e-4, 1e-3),
)
# Start the run; this call blocks while training is in progress.
car.main()
[2016-07-24 13:13:46,599] Making new env: Reacher-v1
state dim 11
action dim 2
result after minibatch no. 0 : mean squared error: 484.962677002
result after minibatch no. 1000 : mean squared error: 308.146331787
result after minibatch no. 2000 : mean squared error: 180.791763306
result after minibatch no. 3000 : mean squared error: 98.7425079346
result after minibatch no. 4000 : mean squared error: 46.9946746826
result after minibatch no. 5000 : mean squared error: 19.1027622223
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-2-b81e7fe0bc0c> in <module>()
1 car = ddpg(environment = 'Reacher-v1', noise_scale= 1.0, maxstep= 2e5, ql2= 0.01, learning_rates= (1e-4,1e-3))
----> 2 car.main()
/home/frederik/Dokumente/DeepRL/dpg/ddpg3.py in main(self)
471
472 self.initialize_training(self.sess)
--> 473 return self.start_training()
474
475 def plot_replay_memory_2d_state_histogramm(self):
/home/frederik/Dokumente/DeepRL/dpg/ddpg3.py in start_training(self, dataname, save)
169
170 # run episode
--> 171 episode_length = self.run_episode(test_run= False, enable_render=False, limit= 10000)
172
173 self.train_lengths.append(episode_length)
/home/frederik/Dokumente/DeepRL/dpg/ddpg3.py in run_episode(self, enable_render, limit, test_run)
150
151 if (len(self.replay_memory) > self.warmup) and (self.samples_count % (self.batch_size/2) == 0):
--> 152 self.train_networks()
153
154 state = state_prime
/home/frederik/Dokumente/DeepRL/dpg/ddpg3.py in train_networks(self)
418
419 if self.tensorboard_logs:
--> 420 summary, _, _, mse_val = self.sess.run([self.merged, self.q_train_step, self.mu_train_step, self.q_loss], feed_dict= dict_)
421 if self.step % 10 == 0:
422 self.train_writer.add_summary(summary, self.step)
/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
370 try:
371 result = self._run(None, fetches, feed_dict, options_ptr,
--> 372 run_metadata_ptr)
373 if run_metadata:
374 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
634 try:
635 results = self._do_run(handle, target_list, unique_fetches,
--> 636 feed_dict_string, options, run_metadata)
637 finally:
638 # The movers are no longer used. Delete them.
/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
706 if handle is None:
707 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
--> 708 target_list, options, run_metadata)
709 else:
710 return self._do_call(_prun_fn, self._session, handle, feed_dict,
/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
713 def _do_call(self, fn, *args):
714 try:
--> 715 return fn(*args)
716 except errors.OpError as e:
717 message = compat.as_text(e.message)
/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
695 return tf_session.TF_Run(session, options,
696 feed_dict, fetch_list, target_list,
--> 697 status, run_metadata)
698
699 def _prun_fn(session, handle, feed_dict, fetch_list):
KeyboardInterrupt:
In [ ]:
In [ ]:
Content source: febert/DeepRL
Similar notebooks: