In [1]:
from run import *
%matplotlib inline


[2016-07-25 12:36:33,211] Site environment registry incorrect: Scoreboard did not register all envs: set(['AcrobotContinuous-v0'])

In [2]:
# Construct an experiment and train on the Reacher-v1 MuJoCo task.
# Judging from the traceback frames below, run() loops over episodes
# (run.run -> run_episode -> agent.observe -> ddpg train steps) and does
# not terminate on its own; the KeyboardInterrupt at the end of this
# cell's output is the manual stop, not a failure.
e = Experiment()
e.run('Reacher-v1')


[2016-07-25 12:36:33,402] Making new env: Reacher-v1
at least one observation dim unbounded
('action center a_c', array([ 0.,  0.]))
('action scale a_c', array([ 1.,  1.]))
True action space: [-1. -1.], [ 1.  1.]
True state space: [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [ inf  inf  inf  inf  inf  inf  inf  inf  inf  inf  inf]
Filtered action space: [-1. -1.], [ 1.  1.]
Filtered state space: [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [ inf  inf  inf  inf  inf  inf  inf  inf  inf  inf  inf]
observationspace action space ((11,), (2,))
{'_entry_point': 'gym.envs.mujoco:ReacherEnv',
 '_env_name': 'Reacher',
 '_kwargs': {},
 '_local_only': False,
 'id': 'Reacher-v1',
 'nondeterministic': False,
 'reward_threshold': -3.75,
 'timestep_limit': 50,
 'trials': 100}
using train frequency 1
avg time for sim step: 0.0013977004
avg time for sim step: 0.0014184386
avg time for sim step: 0.0012886807
avg time for sim step: 0.0013628746
avg time for sim step: 0.0013880447
avg time for sim step: 0.0012705362
avg time for sim step: 0.0330415853
avg time for sim step: 0.0329183255
avg time for sim step: 0.0012469968
avg time for sim step: 0.0331341772
avg time for sim step: 0.0336642283
avg time for sim step: 0.0012318745
avg time for sim step: 0.0361001833
avg time for sim step: 0.0362101894
avg time for sim step: 0.0012994267
avg time for sim step: 0.0360359451
avg time for sim step: 0.0365916055
avg time for sim step: 0.0012326813
avg time for sim step: 0.0366618051
avg time for sim step: 0.03698188
avg time for sim step: 0.001216397
avg time for sim step: 0.037620646
avg time for sim step: 0.0371433901
avg time for sim step: 0.0012636503
avg time for sim step: 0.0366989999
avg time for sim step: 0.0368186062
avg time for sim step: 0.0012683049
avg time for sim step: 0.0373879112
avg time for sim step: 0.0372813725
avg time for sim step: 0.0012586797
avg time for sim step: 0.0379120124
avg time for sim step: 0.0384998255
avg time for sim step: 0.00124268979999
avg time for sim step: 0.0399967301
avg time for sim step: 0.0383027803
avg time for sim step: 0.00122902100001
avg time for sim step: 0.0386760052
avg time for sim step: 0.0392870517
avg time for sim step: 0.0012345944
avg time for sim step: 0.0400083723
avg time for sim step: 0.0448308407
avg time for sim step: 0.001236221
avg time for sim step: 0.0437003936
avg time for sim step: 0.0444121948
avg time for sim step: 0.00122921170001
avg time for sim step: 0.0473311487
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-2-7539d8718dd9> in <module>()
      1 e = Experiment()
----> 2 e.run('Reacher-v1')

/home/frederik/Dokumente/DeepRL/ddpg_working/run.pyc in run(self, env)
     92             it +=1
     93 
---> 94             episodelengths.append(self.run_episode(test=False))
     95 
     96 

/home/frederik/Dokumente/DeepRL/ddpg_working/run.pyc in run_episode(self, test, monitor)
    169             if t % self.train_frequency == 0:
    170                 perform_trainstep =True
--> 171             self.agent.observe(r_f, term, observation, test=test and not FLAGS.tot, perform_trainstep = perform_trainstep)
    172 
    173             if test:

/home/frederik/Dokumente/DeepRL/ddpg_working/ddpg.pyc in observe(self, rew, term, obs2, test, perform_trainstep)
    170             if self.t > FLAGS.warmup:
    171                 # print('warmed up')
--> 172                 if perform_trainstep: self.train()
    173 
    174             # elif FLAGS.warmq and self.rm.n > 1000:

/home/frederik/Dokumente/DeepRL/ddpg_working/ddpg.pyc in train(self)
    188 
    189         if FLAGS.async:
--> 190             self._train(obs, act, rew, ob2, term2, log=log, global_step=self.t)
    191         else:
    192             self._train_q(obs, act, rew, ob2, term2, log=log, global_step=self.t)

/home/frederik/Dokumente/DeepRL/ddpg_working/ddpg.pyc in __call__(self, *args, **kwargs)
    241 
    242         out = self._outputs + [self._summary_op] if log else self._outputs
--> 243         res = self._session.run(out, feeds)
    244 
    245         if log:

/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
    370     try:
    371       result = self._run(None, fetches, feed_dict, options_ptr,
--> 372                          run_metadata_ptr)
    373       if run_metadata:
    374         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
    634     try:
    635       results = self._do_run(handle, target_list, unique_fetches,
--> 636                              feed_dict_string, options, run_metadata)
    637     finally:
    638       # The movers are no longer used. Delete them.

/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
    706     if handle is None:
    707       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
--> 708                            target_list, options, run_metadata)
    709     else:
    710       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
    713   def _do_call(self, fn, *args):
    714     try:
--> 715       return fn(*args)
    716     except errors.OpError as e:
    717       message = compat.as_text(e.message)

/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
    695         return tf_session.TF_Run(session, options,
    696                                  feed_dict, fetch_list, target_list,
--> 697                                  status, run_metadata)
    698 
    699     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 

In [ ]: