In [1]:
from naf_core import*
%matplotlib inline
[2016-07-19 13:45:16,157] Site environment registry incorrect: Scoreboard did not register all envs: set(['AcrobotContinuous-v0'])
In [2]:
# Instantiate the NAF agent (presumably "Normalized Advantage Function" —
# confirm against naf_core) and start its training loop on
# MountainCarContinuous-v0 (see the env log line in the output below).
# NOTE(review): main() appears to run until interrupted — the run below was
# stopped with a KeyboardInterrupt from the kernel.
car = NAF()
car.main()
[2016-07-19 13:45:16,351] Making new env: MountainCarContinuous-v0
True action space: [ 0.], [ 2.]
True state space: [-1.2 -0.07], [ 0.6 0.07]
Filtered action space: [-1.], [ 1.]
Filtered state space: [-2.02205285 -2.88607976], [ 3.3058819 2.88506784]
state dim 2
action dim 1
[2016-07-19 13:45:17,944] Action '[[ 1.18974927]]' is not contained within action space 'Box(1,)'.
[2016-07-19 13:45:17,947] Observation '[[[ -4.55458212e-01]]
[[ -3.20103608e-04]]]' is not contained within observation space 'Box(2,)'.
writing output
result after minibatch no. 0 : mean squared error: 0.809489488602
plotting the mu() policy learned by NN
plotting the mu() policy learned by NN
plotting the Qfunction
Qfunction for action -1.0
Qfunction for action -0.5
Qfunction for action 0.0
Qfunction for action 0.5
Qfunction for action 1.0
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
writing output
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-2-68a7539ee927> in <module>()
1 car = NAF()
----> 2 car.main()
/home/frederik/Dokumente/DeepRL/NAF/naf_core.pyc in main(self)
289 tf.gfile.MakeDirs(self.summaries_dir)
290
--> 291 self.run()
292
293
/home/frederik/Dokumente/DeepRL/NAF/naf_core.pyc in run(self, maxstep, max_episode_length)
218
219 # run episode
--> 220 episode_length = self.run_episode(test_run= False, enable_render=False, limit= max_episode_length)
221 self.train_lengths.append(episode_length)
222
/home/frederik/Dokumente/DeepRL/NAF/naf_core.pyc in run_episode(self, enable_render, limit, test_run)
200 # if (len(self.replay_memory) > self.warmup) and (self.samples_count % (self.batch_size/2) == 0):
201 if len(self.replay_memory) > self.warmup:
--> 202 self.train_networks()
203
204 state = state_prime
/home/frederik/Dokumente/DeepRL/NAF/naf_core.pyc in train_networks(self)
263
264 dict_ = feed_dict()
--> 265 summary, _, mse_val = self.sess.run([self.log_all, self.train_step, self.loss], feed_dict= dict_)
266
267 if self.step % 10 == 0:
/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
370 try:
371 result = self._run(None, fetches, feed_dict, options_ptr,
--> 372 run_metadata_ptr)
373 if run_metadata:
374 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
582
583 # Validate and process fetches.
--> 584 processed_fetches = self._process_fetches(fetches)
585 unique_fetches = processed_fetches[0]
586 target_list = processed_fetches[1]
/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _process_fetches(self, fetches)
523 try:
524 fetch_t = self.graph.as_graph_element(subfetch, allow_tensor=True,
--> 525 allow_operation=True)
526 fetch_name = compat.as_bytes(fetch_t.name)
527 if isinstance(fetch_t, ops.Operation):
/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in as_graph_element(self, obj, allow_tensor, allow_operation)
2320 """
2321 with self._lock:
-> 2322 return self._as_graph_element_locked(obj, allow_tensor, allow_operation)
2323
2324 def _as_graph_element_locked(self, obj, allow_tensor, allow_operation):
/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in _as_graph_element_locked(self, obj, allow_tensor, allow_operation)
2340 raise ValueError("allow_tensor and allow_operation can't both be False.")
2341
-> 2342 temp_obj = _as_graph_element(obj)
2343 if temp_obj is not None:
2344 obj = temp_obj
/home/frederik/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in _as_graph_element(obj)
121
122
--> 123 def _as_graph_element(obj):
124 """Convert `obj` to a graph element if possible, otherwise return `None`.
125
KeyboardInterrupt:
In [ ]:
Content source: febert/DeepRL
Similar notebooks: