DDPG with iLQG trajectory replay


TODO:

  • Choose the exploration point based on the composition of the replay buffer
  • 2 choices:
    • Use trajectory as initialization for iLQG algorithm
    • Only give goal point to iLQG algorithm
  • Guide towards the goal until either max_iter iterations have elapsed or the goal is reached
  • (Extend the replay buffer to also save the trajectory)

In [1]:
import tensorflow as tf
from drl.ddpg import DDPG
from drl.env.arm import TwoLinkArm 
import numpy as np


Using TensorFlow backend.
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-1-d73374a5d54b> in <module>()
      1 import tensorflow as tf
----> 2 from drl.ddpg import DDPG
      3 from drl.env.arm import TwoLinkArm
      4 import numpy as np

/home/bartkeulen/repositories/drl/drl/ddpg/__init__.py in <module>()
----> 1 from .actor import ActorNetwork
      2 from .critic import CriticNetwork
      3 from .ddpg import DDPG

/home/bartkeulen/repositories/drl/drl/ddpg/actor.py in <module>()
      1 import tensorflow as tf
----> 2 from keras.models import  Model
      3 from keras.layers import Input, Dense, BatchNormalization, Lambda
      4 from keras.initializers import RandomUniform
      5 import keras.backend as K

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/keras/__init__.py in <module>()
      1 from __future__ import absolute_import
      2 
----> 3 from . import activations
      4 from . import applications
      5 from . import backend

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/keras/activations.py in <module>()
      4 from . import backend as K
      5 from .utils.generic_utils import deserialize_keras_object
----> 6 from .engine import Layer
      7 
      8 

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/keras/engine/__init__.py in <module>()
      6 from .topology import Layer
      7 from .topology import get_source_inputs
----> 8 from .training import Model

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/keras/engine/training.py in <module>()
     22 from .. import metrics as metrics_module
     23 from ..utils.generic_utils import Progbar
---> 24 from .. import callbacks as cbks
     25 from ..legacy import interfaces
     26 

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/keras/callbacks.py in <module>()
     24 if K.backend() == 'tensorflow':
     25     import tensorflow as tf
---> 26     from tensorflow.contrib.tensorboard.plugins import projector
     27 
     28 

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/__init__.py in <module>()
     29 from tensorflow.contrib import deprecated
     30 from tensorflow.contrib import distributions
---> 31 from tensorflow.contrib import factorization
     32 from tensorflow.contrib import framework
     33 from tensorflow.contrib import graph_editor

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/factorization/__init__.py in <module>()
     22 from tensorflow.contrib.factorization.python.ops.clustering_ops import *
     23 from tensorflow.contrib.factorization.python.ops.factorization_ops import *
---> 24 from tensorflow.contrib.factorization.python.ops.gmm import *
     25 from tensorflow.contrib.factorization.python.ops.gmm_ops import *
     26 from tensorflow.contrib.factorization.python.ops.wals import *

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/factorization/python/ops/gmm.py in <module>()
     25 from tensorflow.contrib.framework.python.framework import checkpoint_utils
     26 from tensorflow.contrib.framework.python.ops import variables
---> 27 from tensorflow.contrib.learn.python.learn.estimators import estimator
     28 from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
     29 from tensorflow.python.framework import constant_op

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/learn/__init__.py in <module>()
     86 
     87 # pylint: disable=wildcard-import
---> 88 from tensorflow.contrib.learn.python.learn import *
     89 # pylint: enable=wildcard-import
     90 

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/learn/python/__init__.py in <module>()
     21 
     22 # pylint: disable=wildcard-import
---> 23 from tensorflow.contrib.learn.python.learn import *
     24 # pylint: enable=wildcard-import

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/__init__.py in <module>()
     23 from tensorflow.contrib.learn.python.learn import basic_session_run_hooks
     24 from tensorflow.contrib.learn.python.learn import datasets
---> 25 from tensorflow.contrib.learn.python.learn import estimators
     26 from tensorflow.contrib.learn.python.learn import graph_actions
     27 from tensorflow.contrib.learn.python.learn import learn_io as io

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/__init__.py in <module>()
    295 from tensorflow.contrib.learn.python.learn.estimators._sklearn import NotFittedError
    296 from tensorflow.contrib.learn.python.learn.estimators.constants import ProblemType
--> 297 from tensorflow.contrib.learn.python.learn.estimators.dnn import DNNClassifier
    298 from tensorflow.contrib.learn.python.learn.estimators.dnn import DNNEstimator
    299 from tensorflow.contrib.learn.python.learn.estimators.dnn import DNNRegressor

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn.py in <module>()
     21 import six
     22 
---> 23 from tensorflow.contrib import layers
     24 from tensorflow.contrib.framework import deprecated
     25 from tensorflow.contrib.framework import deprecated_arg_values

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/layers/__init__.py in <module>()
     94 
     95 # pylint: disable=unused-import,wildcard-import
---> 96 from tensorflow.contrib.layers.python.layers import *
     97 # pylint: enable=unused-import,wildcard-import
     98 

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/__init__.py in <module>()
     22 from tensorflow.contrib.layers.python.layers.embedding_ops import *
     23 from tensorflow.contrib.layers.python.layers.encoders import *
---> 24 from tensorflow.contrib.layers.python.layers.feature_column import *
     25 from tensorflow.contrib.layers.python.layers.feature_column_ops import *
     26 from tensorflow.contrib.layers.python.layers.initializers import *

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/feature_column.py in <module>()
    136 from tensorflow.contrib.framework.python.ops import variables as contrib_variables
    137 from tensorflow.contrib.layers.python.layers import embedding_ops
--> 138 from tensorflow.contrib.layers.python.layers import layers
    139 from tensorflow.contrib.layers.python.ops import bucketization_op
    140 from tensorflow.contrib.layers.python.ops import sparse_feature_cross_op

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/layers.py in <module>()
   1086     outputs_collections=None,
   1087     trainable=True,
-> 1088     scope=None):
   1089   """Adds a convolution2d_transpose with an optional batch normalization layer.
   1090 

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py in add_arg_scope(func)
    182   _add_op(func)
    183   setattr(func_with_args, '_key_op', _key_op(func))
--> 184   return tf_decorator.make_decorator(func, func_with_args)
    185 
    186 

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/site-packages/tensorflow/python/util/tf_decorator.py in make_decorator(target, decorator_func, decorator_name, decorator_doc, decorator_argspec)
     84   """
     85   if decorator_name is None:
---> 86     decorator_name = _inspect.stack()[1][3]  # Caller's name.
     87   decorator = TFDecorator(decorator_name, target, decorator_doc,
     88                           decorator_argspec)

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/inspect.py in stack(context)
   1463 def stack(context=1):
   1464     """Return a list of records for the stack above the caller's frame."""
-> 1465     return getouterframes(sys._getframe(1), context)
   1466 
   1467 def trace(context=1):

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/inspect.py in getouterframes(frame, context)
   1440     framelist = []
   1441     while frame:
-> 1442         frameinfo = (frame,) + getframeinfo(frame, context)
   1443         framelist.append(FrameInfo(*frameinfo))
   1444         frame = frame.f_back

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/inspect.py in getframeinfo(frame, context)
   1409         raise TypeError('{!r} is not a frame or traceback object'.format(frame))
   1410 
-> 1411     filename = getsourcefile(frame) or getfile(frame)
   1412     if context > 0:
   1413         start = lineno - 1 - context//2

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/inspect.py in getsourcefile(object)
    669         return filename
    670     # only return a non-existent filename if the module has a PEP 302 loader
--> 671     if getattr(getmodule(object, filename), '__loader__', None) is not None:
    672         return filename
    673     # or it is in the linecache

/home/bartkeulen/anaconda3/envs/drl/lib/python3.5/inspect.py in getmodule(object, _filename)
    706     # Copy sys.modules in order to cope with changes while iterating
    707     for modname, module in list(sys.modules.items()):
--> 708         if ismodule(module) and hasattr(module, '__file__'):
    709             f = module.__file__
    710             if f == _filesbymodname.get(modname, None):

KeyboardInterrupt: 

In [ ]:
env = TwoLinkArm(g=0.)

sess = tf.InteractiveSession()

initial =