In [0]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
In this colab we will cover how to define custom networks for your agents. The networks help us define the model that is trained by agents. In TF-Agents you will find several different types of networks which are useful across agents:
Main Networks
ValueNetworks in literature, learns to estimate some version of a Value function mapping some state into an estimate for the expected return of a policy. These networks estimate how good the state the agent is currently in is.ActorNetworks but these generate a distribution which a policy can then sample to generate actions.Helper Networks
CategoricalProjectionNetwork or NormalProjectionNetwork take inputs and generate the required parameters to generate Categorical, or Normal distributions.All examples in TF-Agents come with pre-configured networks. However these networks are not setup to handle complex observations.
If you have an environment which exposes more than one observation/action and you need to customize your networks then this tutorial is for you!
If you haven't installed tf-agents yet, run:
In [0]:
!pip install --pre tf-agents[reverb]
In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import abc
import tensorflow as tf
import numpy as np
from tf_agents.environments import random_py_environment
from tf_agents.environments import tf_py_environment
from tf_agents.networks import encoding_network
from tf_agents.networks import network
from tf_agents.networks import utils
from tf_agents.specs import array_spec
from tf_agents.utils import common as common_utils
from tf_agents.utils import nest_utils
tf.compat.v1.enable_v2_behavior()
In TF-Agents we subclass from Keras Networks. With it we can:
network.variables().As mentioned above the EncodingNetwork allows us to easily define a mapping of pre-processing layers to apply to a network's input to generate some encoding.
The EncodingNetwork is composed of the following mostly optional layers:
The special thing about encoding networks is that input preprocessing is applied. Input preprocessing is possible via preprocessing_layers and preprocessing_combiner layers. Each of these can be specified as a nested structure. If the preprocessing_layers nest is shallower than input_tensor_spec, then the layers will get the subnests. For example, if:
input_tensor_spec = ([TensorSpec(3)] * 2, [TensorSpec(3)] * 5)
preprocessing_layers = (Layer1(), Layer2())
then preprocessing will call:
preprocessed = [preprocessing_layers[0](observations[0]),
preprocessing_layers[1](obsrevations[1])]
However if
preprocessing_layers = ([Layer1() for _ in range(2)],
[Layer2() for _ in range(5)])
then preprocessing will call:
preprocessed = [
layer(obs) for layer, obs in zip(flatten(preprocessing_layers),
flatten(observations))
]
In [0]:
class ActorNetwork(network.Network):
def __init__(self,
observation_spec,
action_spec,
preprocessing_layers=None,
preprocessing_combiner=None,
conv_layer_params=None,
fc_layer_params=(75, 40),
dropout_layer_params=None,
activation_fn=tf.keras.activations.relu,
enable_last_layer_zero_initializer=False,
name='ActorNetwork'):
super(ActorNetwork, self).__init__(
input_tensor_spec=observation_spec, state_spec=(), name=name)
# For simplicity we will only support a single action float output.
self._action_spec = action_spec
flat_action_spec = tf.nest.flatten(action_spec)
if len(flat_action_spec) > 1:
raise ValueError('Only a single action is supported by this network')
self._single_action_spec = flat_action_spec[0]
if self._single_action_spec.dtype not in [tf.float32, tf.float64]:
raise ValueError('Only float actions are supported by this network.')
kernel_initializer = tf.keras.initializers.VarianceScaling(
scale=1. / 3., mode='fan_in', distribution='uniform')
self._encoder = encoding_network.EncodingNetwork(
observation_spec,
preprocessing_layers=preprocessing_layers,
preprocessing_combiner=preprocessing_combiner,
conv_layer_params=conv_layer_params,
fc_layer_params=fc_layer_params,
dropout_layer_params=dropout_layer_params,
activation_fn=activation_fn,
kernel_initializer=kernel_initializer,
batch_squash=False)
initializer = tf.keras.initializers.RandomUniform(
minval=-0.003, maxval=0.003)
self._action_projection_layer = tf.keras.layers.Dense(
flat_action_spec[0].shape.num_elements(),
activation=tf.keras.activations.tanh,
kernel_initializer=initializer,
name='action')
def call(self, observations, step_type=(), network_state=()):
outer_rank = nest_utils.get_outer_rank(observations, self.input_tensor_spec)
# We use batch_squash here in case the observations have a time sequence
# compoment.
batch_squash = utils.BatchSquash(outer_rank)
observations = tf.nest.map_structure(batch_squash.flatten, observations)
state, network_state = self._encoder(
observations, step_type=step_type, network_state=network_state)
actions = self._action_projection_layer(state)
actions = common_utils.scale_to_spec(actions, self._single_action_spec)
actions = batch_squash.unflatten(actions)
return tf.nest.pack_sequence_as(self._action_spec, [actions]), network_state
Let's create a RandomPyEnvironment to generate structured observations and validate our implementation.
In [0]:
action_spec = array_spec.BoundedArraySpec((3,), np.float32, minimum=0, maximum=10)
observation_spec = {
'image': array_spec.BoundedArraySpec((16, 16, 3), np.float32, minimum=0,
maximum=255),
'vector': array_spec.BoundedArraySpec((5,), np.float32, minimum=-100,
maximum=100)}
random_env = random_py_environment.RandomPyEnvironment(observation_spec, action_spec=action_spec)
# Convert the environment to a TFEnv to generate tensors.
tf_env = tf_py_environment.TFPyEnvironment(random_env)
Since we've defined the observations to be a dict we need to create preprocessing layers to handle these.
In [0]:
preprocessing_layers = {
'image': tf.keras.models.Sequential([tf.keras.layers.Conv2D(8, 4),
tf.keras.layers.Flatten()]),
'vector': tf.keras.layers.Dense(5)
}
preprocessing_combiner = tf.keras.layers.Concatenate(axis=-1)
actor = ActorNetwork(tf_env.observation_spec(),
tf_env.action_spec(),
preprocessing_layers=preprocessing_layers,
preprocessing_combiner=preprocessing_combiner)
Now that we have the actor network we can process observations from the environment.
In [0]:
time_step = tf_env.reset()
actor(time_step.observation, time_step.step_type)
This same strategy can be used to customize any of the main networks used by the agents. You can define whatever preprocessing and connect it to the rest of the network. As you define your own custom make sure the output layer definitions of the network match.