In this tutorial we show how to train a basic biomechanical arm model using keras-rl.
To set up the environment, follow the instructions at https://github.com/stanfordnmbl/osim-rl#getting-started, i.e. run
conda create -n opensim-rl -c kidzik opensim git python=2.7
source activate opensim-rl
pip install git+https://github.com/stanfordnmbl/osim-rl.git
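To verify the installation, you can run the check suggested in the osim-rl README from within the opensim-rl environment; if it completes without errors, OpenSim is installed correctly:
python -c "import opensim"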
Then run
git clone https://github.com/stanfordnmbl/osim-rl.git
conda install keras -c conda-forge
pip install git+https://github.com/matthiasplappert/keras-rl.git
cd osim-rl
conda install jupyter
Follow the instructions and, once jupyter is installed, type
jupyter notebook
This should open jupyter in your browser. Navigate to this notebook, i.e. to the file scripts/train.arm.ipynb.
The following two blocks load necessary libraries and create a simulator environment.
In [1]:
# Derived from the keras-rl examples
import opensim as osim
import numpy as np
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, concatenate
from keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from osim.env.arm import ArmEnv
In [2]:
# Load the arm environment; the argument enables visualization
env = ArmEnv(True)
env.reset()
# Total number of steps in training
nallsteps = 10000
# The action space has one dimension per muscle
nb_actions = env.action_space.shape[0]
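Before building the networks, it helps to see how the environment is driven directly. The short sketch below assumes the gym-style interface that osim-rl environments expose (reset() returning an observation, step() returning observation, reward, done, info, and a Box action space with sample()); it simply applies random muscle activations for a few steps.
observation = env.reset()
for _ in range(5):
    action = env.action_space.sample()  # random muscle activations in [0, 1]
    observation, reward, done, info = env.step(action)
    print(reward)
    if done:
        observation = env.reset()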
In [3]:
# Create networks for DDPG
# Next, we build a very simple model.
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
# Sigmoid keeps the outputs in [0, 1], matching the range of the muscle activations
actor.add(Activation('sigmoid'))
print(actor.summary())
In [4]:
# The critic maps (action, observation) pairs to a single Q-value estimate
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
flattened_observation = Flatten()(observation_input)
x = concatenate([action_input, flattened_observation])
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())
In [5]:
# Set up the agent for training: replay memory and Ornstein-Uhlenbeck exploration noise
# (one noise process per muscle, i.e. env.noutput of them)
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
random_process=random_process, gamma=.99, target_model_update=1e-3,
delta_clip=1.)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
In [6]:
# Okay, now it's time to learn something! Visualization is handled by the environment
# (we created it with ArmEnv(True)); it slows down training quite a lot, so feel free to
# disable it. You can always safely abort the training prematurely using Ctrl + C.
agent.fit(env, nb_steps=2000, visualize=False, verbose=0, nb_max_episode_steps=200, log_interval=10000)
# After training is done, we can save the final weights. Note that args.model is not
# defined in this notebook, so pass an explicit path instead (see below).
# agent.save_weights(args.model, overwrite=True)
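If you want to keep the weights you just trained, you can call save_weights with an explicit path; the filename below is only an illustration.
agent.save_weights('arm_ddpg_weights.h5f', overwrite=True)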
In [7]:
# agent.load_weights(args.model)
# Evaluate the freshly trained agent for 2 episodes.
agent.test(env, nb_episodes=2, visualize=False, nb_max_episode_steps=1000)
In [9]:
# Finally, load pre-trained example weights and evaluate them for 5 episodes.
agent.load_weights("../models/example.h5f")
agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000)