In this tutorial we will show how to train a basic biomechanical model using keras-rl.
To make it work, follow the instructions in https://github.com/stanfordnmbl/osim-rl#getting-started i.e. run
conda create -n opensim-rl -c kidzik opensim python=3.6.1
activate opensim-rl
pip install git+https://github.com/stanfordnmbl/osim-rl.git
Then run
pip install keras tensorflow keras-rl jupyter
git clone https://github.com/stanfordnmbl/osim-rl.git
cd osim-rl
Follow the instructions above and, once jupyter is installed, type
jupyter notebook
This should open Jupyter in your browser. Navigate to this notebook, i.e. to the file examples/train.arm.ipynb.
The following two blocks load necessary libraries and create a simulator environment.
In [ ]:
import osim
import numpy as np
import sys
# Keras libraries
from keras.optimizers import Adam
import numpy as np
from helpers import *
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from keras.optimizers import RMSprop
import argparse
import math
In [ ]:
# Load arm environment
# `env` is the simulator environment shared by every later cell (network sizing,
# training and evaluation).
from osim.env import Arm2DEnv
# NOTE(review): the positional True presumably turns on the simulator's
# visualization window -- confirm against Arm2DEnv's constructor signature.
env = Arm2DEnv(True)
In [ ]:
# Create networks for DDPG
# Next, we build a very simple model.
# policy_nn (from helpers) receives the observation and action dimensions of the
# environment together with the number and width of the hidden layers; the
# result is later handed to DDPGAgent as its `actor`.
actor = policy_nn(
    env.observation_space.shape[0],
    env.action_space.shape[0],
    hidden_layers=3,
    hidden_size=32,
)
# Keras' Model.summary() prints the table itself and returns None, so wrapping
# it in print() would emit an extra "None" line after the summary.
actor.summary()
In [ ]:
# q_nn (from helpers) receives the observation and action dimensions of the
# environment together with the number and width of the hidden layers.
# Its result is indexed: qfunc[0] is used as the critic model and qfunc[1] as
# the critic's action input when the agent is built.
qfunc = q_nn(
    env.observation_space.shape[0],
    env.action_space.shape[0],
    hidden_layers=3,
    hidden_size=64,
)
# Keras' Model.summary() prints the table itself and returns None, so wrapping
# it in print() would emit an extra "None" line after the summary.
qfunc[0].summary()
In [ ]:
# Set up the agent for training
# Memory object handed to the agent below.
memory = SequentialMemory(window_length=1, limit=100000)

# Random process handed to the agent below; it is sized with the action
# space's shape.
random_process = OrnsteinUhlenbeckProcess(
    size=env.action_space.shape,
    theta=0.15,
    mu=0.0,
    sigma=0.2,
)

# The DDPG agent ties together the actor, the critic (qfunc[0]) with its
# action input (qfunc[1]), the memory and the random process.
agent = DDPGAgent(
    actor=actor,
    critic=qfunc[0],
    critic_action_input=qfunc[1],
    nb_actions=env.action_space.shape[0],
    memory=memory,
    random_process=random_process,
    nb_steps_warmup_actor=100,
    nb_steps_warmup_critic=100,
    gamma=0.99,
    target_model_update=1e-3,
    delta_clip=1.0,
)

# Compile with an Adam optimizer and track mean absolute error as a metric.
agent.compile(Adam(clipnorm=1.0, lr=0.001), metrics=["mae"])
In [ ]:
# Okay, now it's time to learn something! Visualization is switched off here
# (visualize=False) because rendering slows down training quite a lot. You can
# always safely abort the training prematurely by stopping the notebook.
agent.fit(
    env,
    nb_steps=2000,
    visualize=False,
    verbose=0,
    nb_max_episode_steps=200,
    log_interval=10000,
)
# After training is done, the final weights can be saved by uncommenting the
# line below. NOTE: no `args` object is defined in the cells shown here, so
# replace args.model with a file path first.
# agent.save_weights(args.model, overwrite=True)
In [ ]:
# Previously saved weights can be restored by uncommenting the line below.
# NOTE: no `args` object is defined in the cells shown here, so replace
# args.model with a file path first.
# agent.load_weights(args.model)

# Finally, evaluate our algorithm for 2 episodes, without visualization and
# with each episode capped at 1000 steps.
agent.test(
    env,
    nb_episodes=2,
    visualize=False,
    nb_max_episode_steps=1000,
)