In [3]:
import functools
import sys

import gym
import matplotlib
from matplotlib import pyplot
import numpy as np

# The parent directory must be on sys.path before the project imports below.
sys.path.append("..")
from hiora_cartpole import driver
from hiora_cartpole import easytile_fa
from hiora_cartpole import features
from hiora_cartpole import fourier_fa
from hiora_cartpole import linfa

import gym_ext.tools as gym_tools

In [67]:
env = gym.make('CartPole-v0')

# Symmetric, hand-picked bounds for the four observation components. They are
# used instead of env.observation_space.high; state_ranges is what the feature
# constructors and the warning_clip_obs observation mapping receive later on.
clipped_high = np.array([2.5, 3.6, 0.27, 3.7])
clipped_low = -clipped_high
# Row 0 holds the lower bounds, row 1 the upper bounds.
state_ranges = np.array([clipped_low, clipped_high])


[2016-09-23 13:07:02,758] Making new env: CartPole-v0

Tile coding


In [71]:
# Build the tile-coding feature function over the clipped state ranges.
# NOTE(review): the positional arguments 2, [5, 7, 5, 7] and 8 are passed
# through unchanged. 2 is presumably the number of actions (the Fourier setup
# passes n_acts=2); confirm the meaning of the other two against
# easytile_fa.make_feature_vec.
easyt_n_weights, easyt_feature_vec = easytile_fa.make_feature_vec(
    state_ranges, 2, [5, 7, 5, 7], 8)

# Initial learner state for the tile-coding agent. No theta is supplied
# (theta=None), and map_obs is warning_clip_obs bound to the same ranges the
# features were built for.
eexperience = linfa.init(lmbda=0.9,
                         init_alpha=0.05,
                         epsi=0.01,
                         feature_vec=easyt_feature_vec,
                         n_weights=easyt_n_weights,
                         act_space=env.action_space,
                         theta=None,
                         is_use_alpha_bounds=True,
                         map_obs=functools.partial(gym_tools.warning_clip_obs,
                                                   ranges=state_ranges))

In [72]:
# Train the tile-coding agent. The experience from the previous run is fed back
# in and rebound, so re-running this cell continues from the current state
# rather than starting over.
(eexperience, steps_per_episode, alpha_per_episode) = driver.train(
    env, linfa, eexperience, n_episodes=700, max_steps=500, is_render=False)

# Two curves with different scales on a shared x-axis.
# Credits: http://matplotlib.org/examples/api/two_scales.html
fig, ax1 = pyplot.subplots()
ax1.plot(steps_per_episode, color='b')
ax1.set_title('Tile coding: training')
ax1.set_xlabel('Episode')
# The label colours match the curve colours so the two axes can be told apart.
ax1.set_ylabel('Steps per episode', color='b')
ax2 = ax1.twinx()
ax2.plot(alpha_per_episode, color='r')
ax2.set_ylabel('Alpha per episode', color='r')
pyplot.show()



In [82]:
# Run driver.exec_greedy on the tile-coding experience with rendering on.
# Note that this rebinds steps_per_episode to the result of the greedy run.
steps_per_episode = driver.exec_greedy(
    env,
    eexperience,
    n_episodes=15,
    max_steps=600,
    is_render=True)

In [75]:
%matplotlib notebook
pyplot.plot(eexperience.theta)
pyplot.show()



In [36]:
# Display the p_feat attribute of the tile-coding experience as the cell output.
# NOTE(review): looks like a leftover inspection cell -- confirm it is still needed.
eexperience.p_feat

In [28]:
# Short extra training run on the tile-coding experience with rendering on.
# All three names are rebound to the results of this run.
(eexperience, steps_per_episode, alpha_per_episode) = driver.train(
    env,
    linfa,
    eexperience,
    n_episodes=50,
    max_steps=100,
    is_render=True)

Sarsa with Fourier basis


In [97]:
# Build the order-3 Fourier-basis feature function for two actions over the
# clipped state ranges.
four_n_weights, four_feature_vec = fourier_fa.make_feature_vec(
    state_ranges,
    n_acts=2,
    order=3)

# Initial learner state for the Fourier-basis agent. No theta is supplied
# (theta=None), and map_obs is warning_clip_obs bound to the same ranges the
# features were built for. choose_action is not passed, so linfa.init's
# default applies.
fexperience = linfa.init(lmbda=0.9,
                         init_alpha=0.001,
                         epsi=0.1,
                         feature_vec=four_feature_vec,
                         n_weights=four_n_weights,
                         act_space=env.action_space,
                         theta=None,
                         is_use_alpha_bounds=True,
                         map_obs=functools.partial(gym_tools.warning_clip_obs,
                                                   ranges=state_ranges))

In [98]:
# Train the Fourier-basis agent. The experience from the previous run is fed
# back in and rebound, so re-running this cell continues from the current state
# rather than starting over.
(fexperience, steps_per_episode, alpha_per_episode) = driver.train(
    env, linfa, fexperience, n_episodes=700, max_steps=500, is_render=False)

# Two curves with different scales on a shared x-axis.
# Credits: http://matplotlib.org/examples/api/two_scales.html
fig, ax1 = pyplot.subplots()
ax1.plot(steps_per_episode, color='b')
ax1.set_title('Sarsa with Fourier basis: training')
ax1.set_xlabel('Episode')
# The label colours match the curve colours so the two axes can be told apart.
ax1.set_ylabel('Steps per episode', color='b')
ax2 = ax1.twinx()
ax2.plot(alpha_per_episode, color='r')
ax2.set_ylabel('Alpha per episode', color='r')
pyplot.show()


Haha, performance just explodes (in the good sense) after 30 or so episodes, and it doesn't diverge, at least not within the 700 episodes.


In [99]:
# Plot theta of the Fourier-basis experience on a figure of its own. With the
# interactive notebook backend, a bare pyplot.plot() may draw into a figure
# that an earlier cell left open.
fig, ax = pyplot.subplots()
ax.plot(fexperience.theta)
ax.set_title('Sarsa with Fourier basis: theta')
ax.set_xlabel('Index')
ax.set_ylabel('theta')
pyplot.show()

In [100]:
# Run driver.exec_greedy on the Fourier-basis experience with rendering on.
# The recorded run of this cell was stopped by hand (KeyboardInterrupt), so
# steps_per_episode was not rebound by it.
steps_per_episode = driver.exec_greedy(
    env,
    fexperience,
    n_episodes=10,
    max_steps=600,
    is_render=True)


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-100-4b4073f3b212> in <module>()
----> 1 steps_per_episode = driver.exec_greedy(env, fexperience, n_episodes=10, max_steps=600, is_render=True)

/home/erle/repos/cartpole/hiora_cartpole/driver.py in exec_greedy(env, experience, n_episodes, max_steps, is_render)
     84 
     85         for t in xrange(max_steps):
---> 86             is_render and env.render() # pylint: disable=expression-not-assigned
     87             action                  = greedy_act(experience, observation)
     88             observation, _, done, _ = env.step(action)

/home/erle/.local/lib/python2.7/site-packages/gym/core.pyc in render(self, mode, close)
    190             raise error.UnsupportedMode('Unsupported rendering mode: {}. (Supported modes for {}: {})'.format(mode, self, modes))
    191 
--> 192         return self._render(mode=mode, close=close)
    193 
    194     def close(self):

/home/erle/.local/lib/python2.7/site-packages/gym/envs/classic_control/cartpole.pyc in _render(self, mode, close)
    147         self.poletrans.set_rotation(-x[2])
    148 
--> 149         return self.viewer.render(return_rgb_array = mode=='rgb_array')

/home/erle/.local/lib/python2.7/site-packages/gym/envs/classic_control/rendering.pyc in render(self, return_rgb_array)
     80     def render(self, return_rgb_array=False):
     81         glClearColor(1,1,1,1)
---> 82         self.window.clear()
     83         self.window.switch_to()
     84         self.window.dispatch_events()

/home/erle/.local/lib/python2.7/site-packages/pyglet/window/__init__.pyc in clear(self)
   1149         buffer.  The window must be the active context (see `switch_to`).
   1150         '''
-> 1151         gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT)
   1152 
   1153     def dispatch_event(self, *args):

/home/erle/.local/lib/python2.7/site-packages/pyglet/gl/lib.pyc in errcheck(result, func, arguments)
     82     pass
     83 
---> 84 def errcheck(result, func, arguments):
     85     if _debug_gl_trace:
     86         try:

KeyboardInterrupt: 

Q-learning


In [101]:
# Build the order-3 Fourier-basis feature function for two actions over the
# clipped state ranges (same construction as for the Sarsa agent).
qfour_n_weights, qfour_feature_vec = fourier_fa.make_feature_vec(
    state_ranges,
    n_acts=2,
    order=3)

# Initial learner state for the Q-learning agent. The arguments match the
# Sarsa/Fourier setup except for choose_action, which is set explicitly to
# linfa.choose_action_Q.
qfexperience = linfa.init(lmbda=0.9,
                          init_alpha=0.001,
                          epsi=0.1,
                          feature_vec=qfour_feature_vec,
                          n_weights=qfour_n_weights,
                          act_space=env.action_space,
                          theta=None,
                          is_use_alpha_bounds=True,
                          map_obs=functools.partial(gym_tools.warning_clip_obs,
                                                    ranges=state_ranges),
                          choose_action=linfa.choose_action_Q)

In [102]:
# Train the Q-learning agent. The experience from the previous run is fed back
# in and rebound, so re-running this cell continues from the current state
# rather than starting over.
(qfexperience, steps_per_episode, alpha_per_episode) = driver.train(
    env, linfa, qfexperience, n_episodes=300, max_steps=500, is_render=False)

# Two curves with different scales on a shared x-axis.
# Credits: http://matplotlib.org/examples/api/two_scales.html
fig, ax1 = pyplot.subplots()
ax1.plot(steps_per_episode, color='b')
ax1.set_title('Q-learning: training')
ax1.set_xlabel('Episode')
# The label colours match the curve colours so the two axes can be told apart.
ax1.set_ylabel('Steps per episode', color='b')
ax2 = ax1.twinx()
ax2.plot(alpha_per_episode, color='r')
ax2.set_ylabel('Alpha per episode', color='r')
pyplot.show()



In [103]:
# Run driver.exec_greedy on the Q-learning experience with rendering on.
# Note that this rebinds steps_per_episode to the result of the greedy run.
steps_per_episode = driver.exec_greedy(
    env,
    qfexperience,
    n_episodes=10,
    max_steps=600,
    is_render=True)

In [106]:
# Plot theta of the Q-learning experience on a figure of its own, with a title
# and axis labels so the figure can be read without the surrounding cells.
fig, ax = pyplot.subplots()
ax.plot(qfexperience.theta)
ax.set_title('Q-learning: theta')
ax.set_xlabel('Index')
ax.set_ylabel('theta')
pyplot.show()



In [ ]: