In [4]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

import algos
import features
import parametric
import policy
import chicken
from agents import OffPolicyAgent, OnPolicyAgent
from rlbench import *

from numpy.linalg import pinv


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
$$ \theta = \left( \Phi^{\top} M (I - P_{\pi} \Gamma \Lambda )^{-1} (I - P_{\pi} \Gamma ) \Phi \right)^{-1} \Phi^{\top} M (I - P_{\pi} \Gamma \Lambda)^{-1} r_{\pi} $$

In [ ]:
# Solve using the matrix formulation.
# Every array below is sized from `ns`, so changing the number of states
# keeps all shapes consistent.
ns = 8

I = np.eye(ns)
# transition matrix: ones on the superdiagonal (row i -> column i+1) ...
P = np.diag(np.ones(ns - 1), 1)
# ... and the last row wraps around to column 0, so every row sums to 1
P[-1, 0] = 1
# reward vector (column of shape (ns, 1)): 1 in the last entry, 0 elsewhere
r = np.zeros((ns, 1))
r[-1] = 1
# gamma matrix: 0.9 on the diagonal, sized by `ns` so it matches I and P
G = np.eye(ns) * 0.9
# zero out row 0 of the gamma matrix (its only nonzero entry is G[0, 0])
G[0] = 0