In [4]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import algos
import features
import parametric
import policy
import chicken
from agents import OffPolicyAgent, OnPolicyAgent
from rlbench import *
from numpy.linalg import pinv
In [ ]:
# Solve using the matrix formulation.
# Builds the ingredients (identity, transition matrix, reward vector and
# per-state discount matrix) for an `ns`-state cyclic chain. Every array is
# sized from `ns`, so changing the state count keeps the shapes consistent.
ns = 8
discount = 0.9
I = np.eye(ns)

# Transition matrix: ones on the first superdiagonal move state i to state
# i + 1 with probability 1; the last state wraps around to state 0.
P = np.diag(np.ones(ns - 1), 1)
P[-1, 0] = 1

# Reward vector (column, shape (ns, 1)): zero everywhere except the last entry.
r = np.zeros((ns, 1))
r[-1, 0] = 1

# Gamma matrix: diagonal with `discount` for every state, except row 0 which
# is zeroed out.
# NOTE(review): presumably a zero discount marks state 0 as the point where
# the return is cut off (episode boundary) -- confirm against how G is used.
G = discount * np.eye(ns)
G[0] = 0