In [ ]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to the library source are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2
In [6]:
from blackhc import mdp
In [7]:
from blackhc.mdp import example
In [8]:
# Show the TWO_ROUND_DMDP example from blackhc.mdp.example using the library's display helper.
mdp.display_mdp(example.TWO_ROUND_DMDP)
In [9]:
# Build the environment object from the MULTI_ROUND_NDMP example; the cells
# below reset, step and render it.
env = example.MULTI_ROUND_NDMP.to_env()
# NOTE(review): pyplot is not referenced in any visible cell — confirm whether
# rendering relies on this import before removing it.
from matplotlib import pyplot
# time.sleep is used by display_env to pause between rendered frames.
import time
def display_env(env_to_show=None, width=500, pause=0.200):
    """Render one frame of an environment, resize its widget, then pause.

    Args:
        env_to_show: Environment to render. When omitted, the notebook-level
            ``env`` is used, so existing ``display_env()`` calls are unchanged.
        width: Value assigned to ``render_widget.width`` after rendering
            (500 by default, as before).
        pause: Seconds to sleep after rendering (0.2 by default, as before),
            presumably so successive frames stay visible long enough to watch.
    """
    target = env if env_to_show is None else env_to_show
    target.render()
    # Width is assigned after render(), matching the original call order.
    target.render_widget.width = width
    time.sleep(pause)
# Play three episodes with uniformly sampled actions, rendering the
# environment after the reset and after every step.
for _ in range(3):
    env.reset()
    display_env()
    is_done = False
    # Keep stepping until the environment reports the episode has ended.
    while not is_done:
        random_action = env.action_space.sample()
        state, reward, is_done, _ = env.step(random_action)
        display_env()
In [24]:
from blackhc.mdp import lp
import functools
import numpy as np
# Comparator handed to the solver: np.allclose with both the relative and
# absolute tolerance tightened to 1e-10.
tight_allclose = functools.partial(np.allclose, rtol=1e-10, atol=1e-10)

solver = lp.LinearProgramming(example.MULTI_ROUND_NDMP)
# Left as the last expression so the resulting Q-table is shown as the cell output.
solver.compute_q_table(max_iterations=10000, all_close=tight_allclose)
Out[24]:
In [ ]: