In [ ]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [6]:
from blackhc import mdp

In [7]:
from blackhc.mdp import example

In [8]:
# Display the TWO_ROUND_DMDP example spec using the package's display helper.
mdp.display_mdp(example.TWO_ROUND_DMDP)



In [9]:
# Build an environment object from the MULTI_ROUND_NDMP example spec; the
# cells below call reset(), step(), render() and action_space.sample() on it.
env = example.MULTI_ROUND_NDMP.to_env()

from matplotlib import pyplot
import time 

def display_env(width=500, delay=0.200):
    """Render the notebook-level ``env`` once, resize its widget, and pause.

    Args:
        width: Value assigned to ``env.render_widget.width`` after rendering.
            Defaults to 500, the value that used to be hard-coded.
        delay: Number of seconds to sleep after rendering so that successive
            frames stay visible. Defaults to 0.200, the value that used to be
            hard-coded. Pass 0 to skip the pause.

    Note:
        This function reads the global ``env``; it must be defined before the
        first call.
    """
    env.render()
    # Width is set after render(), as in the original ordering
    # (presumably render_widget only exists once render() has run — TODO confirm).
    env.render_widget.width = width
    time.sleep(delay)
    
    
# Run three episodes: reset, show the initial frame, then keep stepping with
# actions drawn from env.action_space.sample() until the env reports done.
for _ in range(3):
    env.reset()
    display_env()

    is_done = False
    while not is_done:
        sampled_step = env.step(env.action_space.sample())
        state, reward, is_done, _ = sampled_step
        display_env()

In [24]:
from blackhc.mdp import lp
import functools
import numpy as np

# Convergence predicate handed to the solver: np.allclose with both the
# relative and absolute tolerance tightened to 1e-10.
tight_allclose = functools.partial(np.allclose, rtol=1e-10, atol=1e-10)

solver = lp.LinearProgramming(example.MULTI_ROUND_NDMP)
# Last expression of the cell, so the resulting Q-table is shown as output.
solver.compute_q_table(max_iterations=10000, all_close=tight_allclose)


Out[24]:
array([[ 6.,  5.],
       [ 0.,  0.]])

In [ ]: