pymdptoolbox


In [49]:
from mdptoolbox.mdp import ValueIteration
from mdptoolbox.example import forest, small

def value_iteration(example, gamma = 0.9):
    """Solve an example MDP with value iteration and print the outcome.

    Parameters
    ----------
    example : callable
        Called with no arguments; must return a two-item sequence that is
        unpacked as ``(transition probabilities, rewards)``, e.g. the
        ``forest`` or ``small`` helpers imported from ``mdptoolbox.example``.
    gamma : float, optional
        Forwarded as the third positional argument of ``ValueIteration``
        (the discount factor). Defaults to 0.9.

    Returns
    -------
    None
        The solver's ``policy`` and ``V`` attributes are printed to stdout
        once ``run()`` has finished; nothing is returned.
    """
    # The example factory supplies both matrices the solver needs.
    transitions, rewards = example()

    # Build the solver and let it run until it stops on its own.
    solver = ValueIteration(transitions, rewards, gamma)
    solver.run()

    print('Optimal policy : {}'.format(solver.policy))
    print('Expected value : {}'.format(solver.V))

In [50]:
# Run value iteration on the toolbox's `forest` example problem, using the
# default gamma (0.9); the policy and value function are printed below.
value_iteration(forest)


Optimal policy : (0, 0, 0)
Expected value : (5.051970000000001, 8.291970000000001, 12.291970000000001)

In [51]:
# Run value iteration on the toolbox's `small` example problem, using the
# default gamma (0.9); the policy and value function are printed below.
value_iteration(small)


Optimal policy : (1, 0)
Expected value : (40.048625392716815, 33.65371175967546)