In [1]:
import numpy as np
import sys
if "../" not in sys.path:
  sys.path.append("../")
from collections import defaultdict

import matplotlib
%matplotlib inline
matplotlib.style.use('ggplot')

from envs.blackjack import BlackjackEnv
from utils import plotting

from mc_control_epsilon_greedy import mc_control_epsilon_greedy

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [2]:
# Instantiate the Blackjack environment (BlackjackEnv is imported from envs.blackjack).
env = BlackjackEnv()

In [3]:
# Run mc_control_epsilon_greedy on `env` for 500000 episodes with epsilon=0.1.
# The call returns a pair: Q (used as the action-value function in the
# plotting cell) and the accompanying policy.
Q, policy = mc_control_epsilon_greedy(
    env,
    num_episodes=500000,
    epsilon=0.1,
)

In [4]:
# For plotting: collapse the action-value function Q into a state-value
# function V by taking, for every state, the largest of its action values.
# V stays a defaultdict(float), so states absent from Q read as 0.0.
V = defaultdict(
    float,
    {state: np.max(action_values) for state, action_values in Q.items()},
)
plotting.plot_value_function(V, title="Optimal Value Function")