In [1]:
import numpy as np
import sys
# Make the parent directory importable so project modules that live one level
# up can be resolved (presumably the envs/ and utils/ packages imported in
# this cell -- verify against the repository layout).
# The membership check keeps re-runs of this cell from appending duplicates.
if "../" not in sys.path:
    sys.path.append("../")
from collections import defaultdict
import matplotlib
%matplotlib inline
matplotlib.style.use('ggplot')
from envs.blackjack import BlackjackEnv
from utils import plotting
from mc_control_epsilon_greedy import mc_control_epsilon_greedy
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
In [2]:
# Create the Blackjack environment with its default constructor arguments;
# this instance is passed to mc_control_epsilon_greedy in the next cell.
env = BlackjackEnv()
In [3]:
# Settings for the control run, gathered in one place so they are easy to tune.
mc_settings = {
    "num_episodes": 500000,  # number of episodes passed to the routine
    "epsilon": 0.1,          # epsilon value passed to the routine
}

# Run the imported mc_control_epsilon_greedy routine on the environment.
# The result is unpacked into Q (consumed below as a mapping from state to
# per-action values) and policy.
Q, policy = mc_control_epsilon_greedy(env, **mc_settings)
In [4]:
# For plotting: derive a state-value function from the action-value function
# by taking the value of the best (highest-valued) action in each state.
# V stays a defaultdict(float) so that looking up a state absent from Q
# yields 0.0 instead of raising KeyError (presumably the plotting helper
# queries states that were never visited -- verify against utils.plotting).
V = defaultdict(float)
for state, action_values in Q.items():
    V[state] = np.max(action_values)

plotting.plot_value_function(V, title="Optimal Value Function")