notebook.community

Edit and run



In [1]:

    
import numpy as np
import sys
# Put "../" on the module search path exactly once (the membership check keeps
# re-running this cell from appending duplicates).
# NOTE(review): presumably this is what lets the `envs`, `utils`, and
# `mc_control_epsilon_greedy` imports below resolve — confirm the repo layout.
if "../" not in sys.path:
  sys.path.append("../")
from collections import defaultdict

import matplotlib
%matplotlib inline
matplotlib.style.use('ggplot')

from envs.blackjack import BlackjackEnv
from utils import plotting

from mc_control_epsilon_greedy import mc_control_epsilon_greedy

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2



In [2]:

    
# Instantiate the Blackjack environment (imported from envs.blackjack) with its
# default constructor arguments; it is passed to the control routine below.
env = BlackjackEnv()



In [3]:

    
# Run epsilon-greedy Monte Carlo control on the Blackjack environment.
# The call returns a pair: Q (iterated below as state -> per-action values)
# and the corresponding policy object.
Q, policy = mc_control_epsilon_greedy(
    env,
    num_episodes=500000,
    epsilon=0.1,
)



In [4]:

    
# Collapse the action-value table Q into a state-value table for plotting:
# each state's value is the largest of its action values (i.e. the value of
# acting greedily with respect to Q). V stays a defaultdict(float), so states
# that never appeared in Q read as 0.0.
V = defaultdict(float)
V.update((state, np.max(action_values)) for state, action_values in Q.items())
plotting.plot_value_function(V, title="Optimal Value Function")