Monte Carlo and DP solution of simple blackjack.
https://webdocs.cs.ualberta.ca/~sutton/book/code/blackjack1.lisp
In [1]:
import numpy as np
class blackjack(object):
    """Monte Carlo evaluation of a fixed policy for simple blackjack.

    State tables are indexed as ``[dealer_card, player_count, usable_ace]``
    with shape ``(11, 22, 2)``: dealer showing card 1..10, player count up
    to 21, and 0/1 for whether the player holds an ace counted as 11.

    Attributes:
        V: running-average outcome observed from each state.
        N: number of times each state has been updated.
        policy: 1 means draw another card, 0 means stop. Initialised to
            draw everywhere except on a player count of 20 or 21.
        pc: current player count for the episode in progress.
        ace: True while the player holds an ace counted as 11.
    """

    # Class-level default; ``self._fig_count += 1`` in gr() then shadows it
    # with a per-instance counter.
    _fig_count = 0

    def __init__(self):
        shape = (11, 22, 2)
        self.V = np.zeros(shape)
        # Builtin ``int`` instead of ``np.int``: the alias was removed in
        # NumPy 1.24 and resolves to the same dtype on older versions.
        self.N = np.zeros(shape, dtype=int)
        self.policy = np.ones(shape, dtype=int)
        self.policy[1:11, 20:22, 0:2] = 0
        self.pc = 0
        self.ace = False
        self._episode = []

    def card(self):
        """Return a random card value in 1..10 (13 ranks, 10..13 count as 10)."""
        return min(10, 1 + np.random.randint(13))

    def bust(self):
        """Return True if the player count exceeds 21."""
        return self.pc > 21

    def draw_card(self):
        """Deal one card to the player and update ``pc`` and ``ace``."""
        card = self.card()
        self.pc += card
        # A first ace is counted as 11 (1 already added, plus 10).
        if (not self.ace) and card == 1:
            self.pc += 10
            self.ace = True
        # If that would bust, fall back to counting the ace as 1.
        if self.ace and self.pc > 21:
            self.pc -= 10
            self.ace = False

    def episode(self):
        """Play one hand under ``self.policy`` and learn from its outcome."""
        self._episode = []
        # Draw order is kept as-is so seeded runs reproduce the same hands.
        dc_hidden = self.card()
        dc = self.card()
        pcard1 = self.card()
        pcard2 = self.card()
        self.ace = (pcard1 == 1) or (pcard2 == 1)
        self.pc = pcard1 + pcard2
        if self.ace:
            self.pc += 10
        # On a two-card 21 nothing is recorded; outcome() treats the empty
        # episode list as a player natural.
        if self.pc != 21:
            while True:
                self._episode.append((dc, self.pc, self.ace))
                if self.policy[dc, self.pc, 1 if self.ace else 0] != 1:
                    break
                self.draw_card()
                if self.bust():
                    break
        self.learn(self.outcome(dc, dc_hidden))

    def learn(self, outcome):
        """Fold ``outcome`` into the running average of every visited state.

        Only states with a player count above 11 are updated.
        """
        for dc, pc, ace_boolean in self._episode:
            if pc <= 11:
                continue
            ace = 1 if ace_boolean else 0
            self.N[dc, pc, ace] += 1
            # Incremental mean: V += (sample - V) / n.
            self.V[dc, pc, ace] += (
                (outcome - self.V[dc, pc, ace]) / self.N[dc, pc, ace]
            )

    def outcome(self, dc, dc_hidden):
        """Play out the dealer's hand and score the game for the player.

        Args:
            dc: the dealer's showing card.
            dc_hidden: the dealer's hidden card.

        Returns:
            1 if the player wins, 0 for a draw, -1 if the player loses.
        """
        dace = (dc == 1) or (dc_hidden == 1)
        dcount = dc + dc_hidden
        if dace:
            dcount += 10
        dnatural = dcount == 21
        # episode() records no states when the player was dealt 21.
        pnatural = not self._episode
        if pnatural and dnatural:
            return 0
        if pnatural:
            return 1
        if dnatural:
            return -1
        if self.bust():
            return -1
        # Dealer draws until reaching 17 or more, using the same ace rule
        # as the player.
        while dcount < 17:
            card = self.card()
            dcount += card
            if (not dace) and card == 1:
                dcount += 10
                dace = True
            if dace and dcount > 21:
                dcount -= 10
                dace = False
        if dcount > 21:
            return 1
        if dcount > self.pc:
            return -1
        if dcount == self.pc:
            return 0
        return 1

    def gr(self):
        """Show ``V`` as two images: without (left) and with (right) usable ace.

        Each call opens a new, consecutively numbered matplotlib figure.
        """
        import matplotlib.pyplot as plt
        plt.figure(self._fig_count)
        self._fig_count += 1
        for ace in (0, 1):
            plt.subplot(1, 2, ace + 1)
            plt.imshow(self.V[:, :, ace], interpolation='none', origin='lower')
            # Restrict the view to player counts 12..21 and dealer cards 1..10.
            plt.xlim((11.5, 21.5))
            plt.ylim((0.5, 10.5))
        plt.show()
In [2]:
# Experiment: run 10 batches of 100000 episodes on one learner, printing
# the batch index before each batch.
# NOTE(review): the original indentation was lost. Plotting after every
# batch is assumed, since gr() numbers its figures per call -- confirm.
bj = blackjack()
for count in range(10):
    print(count)
    for _ in range(100000):
        bj.episode()
    bj.gr()