In [1]:
import numpy as np
from random import randint, random, choice
import matplotlib.pyplot as plt
from math import sqrt, log
import copy
%matplotlib inline
In [2]:
class Board(object):
def __init__(self, num_players, rewards):
self.n_players = num_players
self.rewards = rewards
self.grid = [[0 for i in range(4)] for j in range(4)]
self.cur_turn = 1
self.baseCamps = {2: [0, 0], 1: [3, 3]}
self.this_turn_visited = []
self.moves = {1: 0, 2: 0}
self.last_moved = [None, None]
self.starting_positions = {}
self.prevInPlace = 0
if self.n_players == 2:
self.starting_positions[1] = [[0, 0], [0, 1], [1, 0]]
self.starting_positions[2] = [[3, 3], [2, 3], [3, 2]]
# if self.cur_turn == 2:
# self.opponentMove()
def printBoard(self):
for r in self.grid:
print(r)
def reset(self):
self.grid = [[0 for i in range(4)] for j in range(4)]
if self.n_players == 2:
for player, lst in self.starting_positions.items():
for x, y in lst:
self.grid[x][y] = player
# def opponentMove(self):
# opponent = Agent(self, 2, [15, 15])
# opponent.move()
# print("NOT COMPLETE")
def checkWin(self, player):
if self.n_players == 2:
win_positions = self.starting_positions[self.n_players - player + 1]
for x, y in win_positions:
if self.grid[x][y] != player:
return False
return True
def checkInPlace(self, reward = 10):
piecesInPlace = 0
if self.n_players == 2:
win_positions = self.starting_positions[self.n_players - player + 1]
for x, y in win_positions:
if self.grid[x][y] == player:
piecesInPlace += 100
# inPlaceDiff = piecesInPlace - self.prevInPlace
# self.prevInPlace = piecesInPlace
for i in range(len(self.grid)):
for j in range(len(self.grid[i])):
if self.grid[i][j] == self.cur_turn:
totalDist += self.checkDist(i, j)
return inPlaceDiff * reward
def calcTotalRew(self, reward = 10):
totalDist = 0.
for i in range(len(self.grid)):
for j in range(len(self.grid[i])):
if self.grid[i][j] == self.cur_turn:
totalDist += self.checkDist(i, j)
return totalDist*reward
def inBounds(self, i, j):
if i < 0 or j < 0 or j > 3 or i > 3:
return False
return True
def checkDist(self, x, y, other = None):
if not other:
other = self.baseCamps[3-self.cur_turn]
baseX, baseY = other
return ((baseY - y)**2 + (baseX-x)**2)
def getLegalComplete(self, i, j, positive = None):
# BFS from i, j
legal = {"moves": [], "jumps": []}
if self.grid[i][j] == 0:
# if random() < .00001:
# print(i, j, self.grid[i])
# print("Why are you trying to move a blank space?")
return legal
else:
visited = [[False for q in range(4)] for z in range(4)]
queue = [[i, j]]
while queue:
x, y = queue.pop(0)
if not visited[x][y]:
if [x, y] != [i, j]:
legal["jumps"].append([x, y])
for k in range(-1, 2):
for l in range(-1, 2):
if self.inBounds(x + 2*k, y + 2*l) and self.grid[x + 2*k][y + 2*l] == 0 and self.grid[x + k][y + l] != 0:
if not visited[x + 2*k][y + 2*l]:
queue.append([x + 2*k, y + 2*l])
visited[x][y] = True
for k in range(-1, 2):
for l in range(-1, 2):
if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
legal["moves"].append([i + k, j + l])
return legal
def getLegalCompleteDist(self, i, j, positive = None, dist = True):
# BFS from i, j
legal = {"moves": [], "jumps": []}
if self.grid[i][j] == 0:
# print(i, j, self.grid[i])
# print("Why are you trying to move a blank space?")
return legal
else:
visited = [[False for q in range(4)] for z in range(4)]
queue = [[i, j, self.checkDist(i, j, other = positive)]]
while queue:
x, y, d = queue.pop(0)
if not visited[x][y]:
if [x, y] != [i, j]:
legal["jumps"].append([x, y, d])
for k in range(-1, 2):
for l in range(-1, 2):
myDist = self.checkDist(x, y, other = positive)
if self.inBounds(x + 2*k, y + 2*l) and self.grid[x + 2*k][y + 2*l] == 0 and self.grid[x + k][y + l] != 0:
if not visited[x + 2*k][y + 2*l]:
newDist = self.checkDist(x + 2*k, y + 2*l, other = positive)
if ((not dist) or myDist > newDist):
queue.append([x + 2*k, y + 2*l, newDist])
visited[x][y] = True
for k in range(-1, 2):
for l in range(-1, 2):
myDist = self.checkDist(i, j, other = positive)
if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
newDist = self.checkDist(i + k, j + l, other = positive)
if ((not dist) or myDist > newDist):
legal["moves"].append([i + k, j + l, newDist])
return legal
def getLegal(self, i, j, positive = None, jump = False):
legal = {"moves": [], "jumps": []}
if self.grid[i][j] == 0:
print(i, j, self.grid[i])
print("Why are you trying to move a blank space?")
return legal
else:
for k in range(-1, 2):
for l in range(-1, 2):
myDist = self.checkDist(i, j, other = positive)
if self.inBounds(i + 2*k, j + 2*l) and self.grid[i + 2*k][j + 2*l] == 0 \
and self.grid[i + k][j + l] != 0:
newDist = self.checkDist(i + 2*k, j + 2*l, other = positive)
if (myDist > newDist):
legal["jumps"].append([i + 2*k, j + 2*l, newDist])
if not jump:
if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
newDist = self.checkDist(i + k, j + l, other = positive)
if (myDist > newDist):
legal["moves"].append([i + k, j + l, newDist])
return legal
def checkLegal(self, player, i, j, k, l):
if self.grid[k][l] != 0 or self.grid[i][j] != player:
# if random() < 0.01:
print("You can't do that move")
print "Here's why. Grid at k, l isn't 0?:", k, l, self.grid[k][l], "Or at i, j, isn't player", player, i, j, self.grid[j][j]
self.printBoard()
return False
else:
# legal = self.getLegalComplete(i, j)
# TODO - This currently doesn't work because the getLegal() above needs to be passed a proper
# value for "positive"
# if [k, l] not in [[m[0], m[1]] for m in legal["moves"]] and \
# [k, l] not in [[m[0], m[1]] for m in legal["jumps"]]:
# print("Not legal move")
# print(i, j, k, l, legal)
# return False
return True
def move(self, player, i, j, k, l, pBoard = False):
if self.checkLegal(player, i, j, k, l) == True and self.cur_turn == player:
# print "WE FUCKEN MADE A MOVE with player", player, i, j, k, l
self.grid[i][j] = 0
self.grid[k][l] = player
# # set last moved
# self.last_moved = [k, l]
# # record in our path
# if self.this_turn_visited == []:
# self.this_turn_visited.append([i, j])
# self.this_turn_visited.append([k, l])
# # check if we're able to move again
# new_moves = self.getLegal(k, l)
# # end turn if we didn't just jump and there are still legal jumps, and we didn't just move one space
# if not new_moves["jumps"] or (abs(k - i) != 2 and abs(l - j) != 2):
# self.cur_turn = 3 - self.cur_turn
# self.this_turn_visited = []
self.cur_turn = 3 - self.cur_turn
self.moves[player] += 1
if pBoard:
self.printBoard()
else:
print "WE FAILED TO MAKE A MOVE with player", player, self.checkLegal(player, i, j, k, l), self.cur_turn == player, i, j, k, l
In [3]:
board = Board(2, 0)
board.reset()
board.move(1, 0, 1, 0, 2)
board.printBoard()
print(board.getLegalComplete(0,0))
In [76]:
class Agent(object):
def __init__(self, board, ID, baseOrigin):
self.ID = ID
self.baseOrigin = baseOrigin
self.board = board
self.pieces = self.board.starting_positions[self.ID]
self.overrideDist = False
def updateBoard(self, board):
self.board = board
def findPieces(self, board):
pieces = []
for i in range(len(board.grid)):
for j in range(len(board.grid[i])):
if board.grid[i][j] == self.ID:
pieces.append([i, j])
return pieces
def distToOther(self, piece, other = None):
if not other:
other = self.baseOrigin
baseX, baseY = other
pieceX, pieceY = piece
return ((baseY - pieceY)**2 + (baseX-pieceX)**2)**.5
def bestPiece(self, distPieces = None, mp = None, dist = True):
if not distPieces:
distPieces = sorted([[self.distToOther(piece), piece] for piece in self.pieces])
for piece in distPieces:
i, j = piece[1]
legals = self.board.getLegalCompleteDist(i, j, positive = mp, dist = dist)
if legals["jumps"] or legals["moves"]:
return [(i, j), legals]
return [False, False]
def bestMove(self, eps = .2):
if self.overrideDist:
# Find the pieces that are not in the right positions
s = self.board.starting_positions[self.board.n_players-self.ID+1]
missedPieces = [x for x in self.pieces if x not in s]
# Find the first missing position
missedPos = [x for x in s if x not in self.pieces]
if not missedPos:
print "Erroneous"
return False
mp = missedPos[0]
# Calculate distances and find best move using those
distPieces = sorted([[self.distToOther(piece, mp), piece] for piece in missedPieces], reverse=True)
piece, legals = self.bestPiece(distPieces = distPieces, mp = mp)
if not piece or not legals:
piece, legals = self.bestPiece(distPieces = distPieces, mp = mp, dist = False)
else:
piece, legals = self.bestPiece()
if not piece or not legals:
return False
if legals["jumps"]:
distJumps = sorted(legals["jumps"], reverse=True, key=lambda i: i[2])
if random() < eps:
target = choice(distJumps)
else:
target = distJumps[0]
return [piece[0], piece[1], target[0], target[1]]
elif legals["moves"]:
distMoves = sorted(legals["moves"], reverse=True, key=lambda i: i[2])
if random() < eps:
target = choice(distMoves)
else:
target = distMoves[0]
return [piece[0], piece[1], target[0], target[1]]
else:
return False
def move(self):
move = self.bestMove()
# If no move available, clearly we are in a deadlock
if not move:
self.overrideDist = True
move = self.bestMove()
if not move:
return False
# Make move on board and record pieces
# print move
i, j, k, l = move
self.board.move(self.ID, i, j, k, l)
self.pieces = self.findPieces(self.board)
return move
In [57]:
board = Board(2, 0)
board.reset()
player = Agent(board, 1, [0, 0])
opponent = Agent(board, 2, [3, 3])
runs = 0
for i in range(100):
if board.checkWin(1) or board.checkWin(2):
print i
runs = i
break
mv = player.move()
mv = opponent.move()
if i > 30:
board.printBoard()
print "-------"
In [6]:
moveRew = -1
winRew = 5
def PlayGame(nplayers = 2):
brd = Board(nplayers, 0)
brd.reset()
opponent = Agent(brd, 2, [15, 15])
player = Agent(brd, 1, [0, 0])
players = [player, opponent]
# # for i in range(250):
# # opponent.move()
# # player.move()
rewards = {1: [], 2: []}
m, m1 = 0, 0
c = 0
while True:
for p in players:
# Check if game is ended
if brd.checkWin(p.ID or c > 20):
rewards[p.ID].append(winRew)
return p, rewards
p.move()
rewards[p.ID].append(moveRew)
brd = p.board
c+=1
In [7]:
lol = PlayGame()
In [8]:
plays = 100
In [9]:
wins = {1: 0, 2: 0}
rews = {1: [], 2: []}
for i in range(plays):
p, r = PlayGame()
wins[p.ID] += 1
rews[1].append(np.sum(r[1]))
rews[2].append(np.sum(r[2]))
cumrews = {1: np.cumsum(rews[1]), 2: np.cumsum(rews[2])}
In [10]:
x = np.arange(0, plays, 1)
y = map(lambda x: rews[1][x], x)
plt.plot(x, y)
Out[10]:
In [11]:
x = np.arange(0, plays, 1)
y = map(lambda x: rews[2][x], x)
plt.plot(x, y)
Out[11]:
In [107]:
class HalmaState:
""" A state of the game of Halma
"""
def __init__(self, players = 2, rewards = 0, opp = None):
self.nplayers = 2
self.rewards = 0
self.board = Board(players, rewards)
self.board.reset()
self.opp = Agent(self.board, 2, [3, 3])
self.size = len(self.board.grid)
def Clone(self):
""" Create a deep clone of this game state.
"""
st = HalmaState()
st.board = copy.deepcopy(self.board)
# st.board.grid = [self.board.grid[i][:] for i in range(len(self.board.grid))]
st.opp = copy.deepcopy(self.opp)
# print "CLONING IN PROGRESS"
return st
def DoMove(self, move):
""" Update a state by carrying out the given move.
Must update playerJustMoved.
"""
self.board = copy.deepcopy(self.opp.board)
i, j, k, l = move
if self.board.cur_turn != 1:
print "something's wrong with cur_turn (should be 1)"
self.board.move(1, i, j, k, l)
if self.board.cur_turn != 2:
print "something's wrong with cur_turn (should be 2)"
self.opp.board = copy.deepcopy(self.board)
self.opp.pieces = self.opp.findPieces(self.opp.board)
self.opp.move()
self.board = copy.deepcopy(self.opp.board)
if self.board.cur_turn != 1:
print "something's wrong with cur_turn (should be 1) - step 2"
if self.board.checkWin(1):
# print 1, "WINS"
return 1
elif self.board.checkWin(2):
# print 2, "WIINS"
return 2
else:
return None
def GetMoves(self):
""" Get all possible moves from this state.
"""
moves = {"moves": [], "jumps": []}
if self.board.checkWin(1) or self.board.checkWin(2):
return moves
self.board = copy.deepcopy(self.opp.board)
for i in range(len(self.board.grid)):
for j in range(len(self.board.grid)):
if self.board.grid[i][j] == self.board.cur_turn:
legals = self.board.getLegalComplete(i, j)
# remove places we've already traveled this turn if necessary
if legals["moves"]:
moves["moves"] += ([[i, j, k, l] for k, l in legals["moves"] if [k, l] not in self.board.this_turn_visited])
if legals["jumps"]:
moves["jumps"] += ([[i, j, k, l] for k, l in legals["jumps"] if [k, l] not in self.board.this_turn_visited])
# # Override
# if not moves["moves"] and not moves["jumps"]:
# # Find the pieces that are not in the right positions
# s = self.board.starting_positions[3 - self.board.cur_turn]
# # Find the first missing position
# missedPos = [[x,y] for x, y in s if self.board.grid[x][y] == 0]
# if missedPos:
# target = missedPos[0]
# else:
# return moves
# for i in range(len(self.board.grid)):
# if self.board.grid[i][j] == self.board.cur_turn:
# legals = self.board.getLegalComplete(i, j, positive = target)
# # print legals, self.playerJustMoved, self.board.cur_turn
# # remove places we've already traveled this turn if necessary
# if legals["moves"]:
# moves["moves"] += ([[i, j, k, l] for k, l in legals["moves"] if [k, l] not in self.board.this_turn_visited])
# if legals["jumps"]:
# moves["jumps"] += ([[i, j, k, l] for k, l in legals["jumps"] if [k, l] not in self.board.this_turn_visited])
if not moves["moves"] and not moves["jumps"]:
print "something went wrong"
return moves
def GetResult(self, player):
""" Get the game result from the viewpoint of playerjm.
"""
reward = 10
#return self.board.checkInPlace(player, reward)
# return self.board.calcTotalRew(reward)
return self.board.checkWin(player)*100 - self.board.moves[player]
def __repr__(self):
s= ""
for x in range(len(self.board.grid)):
for y in range(len(self.board.grid[x])):
s += ["[_]","[X]","[O]"][self.board.grid[x][y]]
s += "\n"
return s
In [108]:
class Node:
""" A node in the game tree. Note wins is always from the viewpoint of playerJustMoved.
Crashes if state not specified.
"""
def __init__(self, move = None, moveType = None, parent = None, state = None):
self.move = move # the move that got us to this node - "None" for the root node
self.moveType = moveType
self.parentNode = parent # "None" for the root node
self.childNodes = []
self.wins = 0
self.visits = 0
self.untriedMoves = state.GetMoves() # future child nodes
self.playerJustMoved = state.board.cur_turn # the only part of the state that the Node needs later
def UCTSelectChild(self):
""" Use the UCB1 formula to select a child node. Often a constant UCTK is applied so we have
lambda c: c.wins/c.visits + UCTK * sqrt(2*log(self.visits)/c.visits to vary the amount of
exploration versus exploitation.
"""
s = sorted(self.childNodes, key = lambda c: c.wins/c.visits + sqrt(2*log(self.visits)/c.visits))[-1]
return s
def AddChild(self, m, s, moveType = "moves"):
""" Remove m from untriedMoves and add a new child node for this move.
Return the added child node
"""
n = Node(move = m, moveType = moveType, parent = self, state = s)
self.untriedMoves[moveType].remove(m)
self.childNodes.append(n)
return n
def Update(self, result):
""" Update this node - one additional visit and result additional wins. result must be from the viewpoint of playerJustmoved.
"""
self.visits += 1
# if result > 0:
# print "booom"
self.wins += result
def __repr__(self):
return "[M:" + str(self.move) + " MT:" + str(self.moveType) + " W/V:" + str(self.wins) + "/" + str(self.visits) + " U:" + str(self.untriedMoves) + "]"
def TreeToString(self, indent):
s = self.IndentString(indent) + str(self)
for c in self.childNodes:
s += c.TreeToString(indent+1)
return s
def IndentString(self,indent):
s = "\n"
for i in range (1,indent+1):
s += "| "
return s
def ChildrenToString(self):
s = ""
for c in self.childNodes:
s += str(c) + "\n"
return s
In [120]:
def UCT(rootstate, itermax, verbose = False):
""" Conduct a UCT search for itermax iterations starting from rootstate.
Return the best move from the rootstate.
Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""
rootnode = Node(state = rootstate)
for i in range(itermax):
node = rootnode
state = rootstate.Clone()
winner = None
# Select
while not node.untriedMoves["moves"] and not node.untriedMoves["jumps"] and node.childNodes != []: # node is fully expanded and non-terminal
node = node.UCTSelectChild()
winner = state.DoMove(node.move)
if winner:
break
# Expand
if node.untriedMoves["moves"] or node.untriedMoves["jumps"] and not winner: # if we can expand (i.e. state/node is non-terminal)
m = choice(node.untriedMoves["moves"] + node.untriedMoves["jumps"])
winner = state.DoMove(m)
if winner:
break
mt = "moves"
if m in node.untriedMoves["jumps"]:
mt = "jumps"
node = node.AddChild(m, state, mt) # add child and descend tree
mvs = state.GetMoves()
asdf = 0
# Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
while mvs["moves"] or mvs["jumps"] and not winner: # while state is non-terminal
mv = choice(mvs["moves"] + mvs["jumps"])
if mv in mvs["moves"]:
mvs["moves"].remove(mv)
elif mv in mvs["jumps"]:
mvs["jumps"].remove(mv)
state.DoMove(mv)
mvs = state.GetMoves()
# Backpropagate
while node != None: # backpropagate from the expanded node and work back to the root node
node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
node = node.parentNode
# if i > 2:
# return
# Output some information about the tree - can be omitted
#if (verbose): print(rootnode.TreeToString(0))
#else: print(rootnode.ChildrenToString())
if not rootnode.childNodes:
return rootnode.move
return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
moveRew = -1
winRew = 100
def UCTPlayGame():
""" Play a sample game between two UCT players where each player gets a different number
of UCT iterations (= simulations = tree nodes).
"""
rewards = {1: [], 2: []}
state = HalmaState()
c = 0
while (state.GetMoves()["jumps"] != [] and state.GetMoves()["moves"] != []):
if not c%1:
print c
print str(state)
print state.board.calcTotalRew(10), state.board.cur_turn
# print(str(state))
if state.board.cur_turn == 1:
m = UCT(rootstate = state, itermax = 200, verbose = False) # play with values for itermax and verbose = True
else:
print "WE SHOULDNT SEE THIS EVER"
# m = UCT(rootstate = state, itermax = 2000, verbose = False)
# m = opponent.move()
# print("Best Move: " + str(m) + "\n")
if m:
state.DoMove(m)
if state.board.checkWin(1) or state.board.checkWin(2):
print "Hallelujah"
rewards[state.board.cur_turn].append(moveRew)
else:
break
c+=1
if state.GetResult(state.board.cur_turn):
print("Player " + str(state.board.cur_turn) + " wins!")
print str(state)
print c
rewards[state.board.cur_turn].append(winRew)
return state.board.cur_turn, rewards
elif state.GetResult(3-state.board.cur_turn):
print("Player " + str(3 - state.board.cur_turn) + " wins!")
print str(state)
print c
rewards[3-state.board.cur_turn].append(winRew)
return 3-state.board.cur_turn, rewards
else:
print "Fuck the castle"
print str(state)
print c
return 3-state.board.cur_turn, rewards
In [121]:
lol = UCTPlayGame()
This is the implementation of Fitted Q
The Halma problem space is quite challenging since at any given moment we actually have (size^2) x (totalPieces/2) possible actions.
This is the case, because at any given moment, we have a total of totalPieces/2 moveable pieces.
Not considering legality, given the usage of jumps, we could hypotheticlaly end up anywhere on the board with any piece. Now, many of these moves will be illegal, either by blockage or by inability to reach, but it still comes to the same thing. In an NxN (4x4) board with X (3) total pieces per team, there are obviouslt N^2 (16) possible locations for each of the X (3) pieces, leading to N^2 x X = (16 x 3) = 48 possible actions total.
Many of these (at least 6 per piece, so at least 18) will be illegal as a result of other pieces being in the way. We should still consider them though.
Thus, our action array is the set
In [ ]:
def maxreg(Q, xt1):
maxr = max([Q[action].predict(xt1) if Q[action] else -float("inf") for action in range(4)])
# In the cse nothing has been fit yet, just return 0
if maxr == -float("inf"):
maxr = 0
return maxr
def fittedQ(batch, regressor, eps=30, days=200, num_patients=10, n_estimators = 10):
Q_N = [None for i in range(4)]
gamma = 0.98
trainX = [[] for i in range(4)]
trainY = [[] for i in range(4)]
actions = []
for N in range(eps):
if not N%5:
print "Just finished episode", N
for d in range(1, days):
for patient in range(num_patients):
state = batch[0][patient][d]
action = batch[1][patient][d]
next_state = batch[0][patient][d+1]
reward = batch[2][patient][d] + gamma*maxreg(Q_N, next_state)
trainX[action].append(state)
trainY[action].append(reward)
X = np.array(trainX)
Y = np.array(trainY)
if not Q_N[action]:
Q_N[action] = regressor(n_estimators=n_estimators)
Q_N[action].fit(X[action], np.ravel(Y[action]))
return Q_N, batch
In [ ]:
def run_fittedq(e, eps, days, num_patients, regressor, n_estimators, policy=hiv_treatment.random_policy, itr = 1, batch = None):
# Generate Batch
state_histories, action_histories, reward_histories = \
HIVTreatment.generate_batch(num_patients=num_patients, policy=policy)
# Organize and update Batch
if not batch:
batch = (state_histories, action_histories, reward_histories)
else:
batch = (np.concatenate((state_histories, batch[0])), \
np.concatenate((action_histories, batch[1])), \
np.concatenate((reward_histories, batch[2])))
# Fit Q
newQ, batch = fittedQ(batch, regressor, eps=eps, days=days, num_patients=num_patients, n_estimators = n_estimators)
def policy(state, rng):
lst = [newQ[action].predict(state) if newQ[action] else -float("inf") for action in range(4)]
if random.random() < e:
return random.choice([i for i in range(4)])
return lst.index(max(lst))
print "Q", itr
print "Q", itr, "reward mean: ", np.array(reward_histories).mean()
plt.plot(np.array(reward_histories).mean(axis=0), label="Iteration "+str(itr))
return newQ, policy, batch, len(batch[0])
In [ ]:
In [ ]:
In [ ]:
# Play tic-tac-toe. The first player will be always X.
# Each tic-tac-toe board is represented by a sequence of three values:
# (set of squares that have an X, set of squares that have a y, board's width)
import random
import os
import string
def TicTacToe(X, O, width=3):
"""Play a tic-tac-toe game between the two given functions. After each
turn, yield the new board.
Each function should get a tic-tac-toe board and a char - 'X' or 'O',
that represents the current turn, and return the number of
square where it wants to put a sign.
width is the board's width and length - it's 3 as default.
X, O -- functions
width -- natural number
"""
board = (set(), set(), width)
turn = 'X'
while result(board) == False:
if turn == 'X':
board[0].add(X(board, turn))
else:
board[1].add(O(board, turn))
yield board
turn = list({'X', 'O'} - set([turn]))[0]
def displayTicTacToe(X, O, width=3):
"""Play a tic-tac-toe game (see TicTacToe's docstring for explanation) and
display the new board to the user when a player plays, and the result of
the game after its end.
X, O - functions
width - natural number"""
for board in TicTacToe(X, O, width):
os.system('cls' if os.name == 'nt' else 'clear') # clearscreen
print str_board(board)
winner = result(board)
if winner in {'X', 'O'}: print winner + ' won!'
elif winner == None: print 'Tie!'
else: raise ValueError("The game didn't end")
def result(board):
"""Return 'X' if X won in the given board, 'O' if O won, None if the game
ended with a tie, False if the game didn't end yet, and raise an exception
if it looks like X and O won both (the board cannot be reached using a
legal game)."""
x_squares, o_squares, width = board
rows = [{width*row+col+1 for col in range(width)} for row in range(width)]
cols = [{width*row+col+1 for row in range(width)} for col in range(width)]
diagonals = [{width*i+i+1 for i in range(width)},
{width*i+width-i for i in range(width)}]
lines = rows + cols + diagonals
x_won = any(line.issubset(x_squares) for line in lines)
o_won = any(line.issubset(o_squares) for line in lines)
if x_won:
if o_won:
raise ValueError("Illegal board")
return 'X'
if o_won:
return 'O'
if x_squares | o_squares == set(range(1, width**2+1)):
# Nobody won, but the board is full
return None # Tie
return False
def str_board(board):
"""Return the board in a string representation, to print it."""
return_str = ''
x_squares, o_squares, width = board
for row in range(width):
for col in range(width):
square = width*row+col+1
return_str += 'X' if square in x_squares else 'O' if square in \
o_squares else ' '
if col != width-1: return_str += ' | '
if row != width-1: return_str += '\n'+'--+-'*(width-1)+'-\n'
return return_str
def human_player(board, turn):
"""Display the board to the user and ask him where does he want to put a
sign. Return the square."""
x_squares, o_squares, width = board
os.system('cls' if os.name == 'nt' else 'clear') # clear screen
print str_board(board)
while True:
try:
square = int(raw_input('Where do you want to add ' + turn + '? '))
assert 0 < square <= width**2 and \
square not in x_squares | o_squares
return square # this will happen if there were no exceptions
except:
print ('You should write an integer between 1 and '+str(width**2)+
', that represents a blank square.')
def minimax_player(board, turn):
"""Return a square where it's worthwhile to play according to the minimax
algorithm."""
return minimax_best_square(board, turn)[0]
def minimax_score_board(board, turn):
"""Return 1, 0 or -1 according to the minimax algorithm -- 1 if the player
that has the given turn has a winning strategy, 0 if he doesn't have a
winning strategy but he has a tie strategy, and -1 if he will lose anyway
(assuming his opponent is playing a perfect game)."""
if result(board) == turn:
return 1
if result(board) == None:
return 0
if result(board) != False:
return -1
return minimax_best_square(board, turn)[1]
def minimax_best_square(board, turn):
"""Choose a square where it's worthwhile to play in the given board and
turn, and return a tuple of the square's number and it's score according
to the minimax algorithm."""
x_squares, o_squares, width = board
max_score = -2
opponent = list({'X', 'O'} - set([turn]))[0]
squares = list(set(range(1, width**2+1)) - (x_squares | o_squares))
random.shuffle(squares)
for square in squares:
# Iterate over the blank squares, to get the best square to play
new_board = (x_squares | set([square] if turn=='X' else []),) + \
(o_squares | set([square] if turn=='O' else []), width)
score = -minimax_score_board(new_board, opponent)
if score == 1: return (square, 1)
if score > max_score:
max_score, max_square = score, square
return (max_square, max_score)
displayTicTacToe(X = minimax_player, O = human_player, width = 3)
raw_input()
In [41]:
lol = UCTPlayGame()
In [147]:
print lol
In [148]:
plays= 100
In [150]:
UCTwins = {1: 0, 2: 0}
UCTrews = {1: [], 2: []}
for i in range(plays):
print i
p, r = UCTPlayGame()
UCTwins[p] += 1
UCTrews[1].append(np.sum(r[1]))
UCTrews[2].append(np.sum(r[2]))
UCTcumrews = {1: np.cumsum(UCTrews[1]), 2: np.cumsum(UCTrews[2])}
In [152]:
x = np.arange(0, plays, 1)
y = map(lambda x: UCTrews[1][x], x)
plt.plot(x, y)
Out[152]:
In [153]:
x = np.arange(0, plays, 1)
y = map(lambda x: UCTrews[2][x], x)
plt.plot(x, y)
Out[153]:
In [ ]:
In [ ]: