In [1]:
import numpy as np
from random import randint, random, choice
import matplotlib.pyplot as plt
from math import sqrt, log
import copy
%matplotlib inline
In [119]:
class Board(object):
def __init__(self, num_players, rewards):
self.n_players = num_players
self.rewards = rewards
self.grid = [[0 for i in range(4)] for j in range(4)]
self.cur_turn = 1
self.baseCamps = {2: [0, 0], 1: [3, 3]}
self.this_turn_visited = []
self.moves = {1: 0, 2: 0}
self.last_moved = [None, None]
self.starting_positions = {}
self.prevInPlace = 0
if self.n_players == 2:
self.starting_positions[1] = [[0, 0], [0, 1], [1, 0]]
self.starting_positions[2] = [[3, 3], [2, 3], [3, 2]]
# if self.cur_turn == 2:
# self.opponentMove()
def printBoard(self):
for r in self.grid:
print(r)
def reset(self):
self.grid = [[0 for i in range(4)] for j in range(4)]
if self.n_players == 2:
for player, lst in self.starting_positions.items():
for x, y in lst:
self.grid[x][y] = player
# def opponentMove(self):
# opponent = Agent(self, 2, [15, 15])
# opponent.move()
# print("NOT COMPLETE")
def checkWin(self, player):
if self.n_players == 2:
win_positions = self.starting_positions[self.n_players - player + 1]
for x, y in win_positions:
if self.grid[x][y] != player:
return False
return True
def checkInPlace(self, reward = 10):
piecesInPlace = 0
if self.n_players == 2:
win_positions = self.starting_positions[self.n_players - player + 1]
for x, y in win_positions:
if self.grid[x][y] == player:
piecesInPlace += 100
# inPlaceDiff = piecesInPlace - self.prevInPlace
# self.prevInPlace = piecesInPlace
for i in range(len(self.grid)):
for j in range(len(self.grid[i])):
if self.grid[i][j] == self.cur_turn:
totalDist += self.checkDist(i, j)
return inPlaceDiff * reward
def calcTotalRew(self, reward = 10):
totalDist = 0.
for i in range(len(self.grid)):
for j in range(len(self.grid[i])):
if self.grid[i][j] == self.cur_turn:
totalDist += self.checkDist(i, j)
return totalDist*reward
def inBounds(self, i, j):
if i < 0 or j < 0 or j > 3 or i > 3:
return False
return True
def checkDist(self, x, y, other = None):
if not other:
other = self.baseCamps[3-self.cur_turn]
baseX, baseY = other
return ((baseY - y)**2 + (baseX-x)**2)
def getLegalComplete(self, i, j, positive = None):
# BFS from i, j
legal = {"moves": [], "jumps": []}
if self.grid[i][j] == 0:
# if random() < .00001:
# print(i, j, self.grid[i])
# print("Why are you trying to move a blank space?")
return legal
else:
visited = [[False for q in range(4)] for z in range(4)]
queue = [[i, j]]
while queue:
x, y = queue.pop(0)
if not visited[x][y]:
if [x, y] != [i, j]:
legal["jumps"].append([x, y])
for k in range(-1, 2):
for l in range(-1, 2):
if self.inBounds(x + 2*k, y + 2*l) and self.grid[x + 2*k][y + 2*l] == 0 and self.grid[x + k][y + l] != 0:
if not visited[x + 2*k][y + 2*l]:
queue.append([x + 2*k, y + 2*l])
visited[x][y] = True
for k in range(-1, 2):
for l in range(-1, 2):
if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
legal["moves"].append([i + k, j + l])
return legal
def getLegalCompleteDist(self, i, j, positive = None, dist = True):
# BFS from i, j
legal = {"moves": [], "jumps": []}
if self.grid[i][j] == 0:
# print(i, j, self.grid[i])
# print("Why are you trying to move a blank space?")
return legal
else:
visited = [[False for q in range(4)] for z in range(4)]
queue = [[i, j, self.checkDist(i, j, other = positive)]]
while queue:
x, y, d = queue.pop(0)
if not visited[x][y]:
if [x, y] != [i, j]:
legal["jumps"].append([x, y, d])
for k in range(-1, 2):
for l in range(-1, 2):
myDist = self.checkDist(x, y, other = positive)
if self.inBounds(x + 2*k, y + 2*l) and self.grid[x + 2*k][y + 2*l] == 0 and self.grid[x + k][y + l] != 0:
if not visited[x + 2*k][y + 2*l]:
newDist = self.checkDist(x + 2*k, y + 2*l, other = positive)
if ((not dist) or myDist > newDist):
queue.append([x + 2*k, y + 2*l, newDist])
visited[x][y] = True
for k in range(-1, 2):
for l in range(-1, 2):
myDist = self.checkDist(i, j, other = positive)
if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
newDist = self.checkDist(i + k, j + l, other = positive)
if ((not dist) or myDist > newDist):
legal["moves"].append([i + k, j + l, newDist])
return legal
def getLegal(self, i, j, positive = None, jump = False):
legal = {"moves": [], "jumps": []}
if self.grid[i][j] == 0:
print(i, j, self.grid[i])
print("Why are you trying to move a blank space?")
return legal
else:
for k in range(-1, 2):
for l in range(-1, 2):
myDist = self.checkDist(i, j, other = positive)
if self.inBounds(i + 2*k, j + 2*l) and self.grid[i + 2*k][j + 2*l] == 0 \
and self.grid[i + k][j + l] != 0:
newDist = self.checkDist(i + 2*k, j + 2*l, other = positive)
if (myDist > newDist):
legal["jumps"].append([i + 2*k, j + 2*l, newDist])
if not jump:
if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
newDist = self.checkDist(i + k, j + l, other = positive)
if (myDist > newDist):
legal["moves"].append([i + k, j + l, newDist])
return legal
def checkLegal(self, player, i, j, k, l):
if self.grid[k][l] != 0 or self.grid[i][j] != player:
# if random() < 0.01:
print("You can't do that move")
print "Here's why. Grid at k, l isn't 0?:", k, l, self.grid[k][l], "Or at i, j, isn't player", player, i, j, self.grid[j][j]
self.printBoard()
return False
else:
# legal = self.getLegalComplete(i, j)
# TODO - This currently doesn't work because the getLegal() above needs to be passed a proper
# value for "positive"
# if [k, l] not in [[m[0], m[1]] for m in legal["moves"]] and \
# [k, l] not in [[m[0], m[1]] for m in legal["jumps"]]:
# print("Not legal move")
# print(i, j, k, l, legal)
# return False
return True
def move(self, player, i, j, k, l, pBoard = False):
if self.checkLegal(player, i, j, k, l) == True and self.cur_turn == player:
# print "WE FUCKEN MADE A MOVE with player", player, i, j, k, l
self.grid[i][j] = 0
self.grid[k][l] = player
# # set last moved
# self.last_moved = [k, l]
# # record in our path
# if self.this_turn_visited == []:
# self.this_turn_visited.append([i, j])
# self.this_turn_visited.append([k, l])
# # check if we're able to move again
# new_moves = self.getLegal(k, l)
# # end turn if we didn't just jump and there are still legal jumps, and we didn't just move one space
# if not new_moves["jumps"] or (abs(k - i) != 2 and abs(l - j) != 2):
# self.cur_turn = 3 - self.cur_turn
# self.this_turn_visited = []
self.cur_turn = 3 - self.cur_turn
self.moves[player] += 1
if pBoard:
self.printBoard()
else:
print "WE FAILED TO MAKE A MOVE with player", player, self.checkLegal(player, i, j, k, l), self.cur_turn == player, i, j, k, l
In [55]:
board = Board(2, 0)
board.reset()
board.move(1, 0, 1, 0, 2)
board.printBoard()
print(board.getLegalComplete(0,0))
In [76]:
class Agent(object):
def __init__(self, board, ID, baseOrigin):
self.ID = ID
self.baseOrigin = baseOrigin
self.board = board
self.pieces = self.board.starting_positions[self.ID]
self.overrideDist = False
def updateBoard(self, board):
self.board = board
def findPieces(self, board):
pieces = []
for i in range(len(board.grid)):
for j in range(len(board.grid[i])):
if board.grid[i][j] == self.ID:
pieces.append([i, j])
return pieces
def distToOther(self, piece, other = None):
if not other:
other = self.baseOrigin
baseX, baseY = other
pieceX, pieceY = piece
return ((baseY - pieceY)**2 + (baseX-pieceX)**2)**.5
def bestPiece(self, distPieces = None, mp = None, dist = True):
if not distPieces:
distPieces = sorted([[self.distToOther(piece), piece] for piece in self.pieces])
for piece in distPieces:
i, j = piece[1]
legals = self.board.getLegalCompleteDist(i, j, positive = mp, dist = dist)
if legals["jumps"] or legals["moves"]:
return [(i, j), legals]
return [False, False]
def bestMove(self, eps = .2):
if self.overrideDist:
# Find the pieces that are not in the right positions
s = self.board.starting_positions[self.board.n_players-self.ID+1]
missedPieces = [x for x in self.pieces if x not in s]
# Find the first missing position
missedPos = [x for x in s if x not in self.pieces]
if not missedPos:
print "Erroneous"
return False
mp = missedPos[0]
# Calculate distances and find best move using those
distPieces = sorted([[self.distToOther(piece, mp), piece] for piece in missedPieces], reverse=True)
piece, legals = self.bestPiece(distPieces = distPieces, mp = mp)
if not piece or not legals:
piece, legals = self.bestPiece(distPieces = distPieces, mp = mp, dist = False)
else:
piece, legals = self.bestPiece()
if not piece or not legals:
return False
if legals["jumps"]:
distJumps = sorted(legals["jumps"], reverse=True, key=lambda i: i[2])
if random() < eps:
target = choice(distJumps)
else:
target = distJumps[0]
return [piece[0], piece[1], target[0], target[1]]
elif legals["moves"]:
distMoves = sorted(legals["moves"], reverse=True, key=lambda i: i[2])
if random() < eps:
target = choice(distMoves)
else:
target = distMoves[0]
return [piece[0], piece[1], target[0], target[1]]
else:
return False
def move(self):
move = self.bestMove()
# If no move available, clearly we are in a deadlock
if not move:
self.overrideDist = True
move = self.bestMove()
if not move:
return False
# Make move on board and record pieces
# print move
i, j, k, l = move
self.board.move(self.ID, i, j, k, l)
self.pieces = self.findPieces(self.board)
return move
In [57]:
board = Board(2, 0)
board.reset()
player = Agent(board, 1, [0, 0])
opponent = Agent(board, 2, [3, 3])
runs = 0
for i in range(100):
if board.checkWin(1) or board.checkWin(2):
print i
runs = i
break
mv = player.move()
mv = opponent.move()
if i > 30:
board.printBoard()
print "-------"
In [6]:
moveRew = -1
winRew = 5
def PlayGame(nplayers = 2):
brd = Board(nplayers, 0)
brd.reset()
opponent = Agent(brd, 2, [15, 15])
player = Agent(brd, 1, [0, 0])
players = [player, opponent]
# # for i in range(250):
# # opponent.move()
# # player.move()
rewards = {1: [], 2: []}
m, m1 = 0, 0
c = 0
while True:
for p in players:
# Check if game is ended
if brd.checkWin(p.ID or c > 20):
rewards[p.ID].append(winRew)
return p, rewards
p.move()
rewards[p.ID].append(moveRew)
brd = p.board
c+=1
In [7]:
lol = PlayGame()
In [8]:
plays = 100
In [9]:
wins = {1: 0, 2: 0}
rews = {1: [], 2: []}
for i in range(plays):
p, r = PlayGame()
wins[p.ID] += 1
rews[1].append(np.sum(r[1]))
rews[2].append(np.sum(r[2]))
cumrews = {1: np.cumsum(rews[1]), 2: np.cumsum(rews[2])}
In [10]:
x = np.arange(0, plays, 1)
y = map(lambda x: rews[1][x], x)
plt.plot(x, y)
Out[10]:
In [11]:
x = np.arange(0, plays, 1)
y = map(lambda x: rews[2][x], x)
plt.plot(x, y)
Out[11]:
In [107]:
class HalmaState:
""" A state of the game of Halma
"""
def __init__(self, players = 2, rewards = 0, opp = None):
self.nplayers = 2
self.rewards = 0
self.board = Board(players, rewards)
self.board.reset()
self.opp = Agent(self.board, 2, [3, 3])
self.size = len(self.board.grid)
def Clone(self):
""" Create a deep clone of this game state.
"""
st = HalmaState()
st.board = copy.deepcopy(self.board)
# st.board.grid = [self.board.grid[i][:] for i in range(len(self.board.grid))]
st.opp = copy.deepcopy(self.opp)
# print "CLONING IN PROGRESS"
return st
def DoMove(self, move):
""" Update a state by carrying out the given move.
Must update playerJustMoved.
"""
self.board = copy.deepcopy(self.opp.board)
i, j, k, l = move
if self.board.cur_turn != 1:
print "something's wrong with cur_turn (should be 1)"
self.board.move(1, i, j, k, l)
if self.board.cur_turn != 2:
print "something's wrong with cur_turn (should be 2)"
self.opp.board = copy.deepcopy(self.board)
self.opp.pieces = self.opp.findPieces(self.opp.board)
self.opp.move()
self.board = copy.deepcopy(self.opp.board)
if self.board.cur_turn != 1:
print "something's wrong with cur_turn (should be 1) - step 2"
if self.board.checkWin(1):
# print 1, "WINS"
return 1
elif self.board.checkWin(2):
# print 2, "WIINS"
return 2
else:
return None
def GetMoves(self):
""" Get all possible moves from this state.
"""
moves = {"moves": [], "jumps": []}
if self.board.checkWin(1) or self.board.checkWin(2):
return moves
self.board = copy.deepcopy(self.opp.board)
for i in range(len(self.board.grid)):
for j in range(len(self.board.grid)):
if self.board.grid[i][j] == self.board.cur_turn:
legals = self.board.getLegalComplete(i, j)
# remove places we've already traveled this turn if necessary
if legals["moves"]:
moves["moves"] += ([[i, j, k, l] for k, l in legals["moves"] if [k, l] not in self.board.this_turn_visited])
if legals["jumps"]:
moves["jumps"] += ([[i, j, k, l] for k, l in legals["jumps"] if [k, l] not in self.board.this_turn_visited])
# # Override
# if not moves["moves"] and not moves["jumps"]:
# # Find the pieces that are not in the right positions
# s = self.board.starting_positions[3 - self.board.cur_turn]
# # Find the first missing position
# missedPos = [[x,y] for x, y in s if self.board.grid[x][y] == 0]
# if missedPos:
# target = missedPos[0]
# else:
# return moves
# for i in range(len(self.board.grid)):
# if self.board.grid[i][j] == self.board.cur_turn:
# legals = self.board.getLegalComplete(i, j, positive = target)
# # print legals, self.playerJustMoved, self.board.cur_turn
# # remove places we've already traveled this turn if necessary
# if legals["moves"]:
# moves["moves"] += ([[i, j, k, l] for k, l in legals["moves"] if [k, l] not in self.board.this_turn_visited])
# if legals["jumps"]:
# moves["jumps"] += ([[i, j, k, l] for k, l in legals["jumps"] if [k, l] not in self.board.this_turn_visited])
if not moves["moves"] and not moves["jumps"]:
print "something went wrong"
return moves
def GetResult(self, player):
""" Get the game result from the viewpoint of playerjm.
"""
reward = 10
#return self.board.checkInPlace(player, reward)
# return self.board.calcTotalRew(reward)
return self.board.checkWin(player)*100 - self.board.moves[player]
def __repr__(self):
s= ""
for x in range(len(self.board.grid)):
for y in range(len(self.board.grid[x])):
s += ["[_]","[X]","[O]"][self.board.grid[x][y]]
s += "\n"
return s
In [108]:
class Node:
""" A node in the game tree. Note wins is always from the viewpoint of playerJustMoved.
Crashes if state not specified.
"""
def __init__(self, move = None, moveType = None, parent = None, state = None):
self.move = move # the move that got us to this node - "None" for the root node
self.moveType = moveType
self.parentNode = parent # "None" for the root node
self.childNodes = []
self.wins = 0
self.visits = 0
self.untriedMoves = state.GetMoves() # future child nodes
self.playerJustMoved = state.board.cur_turn # the only part of the state that the Node needs later
def UCTSelectChild(self):
""" Use the UCB1 formula to select a child node. Often a constant UCTK is applied so we have
lambda c: c.wins/c.visits + UCTK * sqrt(2*log(self.visits)/c.visits to vary the amount of
exploration versus exploitation.
"""
s = sorted(self.childNodes, key = lambda c: c.wins/c.visits + sqrt(2*log(self.visits)/c.visits))[-1]
return s
def AddChild(self, m, s, moveType = "moves"):
""" Remove m from untriedMoves and add a new child node for this move.
Return the added child node
"""
n = Node(move = m, moveType = moveType, parent = self, state = s)
self.untriedMoves[moveType].remove(m)
self.childNodes.append(n)
return n
def Update(self, result):
""" Update this node - one additional visit and result additional wins. result must be from the viewpoint of playerJustmoved.
"""
self.visits += 1
# if result > 0:
# print "booom"
self.wins += result
def __repr__(self):
return "[M:" + str(self.move) + " MT:" + str(self.moveType) + " W/V:" + str(self.wins) + "/" + str(self.visits) + " U:" + str(self.untriedMoves) + "]"
def TreeToString(self, indent):
s = self.IndentString(indent) + str(self)
for c in self.childNodes:
s += c.TreeToString(indent+1)
return s
def IndentString(self,indent):
s = "\n"
for i in range (1,indent+1):
s += "| "
return s
def ChildrenToString(self):
s = ""
for c in self.childNodes:
s += str(c) + "\n"
return s
In [120]:
def UCT(rootstate, itermax, verbose = False):
""" Conduct a UCT search for itermax iterations starting from rootstate.
Return the best move from the rootstate.
Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""
rootnode = Node(state = rootstate)
for i in range(itermax):
node = rootnode
state = rootstate.Clone()
winner = None
# Select
while not node.untriedMoves["moves"] and not node.untriedMoves["jumps"] and node.childNodes != []: # node is fully expanded and non-terminal
node = node.UCTSelectChild()
winner = state.DoMove(node.move)
if winner:
break
# Expand
if node.untriedMoves["moves"] or node.untriedMoves["jumps"] and not winner: # if we can expand (i.e. state/node is non-terminal)
m = choice(node.untriedMoves["moves"] + node.untriedMoves["jumps"])
winner = state.DoMove(m)
if winner:
break
mt = "moves"
if m in node.untriedMoves["jumps"]:
mt = "jumps"
node = node.AddChild(m, state, mt) # add child and descend tree
mvs = state.GetMoves()
asdf = 0
# Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
while mvs["moves"] or mvs["jumps"] and not winner: # while state is non-terminal
mv = choice(mvs["moves"] + mvs["jumps"])
if mv in mvs["moves"]:
mvs["moves"].remove(mv)
elif mv in mvs["jumps"]:
mvs["jumps"].remove(mv)
state.DoMove(mv)
mvs = state.GetMoves()
# Backpropagate
while node != None: # backpropagate from the expanded node and work back to the root node
node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
node = node.parentNode
# if i > 2:
# return
# Output some information about the tree - can be omitted
#if (verbose): print(rootnode.TreeToString(0))
#else: print(rootnode.ChildrenToString())
if not rootnode.childNodes:
return rootnode.move
return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
moveRew = -1
winRew = 100
def UCTPlayGame():
""" Play a sample game between two UCT players where each player gets a different number
of UCT iterations (= simulations = tree nodes).
"""
rewards = {1: [], 2: []}
state = HalmaState()
c = 0
while (state.GetMoves()["jumps"] != [] and state.GetMoves()["moves"] != []):
if not c%1:
print c
print str(state)
print state.board.calcTotalRew(10), state.board.cur_turn
# print(str(state))
if state.board.cur_turn == 1:
m = UCT(rootstate = state, itermax = 200, verbose = False) # play with values for itermax and verbose = True
else:
print "WE SHOULDNT SEE THIS EVER"
# m = UCT(rootstate = state, itermax = 2000, verbose = False)
# m = opponent.move()
# print("Best Move: " + str(m) + "\n")
if m:
state.DoMove(m)
if state.board.checkWin(1) or state.board.checkWin(2):
print "Hallelujah"
rewards[state.board.cur_turn].append(moveRew)
else:
break
c+=1
if state.GetResult(state.board.cur_turn):
print("Player " + str(state.board.cur_turn) + " wins!")
print str(state)
print c
rewards[state.board.cur_turn].append(winRew)
return state.board.cur_turn, rewards
elif state.GetResult(3-state.board.cur_turn):
print("Player " + str(3 - state.board.cur_turn) + " wins!")
print str(state)
print c
rewards[3-state.board.cur_turn].append(winRew)
return 3-state.board.cur_turn, rewards
else:
print "Fuck the castle"
print str(state)
print c
return 3-state.board.cur_turn, rewards
In [121]:
lol = UCTPlayGame()
In [ ]:
# Play tic-tac-toe. The first player will be always X.
# Each tic-tac-toe board is represented by a sequence of three values:
# (set of squares that have an X, set of squares that have a y, board's width)
import random
import os
import string
def TicTacToe(X, O, width=3):
"""Play a tic-tac-toe game between the two given functions. After each
turn, yield the new board.
Each function should get a tic-tac-toe board and a char - 'X' or 'O',
that represents the current turn, and return the number of
square where it wants to put a sign.
width is the board's width and length - it's 3 as default.
X, O -- functions
width -- natural number
"""
board = (set(), set(), width)
turn = 'X'
while result(board) == False:
if turn == 'X':
board[0].add(X(board, turn))
else:
board[1].add(O(board, turn))
yield board
turn = list({'X', 'O'} - set([turn]))[0]
def displayTicTacToe(X, O, width=3):
"""Play a tic-tac-toe game (see TicTacToe's docstring for explanation) and
display the new board to the user when a player plays, and the result of
the game after its end.
X, O - functions
width - natural number"""
for board in TicTacToe(X, O, width):
os.system('cls' if os.name == 'nt' else 'clear') # clearscreen
print str_board(board)
winner = result(board)
if winner in {'X', 'O'}: print winner + ' won!'
elif winner == None: print 'Tie!'
else: raise ValueError("The game didn't end")
def result(board):
"""Return 'X' if X won in the given board, 'O' if O won, None if the game
ended with a tie, False if the game didn't end yet, and raise an exception
if it looks like X and O won both (the board cannot be reached using a
legal game)."""
x_squares, o_squares, width = board
rows = [{width*row+col+1 for col in range(width)} for row in range(width)]
cols = [{width*row+col+1 for row in range(width)} for col in range(width)]
diagonals = [{width*i+i+1 for i in range(width)},
{width*i+width-i for i in range(width)}]
lines = rows + cols + diagonals
x_won = any(line.issubset(x_squares) for line in lines)
o_won = any(line.issubset(o_squares) for line in lines)
if x_won:
if o_won:
raise ValueError("Illegal board")
return 'X'
if o_won:
return 'O'
if x_squares | o_squares == set(range(1, width**2+1)):
# Nobody won, but the board is full
return None # Tie
return False
def str_board(board):
"""Return the board in a string representation, to print it."""
return_str = ''
x_squares, o_squares, width = board
for row in range(width):
for col in range(width):
square = width*row+col+1
return_str += 'X' if square in x_squares else 'O' if square in \
o_squares else ' '
if col != width-1: return_str += ' | '
if row != width-1: return_str += '\n'+'--+-'*(width-1)+'-\n'
return return_str
def human_player(board, turn):
"""Display the board to the user and ask him where does he want to put a
sign. Return the square."""
x_squares, o_squares, width = board
os.system('cls' if os.name == 'nt' else 'clear') # clear screen
print str_board(board)
while True:
try:
square = int(raw_input('Where do you want to add ' + turn + '? '))
assert 0 < square <= width**2 and \
square not in x_squares | o_squares
return square # this will happen if there were no exceptions
except:
print ('You should write an integer between 1 and '+str(width**2)+
', that represents a blank square.')
def minimax_player(board, turn):
"""Return a square where it's worthwhile to play according to the minimax
algorithm."""
return minimax_best_square(board, turn)[0]
def minimax_score_board(board, turn):
"""Return 1, 0 or -1 according to the minimax algorithm -- 1 if the player
that has the given turn has a winning strategy, 0 if he doesn't have a
winning strategy but he has a tie strategy, and -1 if he will lose anyway
(assuming his opponent is playing a perfect game)."""
if result(board) == turn:
return 1
if result(board) == None:
return 0
if result(board) != False:
return -1
return minimax_best_square(board, turn)[1]
def minimax_best_square(board, turn):
"""Choose a square where it's worthwhile to play in the given board and
turn, and return a tuple of the square's number and it's score according
to the minimax algorithm."""
x_squares, o_squares, width = board
max_score = -2
opponent = list({'X', 'O'} - set([turn]))[0]
squares = list(set(range(1, width**2+1)) - (x_squares | o_squares))
random.shuffle(squares)
for square in squares:
# Iterate over the blank squares, to get the best square to play
new_board = (x_squares | set([square] if turn=='X' else []),) + \
(o_squares | set([square] if turn=='O' else []), width)
score = -minimax_score_board(new_board, opponent)
if score == 1: return (square, 1)
if score > max_score:
max_score, max_square = score, square
return (max_square, max_score)
displayTicTacToe(X = minimax_player, O = human_player, width = 3)
raw_input()
In [41]:
lol = UCTPlayGame()
In [147]:
print lol
In [148]:
plays= 100
In [150]:
UCTwins = {1: 0, 2: 0}
UCTrews = {1: [], 2: []}
for i in range(plays):
print i
p, r = UCTPlayGame()
UCTwins[p] += 1
UCTrews[1].append(np.sum(r[1]))
UCTrews[2].append(np.sum(r[2]))
UCTcumrews = {1: np.cumsum(UCTrews[1]), 2: np.cumsum(UCTrews[2])}
In [152]:
x = np.arange(0, plays, 1)
y = map(lambda x: UCTrews[1][x], x)
plt.plot(x, y)
Out[152]:
In [153]:
x = np.arange(0, plays, 1)
y = map(lambda x: UCTrews[2][x], x)
plt.plot(x, y)
Out[153]:
In [ ]:
In [ ]: