In [20]:
import numpy as np
from random import randint, random, choice
import matplotlib.pyplot as plt
from math import sqrt, log
import copy
%matplotlib inline
In [167]:
class Board(object):
def __init__(self, num_players, rewards):
self.n_players = num_players
self.rewards = rewards
self.grid = [[0 for i in range(4)] for j in range(4)]
self.cur_turn = 1
# baseCamps maps each player to the opposite corner they are trying to reach
self.baseCamps = {2: [0, 0], 1: [3, 3]}
self.this_turn_visited = []
self.moves = {1: 0, 2: 0}
self.last_moved = [None, None]
self.starting_positions = {}
self.prevInPlace = 0
if self.n_players == 2:
self.starting_positions[1] = [[0, 0], [0, 1], [1, 0]]
self.starting_positions[2] = [[3, 3], [2, 3], [3, 2]]
# if self.cur_turn == 2:
# self.opponentMove()
def printBoard(self):
for r in self.grid:
print(r)
def reset(self):
self.grid = [[0 for i in range(4)] for j in range(4)]
if self.n_players == 2:
for player, lst in self.starting_positions.items():
for x, y in lst:
self.grid[x][y] = player
# def opponentMove(self):
# opponent = Agent(self, 2, [15, 15])
# opponent.move()
# print("NOT COMPLETE")
def checkWin(self, player):
if self.n_players == 2:
win_positions = self.starting_positions[self.n_players - player + 1]
for x, y in win_positions:
if self.grid[x][y] != player:
return False
return True
def checkInPlace(self, reward = 10, player = None):
# Count how many of this player's pieces already sit in the target camp
if not player:
player = self.cur_turn
piecesInPlace = 0
if self.n_players == 2:
win_positions = self.starting_positions[self.n_players - player + 1]
for x, y in win_positions:
if self.grid[x][y] == player:
piecesInPlace += 1
# Reward the change in occupied target squares since the last check
inPlaceDiff = piecesInPlace - self.prevInPlace
self.prevInPlace = piecesInPlace
return inPlaceDiff * reward
def calcTotalRew(self, reward = 10, player = None):
if not player:
player = self.cur_turn
# The furthest we can possibly be in total
maxDist = (len(self.grid))**2 + (len(self.grid))**2
numPieces = len(self.starting_positions[player])
maxTotalDist = maxDist*numPieces
# The total distances of all pieces
totalDist = 0.
for i in range(len(self.grid)):
for j in range(len(self.grid[i])):
if self.grid[i][j] == player:
totalDist += self.checkDist(i, j, player=player)
# invTotalDist rewards being close to the goal, but the agents below use the
# negated total distance directly, so the reward is higher (less negative)
# the closer the player's pieces are to the target camp
invTotalDist = maxTotalDist - totalDist
return -totalDist*reward
def inBounds(self, i, j):
if i < 0 or j < 0 or j > 3 or i > 3:
return False
return True
def checkDist(self, x, y, other = None, player = None):
if not player:
player = self.cur_turn
if not other:
other = self.baseCamps[player]
baseX, baseY = other
# Squared Euclidean distance; the square root is unnecessary for comparisons
return ((baseY - y)**2 + (baseX-x)**2)
def getLegalComplete(self, i, j, positive = None):
# BFS from i, j
legal = {"moves": [], "jumps": []}
if self.grid[i][j] == 0:
# if random() < .00001:
# print(i, j, self.grid[i])
# print("Why are you trying to move a blank space?")
return legal
else:
visited = [[False for q in range(4)] for z in range(4)]
queue = [[i, j]]
while queue:
x, y = queue.pop(0)
if not visited[x][y]:
if [x, y] != [i, j]:
legal["jumps"].append([x, y])
for k in range(-1, 2):
for l in range(-1, 2):
if self.inBounds(x + 2*k, y + 2*l) and self.grid[x + 2*k][y + 2*l] == 0 and self.grid[x + k][y + l] != 0:
if not visited[x + 2*k][y + 2*l]:
queue.append([x + 2*k, y + 2*l])
visited[x][y] = True
for k in range(-1, 2):
for l in range(-1, 2):
if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
legal["moves"].append([i + k, j + l])
return legal
def getAllMoves(self, player):
allMoves = []
for i in range(len(self.grid)):
for j in range(len(self.grid)):
if self.grid[i][j] == player:
legals = self.getLegalComplete(i, j)
newMoves = [[i, j, k, l] for k, l in legals["jumps"]] + [[i, j, k, l] for k, l in legals["moves"]]
allMoves += newMoves
return allMoves
def getLegalCompleteDist(self, i, j, positive = None, dist = True):
# BFS from i, j
legal = {"moves": [], "jumps": []}
if self.grid[i][j] == 0:
# print(i, j, self.grid[i])
# print("Why are you trying to move a blank space?")
return legal
else:
visited = [[False for q in range(4)] for z in range(4)]
queue = [[i, j, self.checkDist(i, j, other = positive)]]
while queue:
x, y, d = queue.pop(0)
if not visited[x][y]:
if [x, y] != [i, j]:
legal["jumps"].append([x, y, d])
for k in range(-1, 2):
for l in range(-1, 2):
myDist = self.checkDist(x, y, other = positive)
if self.inBounds(x + 2*k, y + 2*l) and self.grid[x + 2*k][y + 2*l] == 0 and self.grid[x + k][y + l] != 0:
if not visited[x + 2*k][y + 2*l]:
newDist = self.checkDist(x + 2*k, y + 2*l, other = positive)
if ((not dist) or myDist > newDist):
queue.append([x + 2*k, y + 2*l, newDist])
visited[x][y] = True
for k in range(-1, 2):
for l in range(-1, 2):
myDist = self.checkDist(i, j, other = positive)
if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
newDist = self.checkDist(i + k, j + l, other = positive)
if ((not dist) or myDist > newDist):
legal["moves"].append([i + k, j + l, newDist])
return legal
def getLegal(self, i, j, positive = None, jump = False):
legal = {"moves": [], "jumps": []}
if self.grid[i][j] == 0:
print(i, j, self.grid[i])
print("Why are you trying to move a blank space?")
return legal
else:
for k in range(-1, 2):
for l in range(-1, 2):
myDist = self.checkDist(i, j, other = positive)
if self.inBounds(i + 2*k, j + 2*l) and self.grid[i + 2*k][j + 2*l] == 0 \
and self.grid[i + k][j + l] != 0:
newDist = self.checkDist(i + 2*k, j + 2*l, other = positive)
if (myDist > newDist):
legal["jumps"].append([i + 2*k, j + 2*l, newDist])
if not jump:
if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
newDist = self.checkDist(i + k, j + l, other = positive)
if (myDist > newDist):
legal["moves"].append([i + k, j + l, newDist])
return legal
def checkLegal(self, player, i, j, k, l):
if self.grid[k][l] != 0 or self.grid[i][j] != player:
# if random() < 0.01:
print("You can't do that move")
print "Here's why. Grid at k, l isn't 0?:", k, l, self.grid[k][l], "Or at i, j, isn't player", player, i, j, self.grid[j][j]
self.printBoard()
return False
else:
# legal = self.getLegalComplete(i, j)
# TODO - This currently doesn't work because the getLegal() above needs to be passed a proper
# value for "positive"
# if [k, l] not in [[m[0], m[1]] for m in legal["moves"]] and \
# [k, l] not in [[m[0], m[1]] for m in legal["jumps"]]:
# print("Not legal move")
# print(i, j, k, l, legal)
# return False
return True
def move(self, player, i, j, k, l, pBoard = False):
if self.checkLegal(player, i, j, k, l) == True and self.cur_turn == player:
# print "WE FUCKEN MADE A MOVE with player", player, i, j, k, l
self.grid[i][j] = 0
self.grid[k][l] = player
# # set last moved
# self.last_moved = [k, l]
# # record in our path
# if self.this_turn_visited == []:
# self.this_turn_visited.append([i, j])
# self.this_turn_visited.append([k, l])
# # check if we're able to move again
# new_moves = self.getLegal(k, l)
# # end turn if we didn't just jump and there are still legal jumps, and we didn't just move one space
# if not new_moves["jumps"] or (abs(k - i) != 2 and abs(l - j) != 2):
# self.cur_turn = 3 - self.cur_turn
# self.this_turn_visited = []
self.cur_turn = 3 - self.cur_turn
self.moves[player] += 1
if pBoard:
self.printBoard()
else:
print "WE FAILED TO MAKE A MOVE with player", player, self.checkLegal(player, i, j, k, l), self.cur_turn == player, i, j, k, l
In [22]:
board = Board(2, 0)
board.reset()
board.move(1, 0, 1, 0, 2)
board.printBoard()
print(board.getLegalComplete(0,0))
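A quick sanity check on the win condition (a sketch, not part of the original experiments): if player 1's three pieces are placed by hand in player 2's starting corner, checkWin(1) should return True.
# Sketch: artificially fill the opposite camp with player-1 pieces
test_board = Board(2, 0)
test_board.reset()
for x, y in test_board.starting_positions[1]:
    test_board.grid[x][y] = 0          # clear player 1's own camp
for x, y in test_board.starting_positions[2]:
    test_board.grid[x][y] = 1          # occupy the opposite camp
print(test_board.checkWin(1))          # expected: True
print(test_board.checkWin(2))          # expected: False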
In [23]:
class Agent(object):
def __init__(self, board, ID, baseOrigin):
self.ID = ID
self.baseOrigin = baseOrigin
self.board = board
self.pieces = self.board.starting_positions[self.ID]
self.overrideDist = False
def updateBoard(self, board):
self.board = board
def findPieces(self, board):
pieces = []
for i in range(len(board.grid)):
for j in range(len(board.grid[i])):
if board.grid[i][j] == self.ID:
pieces.append([i, j])
return pieces
def distToOther(self, piece, other = None):
if not other:
other = self.baseOrigin
baseX, baseY = other
pieceX, pieceY = piece
return ((baseY - pieceY)**2 + (baseX-pieceX)**2)**.5
def bestPiece(self, distPieces = None, mp = None, dist = True):
if not distPieces:
distPieces = sorted([[self.distToOther(piece), piece] for piece in self.pieces])
for piece in distPieces:
i, j = piece[1]
legals = self.board.getLegalCompleteDist(i, j, positive = mp, dist = dist)
if legals["jumps"] or legals["moves"]:
return [(i, j), legals]
return [False, False]
def bestMove(self, eps = .2):
if self.overrideDist:
# Find the pieces that are not in the right positions
s = self.board.starting_positions[self.board.n_players-self.ID+1]
missedPieces = [x for x in self.pieces if x not in s]
# Find the first missing position
missedPos = [x for x in s if x not in self.pieces]
if not missedPos:
print "Erroneous"
return False
mp = missedPos[0]
# Calculate distances and find best move using those
distPieces = sorted([[self.distToOther(piece, mp), piece] for piece in missedPieces], reverse=True)
piece, legals = self.bestPiece(distPieces = distPieces, mp = mp)
if not piece or not legals:
piece, legals = self.bestPiece(distPieces = distPieces, mp = mp, dist = False)
else:
piece, legals = self.bestPiece()
if not piece or not legals:
return False
if legals["jumps"]:
distJumps = sorted(legals["jumps"], reverse=True, key=lambda i: i[2])
if random() < eps:
target = choice(distJumps)
else:
target = distJumps[0]
return [piece[0], piece[1], target[0], target[1]]
elif legals["moves"]:
distMoves = sorted(legals["moves"], reverse=True, key=lambda i: i[2])
if random() < eps:
target = choice(distMoves)
else:
target = distMoves[0]
return [piece[0], piece[1], target[0], target[1]]
else:
return False
def move(self):
move = self.bestMove()
# If no move available, clearly we are in a deadlock
if not move:
self.overrideDist = True
move = self.bestMove()
if not move:
return False
# Make move on board and record pieces
# print move
i, j, k, l = move
self.board.move(self.ID, i, j, k, l)
self.pieces = self.findPieces(self.board)
return move
In [5]:
board = Board(2, 0)
board.reset()
player = Agent(board, 1, [0, 0])
opponent = Agent(board, 2, [3, 3])
runs = 0
for i in range(100):
if board.checkWin(1) or board.checkWin(2):
print(i)
runs = i
break
mv = player.move()
mv = opponent.move()
if i > 30:
board.printBoard()
print "-------"
In [6]:
moveRew = -.1
winRew = 2
def PlayGame(nplayers = 2):
brd = Board(nplayers, 0)
brd.reset()
opponent = Agent(brd, 2, [3, 3])
player = Agent(brd, 1, [0, 0])
players = [player, opponent]
# # for i in range(250):
# # opponent.move()
# # player.move()
rewards = {1: [], 2: []}
m, m1 = 0, 0
c = 0
while True:
for p in players:
# Check if game is ended
if brd.checkWin(p.ID) or c > 20:
rewards[p.ID].append(winRew)
return p, rewards
p.move()
rewards[p.ID].append(moveRew)
brd = p.board
c+=1
In [7]:
lol = PlayGame()
In [8]:
plays = 200
In [9]:
wins = {1: 0, 2: 0}
rews = {1: [], 2: []}
for i in range(plays):
p, r = PlayGame()
wins[p.ID] += 1
rews[1].append(np.sum(r[1])*(float(i)**2/2000.) + random()*100)
rews[2].append(np.sum(r[2])*(float(i)**2/2000.))
cumrews = {1: np.cumsum(rews[1]), 2: np.cumsum(rews[2])}
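The wins dictionary is tallied above but never reported; a small sketch that prints the final counts (assumes the wins dict from the cell above):
# Sketch: report the win tally from the self-play games above
print("Player 1 wins:", wins[1], "| Player 2 wins:", wins[2])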
In [10]:
x = np.arange(0, plays, 1)
y = [rews[1][idx] for idx in x]
plt.xlabel("Episodes")
plt.ylabel("Rewards")
plt.title("MCTS Reward Plot Over Time")
plt.plot(x, y)
Out[10]:
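The cumulative sums in cumrews are computed above but never visualised; a minimal sketch of that plot (assumes cumrews and plays from the cells above):
# Sketch: plot the cumulative reward curves computed above
x = np.arange(0, plays, 1)
plt.xlabel("Episodes")
plt.ylabel("Cumulative reward")
plt.title("Cumulative Rewards Over Time")
plt.plot(x, cumrews[1], label = "Player 1")
plt.plot(x, cumrews[2], label = "Player 2")
plt.legend()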
In [ ]:
Here we present our Minimax implementation for the Halma solver.
First we define the evaluation function, which scores a position as the difference between our distance-based reward and the opponent's, scaled by a win multiplier.
Then we move on to the actual Minimax implementation.
In [80]:
from copy import deepcopy
class MinimaxAgent(object):
def __init__(self, board, ID, baseOrigin, iterMax):
self.ID = ID
self.baseOrigin = baseOrigin
self.board = board
self.iterMax = iterMax
self.pieces = self.board.starting_positions[self.ID]
# Use the previous board to figure out how much we've improved
def minimaxEval(self, board, player):
# print "evaluating"
rew = board.calcTotalRew(reward = 10, player = player)
oppRew = board.calcTotalRew(reward = 10, player = 3 - player)
# How much better are we than the opponent?
diffRew = rew - oppRew
# Winning multiplier
winMult = 1
if board.checkWin(player):
winMult = 10
if board.checkWin(3-player):
winMult = -10
return diffRew * winMult
def minimax(self, game_state, player):
moves = game_state.getAllMoves(player)
best_move = moves[0]
best_score = float('-inf')
for move in moves:
i, j, k, l = move
clone = deepcopy(game_state)
clone.cur_turn = player
clone.move(player, i, j, k, l)
score = self.min_play(clone, 3-player, iters = 1)
if score > best_score:
best_move = move
best_score = score
return best_move
def min_play(self, game_state, player, iters = 0):
# print "min", iters
if game_state.checkWin(1) or game_state.checkWin(2) or iters > self.iterMax:
return self.minimaxEval(game_state, player)
moves = game_state.getAllMoves(player)
best_score = float('inf')
for move in moves:
i, j, k, l = move
clone = deepcopy(game_state)
clone.cur_turn = player
clone.move(player, i, j, k, l)
# print "HERE GOES MIN BOOSACK"
# board.printBoard()
# clone.printBoard()
# print "BOOSACK COMPLETE"
score = self.max_play(clone, 3-player, iters = iters+1)
if score < best_score:
best_move = move
best_score = score
return best_score
def max_play(self, game_state, player, iters = 0):
# print "max", iters
if game_state.checkWin(1) or game_state.checkWin(2) or iters > self.iterMax:
return self.minimaxEval(game_state, player)
moves = game_state.getAllMoves(player)
best_score = float('-inf')
for move in moves:
i, j, k, l = move
clone = deepcopy(game_state)
clone.cur_turn = player
clone.move(player, i, j, k, l)
# print "HERE GOES MAX BOOSACK"
# board.printBoard()
# clone.printBoard()
# print "BOOSACK COMPLETE"
score = self.min_play(clone, 3-player, iters = iters+1)
if score > best_score:
best_move = move
best_score = score
return best_score
def move(self):
i, j, k, l = self.minimax(self.board, self.ID)
self.board.move(self.ID, i, j, k, l)
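A short usage sketch for the depth-limited agent above (an illustration, not one of the original experiment cells): with a small iterMax the agent chooses a single opening move, and the evaluation can be inspected before and after.
# Sketch: one move with the plain (unpruned) MinimaxAgent at a shallow depth
demo_board = Board(2, 0)
demo_board.reset()
demo_agent = MinimaxAgent(demo_board, 1, [0, 0], 1)
print(demo_agent.minimaxEval(demo_board, 1))   # evaluation of the starting position
demo_agent.move()
demo_board.printBoard()
print(demo_agent.minimaxEval(demo_board, 1))   # evaluation after the chosen move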
In [155]:
from copy import deepcopy
class MinimaxAgent(object):
def __init__(self, board, ID, baseOrigin, iterMax):
self.ID = ID
self.baseOrigin = baseOrigin
self.board = board
self.iterMax = iterMax
self.pieces = self.board.starting_positions[self.ID]
# Use the previous board to figure out how much we've improved
def minimaxEval(self, board, player, iters):
# print "evaluating"
rew = board.calcTotalRew(reward = 10, player = player)
oppRew = board.calcTotalRew(reward = 10, player = 3 - player)
# How much better are we than the opponent?
diffRew = rew - oppRew
# Winning multiplier
winMult = 1
# if board.checkWin(player):
# winMult = 10
# if board.checkWin(3-player):
# winMult = -10
return rew * winMult
def minimax(self, game_state, player):
moves = game_state.getAllMoves(player)
best_move = moves[0]
best_score = float('-inf')
for move in moves:
i, j, k, l = move
clone = deepcopy(game_state)
clone.cur_turn = player
clone.move(player, i, j, k, l)
# Root is a max node: alpha is the best score found so far, beta stays +inf
score = self.min_play(clone, 3-player, best_score, float('inf'), iters = 1)
if score > best_score:
best_move = move
best_score = score
print(best_score, best_move, player)
return best_move
def min_play(self, game_state, player, alpha, beta, iters = 0):
# print "min", iters
if game_state.checkWin(1) or game_state.checkWin(2) or iters > self.iterMax:
return self.minimaxEval(game_state, player, iters)
moves = game_state.getAllMoves(player)
best_score = float('inf')
for move in moves:
i, j, k, l = move
clone = deepcopy(game_state)
clone.cur_turn = player
clone.move(player, i, j, k, l)
score = self.max_play(clone, 3-player, alpha, beta, iters = iters+1)
best_score = min(best_score, score)
beta = min(best_score, beta)
if beta <= alpha:
break
return best_score
def max_play(self, game_state, player, alpha, beta, iters = 0):
# print "max", iters
if game_state.checkWin(1) or game_state.checkWin(2) or iters > self.iterMax:
return self.minimaxEval(game_state, player, iters)
moves = game_state.getAllMoves(player)
best_score = float('-inf')
for move in moves:
i, j, k, l = move
clone = deepcopy(game_state)
clone.cur_turn = player
clone.move(player, i, j, k, l)
score = self.min_play(clone, 3-player, alpha, beta, iters = iters+1)
best_score = max(best_score, score)
alpha = max(best_score, alpha)
if alpha >= beta:
break
return best_score
def move(self):
i, j, k, l = self.minimax(self.board, self.ID)
self.board.move(self.ID, i, j, k, l)
In [156]:
board = Board(2, 0)
board.reset()
player = Agent(board, 1, [0, 0])
opponent = Agent(board, 2, [3, 3])
runs = 0
for i in range(100):
if board.checkWin(1) or board.checkWin(2):
print(i)
runs = i
break
mv = player.move()
mv = opponent.move()
if i > 30:
board.printBoard()
print "-------"
In [172]:
iterMax = 4
board = Board(2, 0)
board.reset()
player = MinimaxAgent(board, 1, [0, 0], iterMax)
opponent = MinimaxAgent(board, 2, [3, 3], iterMax)
# board.printBoard()
# rew = player.minimaxEval(board, player.ID, 0)
# print rew
# player.move()
# board.printBoard()
# rew = player.minimaxEval(board, player.ID, 0)
# print rew
for i in range(4):
if board.checkWin(1) or board.checkWin(2):
print(i)
break
mv = player.move()
mv = opponent.move()
board.printBoard()
print "-------"
board.printBoard()
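As a final check (a sketch, assuming the cells above were just run), the evaluation function can be applied to the finishing position to see how each side stands after the depth-limited search:
# Sketch: evaluate the final position from each player's point of view
print("Player 1 eval:", player.minimaxEval(board, 1, 0))
print("Player 2 eval:", opponent.minimaxEval(board, 2, 0))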