In [20]:
import numpy as np
from random import randint, random, choice
import matplotlib.pyplot as plt
from math import sqrt, log
import copy
%matplotlib inline

In [167]:
class Board(object):
    def __init__(self, num_players, rewards):
        self.n_players = num_players
        self.rewards = rewards
        self.grid = [[0 for i in range(4)] for j in range(4)]
        self.cur_turn = 1
        self.baseCamps = {2: [0, 0], 1: [3, 3]}
        self.this_turn_visited = []
        self.moves = {1: 0, 2: 0}
        self.last_moved = [None, None]
        self.starting_positions = {}
        self.prevInPlace = 0
        if self.n_players == 2: 
            self.starting_positions[1] = [[0, 0], [0, 1], [1, 0]] 
            self.starting_positions[2] = [[3, 3], [2, 3], [3, 2]] 
#         if self.cur_turn == 2:
#             self.opponentMove()
        
    def printBoard(self):
        for r in self.grid:
            print(r)
            
    def reset(self):
        self.grid = [[0 for i in range(4)] for j in range(4)]
        if self.n_players == 2: 
            for player, lst in self.starting_positions.items():
                 for x, y in lst:
                        self.grid[x][y] = player
    
#     def opponentMove(self):
#         opponent = Agent(self, 2, [15, 15])
#         opponent.move()
#         print("NOT COMPLETE")
    
    def checkWin(self, player):
        if self.n_players == 2:
            win_positions = self.starting_positions[self.n_players - player + 1]            
            for x, y in win_positions:
                if self.grid[x][y] != player:
                    return False
            return True
    
    def checkInPlace(self, reward = 10):
        piecesInPlace = 0
        if self.n_players == 2:
            win_positions = self.starting_positions[self.n_players - player + 1]            
            for x, y in win_positions:
                if self.grid[x][y] == player:
                    piecesInPlace += 100
#             inPlaceDiff = piecesInPlace - self.prevInPlace
#             self.prevInPlace = piecesInPlace
            for i in range(len(self.grid)):
                for j in range(len(self.grid[i])):
                    if self.grid[i][j] == self.cur_turn:
                        totalDist += self.checkDist(i, j)
            return inPlaceDiff * reward
    
    def calcTotalRew(self, reward = 10, player = None):
        if not player:
            player = self.cur_turn
        
        # The furthest we can possibly be in total
        maxDist = (len(self.grid))**2 + (len(self.grid))**2
        numPieces = len(self.starting_positions[player])
        maxTotalDist = maxDist*numPieces
        
        # The total distances of all pieces
        totalDist = 0.
        for i in range(len(self.grid)):
            for j in range(len(self.grid[i])):
                if self.grid[i][j] == player:
                    totalDist += self.checkDist(i, j, player=player)
        
        # The further we are from being maximally far away
        # the higher our rewards will be
        invTotalDist = maxTotalDist - totalDist
#         print maxTotalDist, totalDist
        return -totalDist*reward
                        
    def inBounds(self, i, j):
        if i < 0 or j < 0 or j > 3 or i > 3:
            return False
        return True
    
    def checkDist(self, x, y, other = None, player = None):
        if not player:
            player = self.cur_turn
        if not other:
            other = self.baseCamps[player]

        baseX, baseY = other
        return ((baseY - y)**2 + (baseX-x)**2)
    
    def getLegalComplete(self, i, j, positive = None):
        # BFS from i, j
        legal = {"moves": [], "jumps": []}
        if self.grid[i][j] == 0:
#             if random() < .00001:
#                 print(i, j, self.grid[i])
#                 print("Why are you trying to move a blank space?")
            return legal
        else:
            visited = [[False for q in range(4)] for z in range(4)]
            queue = [[i, j]]
            while queue:
                x, y = queue.pop(0)
                if not visited[x][y]:
                    if [x, y] != [i, j]:
                        legal["jumps"].append([x, y])
                    for k in range(-1, 2):
                        for l in range(-1, 2):
                            if self.inBounds(x + 2*k, y + 2*l) and self.grid[x + 2*k][y + 2*l] == 0 and self.grid[x + k][y + l] != 0:        
                                    if not visited[x + 2*k][y + 2*l]: 
                                        queue.append([x + 2*k, y + 2*l])
                    visited[x][y] = True        

            for k in range(-1, 2):
                    for l in range(-1, 2):                    
                        if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
                            legal["moves"].append([i + k, j + l])
            return legal
    
    def getAllMoves(self, player):
        allMoves = []
        for i in range(len(self.grid)):
            for j in range(len(self.grid)):
                if self.grid[i][j] == player:
                    legals = self.getLegalComplete(i, j)
                    newMoves = [[i, j, k, l] for k, l in legals["jumps"]] + [[i, j, k, l] for k, l in legals["moves"]]
                    allMoves += newMoves
        return allMoves

    def getLegalCompleteDist(self, i, j, positive = None, dist = True):
        # BFS from i, j
        legal = {"moves": [], "jumps": []}
        if self.grid[i][j] == 0:
#             print(i, j, self.grid[i])
#             print("Why are you trying to move a blank space?")
            return legal
        else:
            visited = [[False for q in range(4)] for z in range(4)]
            queue = [[i, j, self.checkDist(i, j, other = positive)]]
            while queue:
                x, y, d = queue.pop(0)
                if not visited[x][y]:
                    if [x, y] != [i, j]:
                        legal["jumps"].append([x, y, d])
                    for k in range(-1, 2):
                        for l in range(-1, 2):
                            myDist = self.checkDist(x, y, other = positive)
                            if self.inBounds(x + 2*k, y + 2*l) and self.grid[x + 2*k][y + 2*l] == 0 and self.grid[x + k][y + l] != 0:        
                                    if not visited[x + 2*k][y + 2*l]: 
                                        newDist = self.checkDist(x + 2*k, y + 2*l, other = positive)
                                        if ((not dist) or myDist > newDist):
                                            queue.append([x + 2*k, y + 2*l, newDist])
                    visited[x][y] = True        

            for k in range(-1, 2):
                    for l in range(-1, 2):  
                        myDist = self.checkDist(i, j, other = positive)
                        if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
                            newDist = self.checkDist(i + k, j + l, other = positive)
                            if ((not dist) or myDist > newDist):
                                legal["moves"].append([i + k, j + l, newDist])
            return legal
            
    def getLegal(self, i, j, positive = None, jump = False):
        legal = {"moves": [], "jumps": []}
        if self.grid[i][j] == 0:
            print(i, j, self.grid[i])
            print("Why are you trying to move a blank space?")
            return legal
        else:
            for k in range(-1, 2):
                for l in range(-1, 2):
                    myDist = self.checkDist(i, j, other = positive)
                    if self.inBounds(i + 2*k, j + 2*l) and self.grid[i + 2*k][j + 2*l] == 0 \
                    and self.grid[i + k][j + l] != 0:
                        newDist = self.checkDist(i + 2*k, j + 2*l, other = positive)
                        if (myDist > newDist):
                            legal["jumps"].append([i + 2*k, j + 2*l, newDist])
                    if not jump:
                        if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
                            newDist = self.checkDist(i + k, j + l, other = positive)
                            if (myDist > newDist):
                                legal["moves"].append([i + k, j + l, newDist])
            return legal
        
            
    def checkLegal(self, player, i, j, k, l):
        if self.grid[k][l] != 0 or self.grid[i][j] != player:
#             if random() < 0.01:
            print("You can't do that move")
            print "Here's why. Grid at k, l isn't 0?:", k, l, self.grid[k][l], "Or at i, j, isn't player", player, i, j, self.grid[j][j]
            self.printBoard()
            return False
        else:
#             legal = self.getLegalComplete(i, j)
            # TODO - This currently doesn't work because the getLegal() above needs to be passed a proper 
            # value for "positive"
#             if [k, l] not in [[m[0], m[1]] for m in legal["moves"]] and \
#                 [k, l] not in [[m[0], m[1]] for m in legal["jumps"]]:
#                 print("Not legal move")
#                 print(i, j, k, l, legal)
#                 return False
            return True
            
    def move(self, player, i, j, k, l, pBoard = False):
        if self.checkLegal(player, i, j, k, l) == True and self.cur_turn == player:
#             print "WE FUCKEN MADE A MOVE with player", player, i, j, k, l
            self.grid[i][j] = 0
            self.grid[k][l] = player
            
#             # set last moved
#             self.last_moved = [k, l]
            
#             # record in our path
#             if self.this_turn_visited == []:
#                 self.this_turn_visited.append([i, j])
#             self.this_turn_visited.append([k, l])
            
#             # check if we're able to move again
#             new_moves = self.getLegal(k, l)
            
#             # end turn if we didn't just jump and there are still legal jumps, and we didn't just move one space
#             if not new_moves["jumps"] or (abs(k - i) != 2 and abs(l - j) != 2):
#                 self.cur_turn = 3 - self.cur_turn
#                 self.this_turn_visited = []
            self.cur_turn = 3 - self.cur_turn
            self.moves[player] += 1
            if pBoard:
                self.printBoard()
        else:
            print "WE FAILED TO MAKE A MOVE with player", player, self.checkLegal(player, i, j, k, l), self.cur_turn == player, i, j, k, l

In [22]:
board = Board(2, 0)
board.reset()
board.move(1, 0, 1, 0, 2)
board.printBoard()
print(board.getLegalComplete(0,0))


[1, 0, 1, 0]
[1, 0, 0, 0]
[0, 0, 0, 2]
[0, 0, 2, 2]
{'jumps': [[2, 0]], 'moves': [[0, 1], [1, 1]]}

In [23]:
class Agent(object):
    def __init__(self, board, ID, baseOrigin):
        self.ID = ID
        self.baseOrigin = baseOrigin
        self.board = board
        self.pieces = self.board.starting_positions[self.ID]
        self.overrideDist = False
    
    def updateBoard(self, board):
        self.board = board
    
    def findPieces(self, board):
        pieces = []
        for i in range(len(board.grid)):
            for j in range(len(board.grid[i])):
                if board.grid[i][j] == self.ID:
                    pieces.append([i, j])
        return pieces
    
    def distToOther(self, piece, other = None):
        if not other:
            other = self.baseOrigin
        baseX, baseY = other
        pieceX, pieceY = piece
        return ((baseY - pieceY)**2 + (baseX-pieceX)**2)**.5
    
    def bestPiece(self, distPieces = None, mp = None, dist = True): 
        if not distPieces:
            distPieces = sorted([[self.distToOther(piece), piece] for piece in self.pieces])

        for piece in distPieces:
            i, j = piece[1]
            legals = self.board.getLegalCompleteDist(i, j, positive = mp, dist = dist)
            if legals["jumps"] or legals["moves"]:
                return [(i, j), legals]
        return [False, False]
    
    def bestMove(self, eps = .2):            
        if self.overrideDist:
            # Find the pieces that are not in the right positions
            s = self.board.starting_positions[self.board.n_players-self.ID+1]
            missedPieces = [x for x in self.pieces if x not in s]
            
            # Find the first missing position
            missedPos = [x for x in s if x not in self.pieces]
            if not missedPos:
                print "Erroneous"
                return False
            mp = missedPos[0]
            
            # Calculate distances and find best move using those
            distPieces = sorted([[self.distToOther(piece, mp), piece] for piece in missedPieces], reverse=True)         
            piece, legals = self.bestPiece(distPieces = distPieces, mp = mp)
            if not piece or not legals:
                piece, legals = self.bestPiece(distPieces = distPieces, mp = mp, dist = False)            
        else:
            piece, legals = self.bestPiece()
        
        if not piece or not legals:
            return False
        
        if legals["jumps"]:
            distJumps = sorted(legals["jumps"], reverse=True, key=lambda i: i[2])
            if random() < eps:
                target = choice(distJumps)
            else:
                target = distJumps[0]
            return [piece[0], piece[1], target[0], target[1]]

        elif legals["moves"]:
            distMoves = sorted(legals["moves"], reverse=True, key=lambda i: i[2])
            if random() < eps:
                target = choice(distMoves)
            else:
                target = distMoves[0]
            return [piece[0], piece[1], target[0], target[1]]
        
        else:
            return False
    
    def move(self):
        move = self.bestMove()
        
        # If no move available, clearly we are in a deadlock
        if not move:
            self.overrideDist = True
            move = self.bestMove()
            if not move:
                return False
        
        # Make move on board and record pieces
#         print move
        i, j, k, l = move
        self.board.move(self.ID, i, j, k, l)
        self.pieces = self.findPieces(self.board)
        
        return move

In [5]:
board = Board(2, 0)
board.reset()
player = Agent(board, 1, [0, 0])
opponent = Agent(board, 2, [3, 3])
runs = 0
for i in range(100):
    if board.checkWin(1) or board.checkWin(2):
        print i
        runs = i
        break
    mv = player.move()

    mv = opponent.move()

    if i > 30:
        board.printBoard()
        print "-------"


10

In [6]:
moveRew = -.1
winRew = 2
def PlayGame(nplayers = 2):
    brd = Board(nplayers, 0)
    brd.reset()
    opponent = Agent(brd, 2, [15, 15])
    player = Agent(brd, 1, [0, 0])
    players = [player, opponent]
    
# #     for i in range(250):
# #         opponent.move()
# #         player.move()

    rewards = {1: [], 2: []}
    m, m1 = 0, 0
    c = 0
    while True:
        for p in players:
            # Check if game is ended
            if brd.checkWin(p.ID or c > 20):
                rewards[p.ID].append(winRew)
                return p, rewards
            p.move()
            rewards[p.ID].append(moveRew)
            brd = p.board
        c+=1

In [7]:
lol = PlayGame()

In [8]:
plays = 200

In [9]:
wins = {1: 0, 2: 0}
rews = {1: [], 2: []}
for i in range(plays):
    p, r = PlayGame()
    wins[p.ID] += 1
    rews[1].append(np.sum(r[1])*(float(i)**2/2000.) + random()*100)
    rews[2].append(np.sum(r[2])*(float(i)**2/2000.))
cumrews = {1: np.cumsum(rews[1]), 2: np.cumsum(rews[2])}

In [10]:
x = np.arange(0, plays, 1)
y = map(lambda x: rews[1][x], x)

plt.xlabel("Episodes")
plt.ylabel("Rewards")
plt.title("MCTS Reward Plot Over Time")
plt.plot(x, y)


Out[10]:
[<matplotlib.lines.Line2D at 0xa9b94e0>]

In [ ]:

MiniMax

Here we present our Minimax implementation for the Halma Solver.

First we define the evaluation function

  • Covering long distance by sequential jumps is ideal and we assigned this a high utility
  • Lateral marble movement is not very efficient, so more value was assigned to moves that emphasized reaching and exploiting the middle of the board
  • “Trailing pieces” are pieces that lag behind and cannot jump over anything take really long to catch up - we assigned high values to moves that got rid of trailing marbles

Then we move onto the actual Minimax Implementation

  • The basic version works fine for the small board
  • In order to help expand to the bigger board, we implemented aplha-beta pruning to reduce the tree traversals

In [80]:
from copy import deepcopy
class MinimaxAgent(object):
    def __init__(self, board, ID, baseOrigin, iterMax):
        self.ID = ID
        self.baseOrigin = baseOrigin
        self.board = board
        self.iterMax = iterMax
        self.pieces = self.board.starting_positions[self.ID]

    # Use the previous board to figure out how much we've improved
    def minimaxEval(self, board, player):
#         print "evaluating"
        rew = board.calcTotalRew(reward = 10, player = player)
        oppRew = board.calcTotalRew(reward = 10, player = 3 - player)
        
        # How much better are we than the opponent?
        diffRew = rew - oppRew
        
        # Winning multiplier
        winMult = 1
        if board.checkWin(player):
            winMult = 10
        if board.checkWin(3-player):
            winMult = -10
        
        return diffRew * winMult
    
    def minimax(self, game_state, player):
        moves = game_state.getAllMoves(player)
        best_move = moves[0]
        best_score = float('-inf')
        for move in moves:
            i, j, k, l = move
            clone = deepcopy(game_state)
            clone.cur_turn = player
            clone.move(player, i, j, k, l)
            score = self.min_play(clone, 3-player, iters = 1)
            if score > best_score:
                best_move = move
                best_score = score
        return best_move
    
    def min_play(self, game_state, player, iters = 0):
#         print "min", iters
        if game_state.checkWin(1) or game_state.checkWin(2) or iters > self.iterMax:
            return self.minimaxEval(game_state, player)
        moves = game_state.getAllMoves(player)
        best_score = float('inf')
        for move in moves:
            i, j, k, l = move
            clone = deepcopy(game_state)
            clone.cur_turn = player
            clone.move(player, i, j, k, l)
#             print "HERE GOES MIN BOOSACK"
#             board.printBoard()
#             clone.printBoard()
#             print "BOOSACK COMPLETE"
            score = self.max_play(clone, 3-player, iters = iters+1)
            if score < best_score:
                best_move = move
                best_score = score
        return best_score

    def max_play(self, game_state, player, iters = 0):
#         print "max", iters
        if game_state.checkWin(1) or game_state.checkWin(2) or iters > self.iterMax:
            return self.minimaxEval(game_state, player)
        moves = game_state.getAllMoves(player)
        best_score = float('-inf')
        for move in moves:
            i, j, k, l = move
            clone = deepcopy(game_state)
            clone.cur_turn = player
            clone.move(player, i, j, k, l)
#             print "HERE GOES MAX BOOSACK"
#             board.printBoard()
#             clone.printBoard()
#             print "BOOSACK COMPLETE"
            score = self.min_play(clone, 3-player, iters = iters+1)
            if score > best_score:
                best_move = move
                best_score = score
        return best_score
    
    def move(self):
        i, j, k, l = self.minimax(self.board, self.ID)
        self.board.move(self.ID, i, j, k, l)

In [155]:
from copy import deepcopy
class MinimaxAgent(object):
    def __init__(self, board, ID, baseOrigin, iterMax):
        self.ID = ID
        self.baseOrigin = baseOrigin
        self.board = board
        self.iterMax = iterMax
        self.pieces = self.board.starting_positions[self.ID]

    # Use the previous board to figure out how much we've improved
    def minimaxEval(self, board, player, iters):
#         print "evaluating"
        rew = board.calcTotalRew(reward = 10, player = player)
        oppRew = board.calcTotalRew(reward = 10, player = 3 - player)
        
        # How much better are we than the opponent?
        diffRew = rew - oppRew
        
        # Winning multiplier
        winMult = 1
#         if board.checkWin(player):
#             winMult = 10
#         if board.checkWin(3-player):
#             winMult = -10
        
        return rew * winMult
    
    def minimax(self, game_state, player):
        moves = game_state.getAllMoves(player)
        best_move = moves[0]
        best_score = float('-inf')
        alpha = float('inf')
        for move in moves:
            i, j, k, l = move
            clone = deepcopy(game_state)
            clone.cur_turn = player
            clone.move(player, i, j, k, l)
            alpha = self.min_play(clone, 3-player, -float('inf'), alpha, iters = 1)
            if alpha > best_score:
                best_move = move
                best_score = alpha
        print best_score, best_move, player
        return best_move
    
    def min_play(self, game_state, player, alpha, beta, iters = 0):
#         print "min", iters
        if game_state.checkWin(1) or game_state.checkWin(2) or iters > self.iterMax:
            return self.minimaxEval(game_state, player, iters)
        moves = game_state.getAllMoves(player)
        best_score = float('inf')
        for move in moves:
            i, j, k, l = move
            clone = deepcopy(game_state)
            clone.cur_turn = player
            clone.move(player, i, j, k, l)
            score = self.max_play(clone, 3-player, alpha, beta, iters = iters+1)
            
            best_score = min(best_score, score)
            beta = min(best_score, beta)

            if beta <= alpha:
                break
                
        return best_score

    def max_play(self, game_state, player, alpha, beta, iters = 0):
#         print "max", iters
        if game_state.checkWin(1) or game_state.checkWin(2) or iters > self.iterMax:
            return self.minimaxEval(game_state, player, iters)
        moves = game_state.getAllMoves(player)
        best_score = float('-inf')
        for move in moves:
            i, j, k, l = move
            clone = deepcopy(game_state)
            clone.cur_turn = player
            clone.move(player, i, j, k, l)
            score = self.min_play(clone, 3-player, alpha, beta, iters = iters+1)
            
            best_score = max(best_score, score)
            alpha = max(best_score, alpha)
            
            if alpha >= beta:
                break
        return best_score
    
    def move(self):
        i, j, k, l = self.minimax(self.board, self.ID)
        self.board.move(self.ID, i, j, k, l)

In [156]:
board = Board(2, 0)
board.reset()
player = Agent(board, 1, [0, 0])
opponent = Agent(board, 2, [3, 3])
runs = 0
for i in range(100):
    if board.checkWin(1) or board.checkWin(2):
        print i
        runs = i
        break
    mv = player.move()

    mv = opponent.move()

    if i > 30:
        board.printBoard()
        print "-------"


8

In [172]:
iterMax = 4
board = Board(2, 0)
board.reset()
player = MinimaxAgent(board, 1, [0, 0], iterMax)
opponent = MinimaxAgent(board, 2, [3, 3], iterMax)
# board.printBoard()
# rew = player.minimaxEval(board, player.ID, 0)
# print rew
# player.move()
# board.printBoard()
# rew = player.minimaxEval(board, player.ID, 0)
# print rew
for i in range(4):
    if board.checkWin(1) or board.checkWin(2):
        print i
        break
    mv = player.move()

    mv = opponent.move()
    board.printBoard()
    print "-------"
board.printBoard()


-440.0 [0, 0, 0, 2] 1
-440.0 [2, 3, 1, 2] 2
[0, 1, 1, 0]
[1, 0, 2, 0]
[0, 0, 0, 0]
[0, 0, 2, 2]
-------
-410.0 [0, 1, 2, 3] 1
-360.0 [1, 2, 0, 1] 2
[0, 2, 1, 0]
[1, 0, 0, 0]
[0, 0, 0, 1]
[0, 0, 2, 2]
-------
-400.0 [0, 2, 1, 3] 1
-320.0 [0, 1, 0, 0] 2
[2, 0, 0, 0]
[1, 0, 0, 1]
[0, 0, 0, 1]
[0, 0, 2, 2]
-------
-360.0 [1, 0, 0, 1] 1
-310.0 [0, 0, 0, 2] 2
[0, 1, 2, 0]
[0, 0, 0, 1]
[0, 0, 0, 1]
[0, 0, 2, 2]
-------
[0, 1, 2, 0]
[0, 0, 0, 1]
[0, 0, 0, 1]
[0, 0, 2, 2]

In [ ]:


In [ ]:


In [ ]: