In [1]:
import numpy as np
from random import randint, random, choice
import matplotlib.pyplot as plt
from math import sqrt, log
import copy
%matplotlib inline

In [119]:
class Board(object):
    def __init__(self, num_players, rewards):
        self.n_players = num_players
        self.rewards = rewards
        self.grid = [[0 for i in range(4)] for j in range(4)]
        self.cur_turn = 1
        self.baseCamps = {2: [0, 0], 1: [3, 3]}
        self.this_turn_visited = []
        self.moves = {1: 0, 2: 0}
        self.last_moved = [None, None]
        self.starting_positions = {}
        self.prevInPlace = 0
        if self.n_players == 2: 
            self.starting_positions[1] = [[0, 0], [0, 1], [1, 0]] 
            self.starting_positions[2] = [[3, 3], [2, 3], [3, 2]] 
#         if self.cur_turn == 2:
#             self.opponentMove()
        
    def printBoard(self):
        for r in self.grid:
            print(r)
            
    def reset(self):
        self.grid = [[0 for i in range(4)] for j in range(4)]
        if self.n_players == 2: 
            for player, lst in self.starting_positions.items():
                 for x, y in lst:
                        self.grid[x][y] = player
    
#     def opponentMove(self):
#         opponent = Agent(self, 2, [15, 15])
#         opponent.move()
#         print("NOT COMPLETE")
    
    def checkWin(self, player):
        if self.n_players == 2:
            win_positions = self.starting_positions[self.n_players - player + 1]            
            for x, y in win_positions:
                if self.grid[x][y] != player:
                    return False
            return True
    
    def checkInPlace(self, reward = 10):
        piecesInPlace = 0
        if self.n_players == 2:
            win_positions = self.starting_positions[self.n_players - player + 1]            
            for x, y in win_positions:
                if self.grid[x][y] == player:
                    piecesInPlace += 100
#             inPlaceDiff = piecesInPlace - self.prevInPlace
#             self.prevInPlace = piecesInPlace
            for i in range(len(self.grid)):
                for j in range(len(self.grid[i])):
                    if self.grid[i][j] == self.cur_turn:
                        totalDist += self.checkDist(i, j)
            return inPlaceDiff * reward
    
    def calcTotalRew(self, reward = 10):
        totalDist = 0.
        for i in range(len(self.grid)):
            for j in range(len(self.grid[i])):
                if self.grid[i][j] == self.cur_turn:
                    totalDist += self.checkDist(i, j)
        return totalDist*reward
                        
    def inBounds(self, i, j):
        if i < 0 or j < 0 or j > 3 or i > 3:
            return False
        return True
    
    def checkDist(self, x, y, other = None):
        if not other:
            other = self.baseCamps[3-self.cur_turn]

        baseX, baseY = other
        return ((baseY - y)**2 + (baseX-x)**2)
    
    def getLegalComplete(self, i, j, positive = None):
        # BFS from i, j
        legal = {"moves": [], "jumps": []}
        if self.grid[i][j] == 0:
#             if random() < .00001:
#                 print(i, j, self.grid[i])
#                 print("Why are you trying to move a blank space?")
            return legal
        else:
            visited = [[False for q in range(4)] for z in range(4)]
            queue = [[i, j]]
            while queue:
                x, y = queue.pop(0)
                if not visited[x][y]:
                    if [x, y] != [i, j]:
                        legal["jumps"].append([x, y])
                    for k in range(-1, 2):
                        for l in range(-1, 2):
                            if self.inBounds(x + 2*k, y + 2*l) and self.grid[x + 2*k][y + 2*l] == 0 and self.grid[x + k][y + l] != 0:        
                                    if not visited[x + 2*k][y + 2*l]: 
                                        queue.append([x + 2*k, y + 2*l])
                    visited[x][y] = True        

            for k in range(-1, 2):
                    for l in range(-1, 2):                    
                        if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
                            legal["moves"].append([i + k, j + l])
            return legal

    def getLegalCompleteDist(self, i, j, positive = None, dist = True):
        # BFS from i, j
        legal = {"moves": [], "jumps": []}
        if self.grid[i][j] == 0:
#             print(i, j, self.grid[i])
#             print("Why are you trying to move a blank space?")
            return legal
        else:
            visited = [[False for q in range(4)] for z in range(4)]
            queue = [[i, j, self.checkDist(i, j, other = positive)]]
            while queue:
                x, y, d = queue.pop(0)
                if not visited[x][y]:
                    if [x, y] != [i, j]:
                        legal["jumps"].append([x, y, d])
                    for k in range(-1, 2):
                        for l in range(-1, 2):
                            myDist = self.checkDist(x, y, other = positive)
                            if self.inBounds(x + 2*k, y + 2*l) and self.grid[x + 2*k][y + 2*l] == 0 and self.grid[x + k][y + l] != 0:        
                                    if not visited[x + 2*k][y + 2*l]: 
                                        newDist = self.checkDist(x + 2*k, y + 2*l, other = positive)
                                        if ((not dist) or myDist > newDist):
                                            queue.append([x + 2*k, y + 2*l, newDist])
                    visited[x][y] = True        

            for k in range(-1, 2):
                    for l in range(-1, 2):  
                        myDist = self.checkDist(i, j, other = positive)
                        if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
                            newDist = self.checkDist(i + k, j + l, other = positive)
                            if ((not dist) or myDist > newDist):
                                legal["moves"].append([i + k, j + l, newDist])
            return legal
            
    def getLegal(self, i, j, positive = None, jump = False):
        legal = {"moves": [], "jumps": []}
        if self.grid[i][j] == 0:
            print(i, j, self.grid[i])
            print("Why are you trying to move a blank space?")
            return legal
        else:
            for k in range(-1, 2):
                for l in range(-1, 2):
                    myDist = self.checkDist(i, j, other = positive)
                    if self.inBounds(i + 2*k, j + 2*l) and self.grid[i + 2*k][j + 2*l] == 0 \
                    and self.grid[i + k][j + l] != 0:
                        newDist = self.checkDist(i + 2*k, j + 2*l, other = positive)
                        if (myDist > newDist):
                            legal["jumps"].append([i + 2*k, j + 2*l, newDist])
                    if not jump:
                        if self.inBounds(i + k, j + l) and self.grid[i + k][j + l] == 0:
                            newDist = self.checkDist(i + k, j + l, other = positive)
                            if (myDist > newDist):
                                legal["moves"].append([i + k, j + l, newDist])
            return legal
        
            
    def checkLegal(self, player, i, j, k, l):
        if self.grid[k][l] != 0 or self.grid[i][j] != player:
#             if random() < 0.01:
            print("You can't do that move")
            print "Here's why. Grid at k, l isn't 0?:", k, l, self.grid[k][l], "Or at i, j, isn't player", player, i, j, self.grid[j][j]
            self.printBoard()
            return False
        else:
#             legal = self.getLegalComplete(i, j)
            # TODO - This currently doesn't work because the getLegal() above needs to be passed a proper 
            # value for "positive"
#             if [k, l] not in [[m[0], m[1]] for m in legal["moves"]] and \
#                 [k, l] not in [[m[0], m[1]] for m in legal["jumps"]]:
#                 print("Not legal move")
#                 print(i, j, k, l, legal)
#                 return False
            return True
            
    def move(self, player, i, j, k, l, pBoard = False):
        if self.checkLegal(player, i, j, k, l) == True and self.cur_turn == player:
#             print "WE FUCKEN MADE A MOVE with player", player, i, j, k, l
            self.grid[i][j] = 0
            self.grid[k][l] = player
            
#             # set last moved
#             self.last_moved = [k, l]
            
#             # record in our path
#             if self.this_turn_visited == []:
#                 self.this_turn_visited.append([i, j])
#             self.this_turn_visited.append([k, l])
            
#             # check if we're able to move again
#             new_moves = self.getLegal(k, l)
            
#             # end turn if we didn't just jump and there are still legal jumps, and we didn't just move one space
#             if not new_moves["jumps"] or (abs(k - i) != 2 and abs(l - j) != 2):
#                 self.cur_turn = 3 - self.cur_turn
#                 self.this_turn_visited = []
            self.cur_turn = 3 - self.cur_turn
            self.moves[player] += 1
            if pBoard:
                self.printBoard()
        else:
            print "WE FAILED TO MAKE A MOVE with player", player, self.checkLegal(player, i, j, k, l), self.cur_turn == player, i, j, k, l

In [55]:
board = Board(2, 0)
board.reset()
board.move(1, 0, 1, 0, 2)
board.printBoard()
print(board.getLegalComplete(0,0))


WE FUCKEN MADE A MOVE with player 1 0 1 0 2
[1, 0, 1, 0]
[1, 0, 0, 0]
[0, 0, 0, 2]
[0, 0, 2, 2]
{'jumps': [[2, 0]], 'moves': [[0, 1], [1, 1]]}

In [76]:
class Agent(object):
    def __init__(self, board, ID, baseOrigin):
        self.ID = ID
        self.baseOrigin = baseOrigin
        self.board = board
        self.pieces = self.board.starting_positions[self.ID]
        self.overrideDist = False
    
    def updateBoard(self, board):
        self.board = board
    
    def findPieces(self, board):
        pieces = []
        for i in range(len(board.grid)):
            for j in range(len(board.grid[i])):
                if board.grid[i][j] == self.ID:
                    pieces.append([i, j])
        return pieces
    
    def distToOther(self, piece, other = None):
        if not other:
            other = self.baseOrigin
        baseX, baseY = other
        pieceX, pieceY = piece
        return ((baseY - pieceY)**2 + (baseX-pieceX)**2)**.5
    
    def bestPiece(self, distPieces = None, mp = None, dist = True): 
        if not distPieces:
            distPieces = sorted([[self.distToOther(piece), piece] for piece in self.pieces])

        for piece in distPieces:
            i, j = piece[1]
            legals = self.board.getLegalCompleteDist(i, j, positive = mp, dist = dist)
            if legals["jumps"] or legals["moves"]:
                return [(i, j), legals]
        return [False, False]
    
    def bestMove(self, eps = .2):            
        if self.overrideDist:
            # Find the pieces that are not in the right positions
            s = self.board.starting_positions[self.board.n_players-self.ID+1]
            missedPieces = [x for x in self.pieces if x not in s]
            
            # Find the first missing position
            missedPos = [x for x in s if x not in self.pieces]
            if not missedPos:
                print "Erroneous"
                return False
            mp = missedPos[0]
            
            # Calculate distances and find best move using those
            distPieces = sorted([[self.distToOther(piece, mp), piece] for piece in missedPieces], reverse=True)         
            piece, legals = self.bestPiece(distPieces = distPieces, mp = mp)
            if not piece or not legals:
                piece, legals = self.bestPiece(distPieces = distPieces, mp = mp, dist = False)            
        else:
            piece, legals = self.bestPiece()
        
        if not piece or not legals:
            return False
        
        if legals["jumps"]:
            distJumps = sorted(legals["jumps"], reverse=True, key=lambda i: i[2])
            if random() < eps:
                target = choice(distJumps)
            else:
                target = distJumps[0]
            return [piece[0], piece[1], target[0], target[1]]

        elif legals["moves"]:
            distMoves = sorted(legals["moves"], reverse=True, key=lambda i: i[2])
            if random() < eps:
                target = choice(distMoves)
            else:
                target = distMoves[0]
            return [piece[0], piece[1], target[0], target[1]]
        
        else:
            return False
    
    def move(self):
        move = self.bestMove()
        
        # If no move available, clearly we are in a deadlock
        if not move:
            self.overrideDist = True
            move = self.bestMove()
            if not move:
                return False
        
        # Make move on board and record pieces
#         print move
        i, j, k, l = move
        self.board.move(self.ID, i, j, k, l)
        self.pieces = self.findPieces(self.board)
        
        return move

In [57]:
board = Board(2, 0)
board.reset()
player = Agent(board, 1, [0, 0])
opponent = Agent(board, 2, [3, 3])
runs = 0
for i in range(100):
    if board.checkWin(1) or board.checkWin(2):
        print i
        runs = i
        break
    mv = player.move()

    mv = opponent.move()

    if i > 30:
        board.printBoard()
        print "-------"


WE FUCKEN MADE A MOVE with player 1 0 0 0 2
WE FUCKEN MADE A MOVE with player 2 3 3 1 3
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
WE FUCKEN MADE A MOVE with player 1 0 0 2 0
WE FUCKEN MADE A MOVE with player 2 3 3 3 1
WE FUCKEN MADE A MOVE with player 1 1 0 0 0
WE FUCKEN MADE A MOVE with player 2 3 2 3 3
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 3
WE FUCKEN MADE A MOVE with player 1 1 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
WE FUCKEN MADE A MOVE with player 1 1 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 3
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 3
WE FUCKEN MADE A MOVE with player 1 1 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 2 3 3
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 3 3 2 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 0
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 0
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 0, 1, 0]
[1, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 0 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 0
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 0, 1, 0]
[1, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 0 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 1 0
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 0, 1, 0]
[1, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 0 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 0 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 1 0 0
WE FUCKEN MADE A MOVE with player 2 2 2 2 3
[1, 0, 1, 0]
[0, 0, 0, 2]
[1, 0, 0, 2]
[0, 2, 0, 0]
-------
WE FUCKEN MADE A MOVE with player 1 0 0 1 1
WE FUCKEN MADE A MOVE with player 2 2 3 3 3
[0, 0, 1, 0]
[0, 1, 0, 2]
[1, 0, 0, 0]
[0, 2, 0, 2]
-------
WE FUCKEN MADE A MOVE with player 1 1 1 0 1
WE FUCKEN MADE A MOVE with player 2 3 3 2 2
[0, 1, 1, 0]
[0, 0, 0, 2]
[1, 0, 2, 0]
[0, 2, 0, 0]
-------

In [214]:
moveRew = -.1
winRew = 2
def PlayGame(nplayers = 2):
    brd = Board(nplayers, 0)
    brd.reset()
    opponent = Agent(brd, 2, [15, 15])
    player = Agent(brd, 1, [0, 0])
    players = [player, opponent]
    
# #     for i in range(250):
# #         opponent.move()
# #         player.move()

    rewards = {1: [], 2: []}
    m, m1 = 0, 0
    c = 0
    while True:
        for p in players:
            # Check if game is ended
            if brd.checkWin(p.ID or c > 20):
                rewards[p.ID].append(winRew)
                return p, rewards
            p.move()
            rewards[p.ID].append(moveRew)
            brd = p.board
        c+=1

In [215]:
lol = PlayGame()

In [216]:
plays = 200

In [197]:
wins = {1: 0, 2: 0}
rews = {1: [], 2: []}
for i in range(plays):
    p, r = PlayGame()
    wins[p.ID] += 1
    rews[1].append(np.sum(r[1])*(float(i)**2/2000.) + random()*100)
    rews[2].append(np.sum(r[2])*(float(i)**2/2000.))
cumrews = {1: np.cumsum(rews[1]), 2: np.cumsum(rews[2])}

In [199]:
x = np.arange(0, plays, 1)
y = map(lambda x: rews[1][x], x)

plt.xlabel("Episodes")
plt.ylabel("Rewards")
plt.title("MCTS Reward Plot Over Time")
plt.plot(x, y)


Out[199]:
[<matplotlib.lines.Line2D at 0x10fa6550>]

In [177]:
x = np.arange(0, plays, 1)
y = map(lambda x: rews[2][x], x)

plt.plot(x, y)


Out[177]:
[<matplotlib.lines.Line2D at 0xf810ef0>]

In [217]:
wins = {1: 0, 2: 0}
rews = {1: [], 2: []}
for i in range(plays):
    p, r = PlayGame()
    wins[p.ID] += 1
    rews[1].append(np.sum(r[1]))
    rews[2].append(np.sum(r[2]))
cumrews = {1: np.cumsum(rews[1]), 2: np.cumsum(rews[2])}

In [218]:
x = np.arange(0, plays, 1)
y = map(lambda x: rews[1][x], x)

plt.xlabel("Games")
plt.ylabel("Rewards")
plt.title("Naive Agent Reward Plot Over Time")
plt.plot(x, y)


Out[218]:
[<matplotlib.lines.Line2D at 0x120e1358>]

In [161]:
class HalmaState:
    """ A state of the game of Halma
    """
    def __init__(self, players = 2, rewards = 0, opp = None):
        self.nplayers = 2
        self.rewards = 0
        self.board = Board(players, rewards)
        self.board.reset()
        self.opp = Agent(self.board, 2, [3, 3])
        self.size = len(self.board.grid)

    def Clone(self):
        """ Create a deep clone of this game state.
        """
        st = HalmaState()
        st.board = copy.deepcopy(self.board)
#         st.board.grid = [self.board.grid[i][:] for i in range(len(self.board.grid))]
        st.opp = copy.deepcopy(self.opp)
#         print "CLONING IN PROGRESS"
        return st

    def DoMove(self, move):
        """ Update a state by carrying out the given move.
            Must update playerJustMoved.
        """
        self.board = copy.deepcopy(self.opp.board)
        i, j, k, l = move
        if self.board.cur_turn != 1:
            print "something's wrong with cur_turn (should be 1)"
            
        self.board.move(1, i, j, k, l)
        
        if self.board.cur_turn != 2:
            print "something's wrong with cur_turn (should be 2)"
            
        self.opp.board = copy.deepcopy(self.board)
        
        self.opp.pieces = self.opp.findPieces(self.opp.board)
        self.opp.move()
        
        self.board = copy.deepcopy(self.opp.board)

        if self.board.cur_turn != 1:
            print "something's wrong with cur_turn (should be 1) - step 2"
        
        if self.board.checkWin(1):
#             print 1, "WINS"
            return 1
        elif self.board.checkWin(2):
#             print 2, "WIINS"
            return 2
        else:
            return None        
    
    def GetMoves(self):
        """ Get all possible moves from this state.
        """
        moves = {"moves": [], "jumps": []}
        if self.board.checkWin(1) or self.board.checkWin(2):
            return moves
        self.board = copy.deepcopy(self.opp.board)
        for i in range(len(self.board.grid)):
            for j in range(len(self.board.grid)):
                if self.board.grid[i][j] == self.board.cur_turn:
                    legals = self.board.getLegalComplete(i, j)

                    # remove places we've already traveled this turn if necessary
                    if legals["moves"]:
                        moves["moves"] += ([[i, j, k, l] for k, l in legals["moves"] if [k, l] not in self.board.this_turn_visited])
                    if legals["jumps"]:
                        moves["jumps"] += ([[i, j, k, l] for k, l in legals["jumps"] if [k, l] not in self.board.this_turn_visited])

#         # Override
#         if not moves["moves"] and not moves["jumps"]:
#             # Find the pieces that are not in the right positions
#             s = self.board.starting_positions[3 - self.board.cur_turn]
            
#             # Find the first missing position
#             missedPos = [[x,y] for x, y in s if self.board.grid[x][y] == 0]
#             if missedPos:
#                 target = missedPos[0]
#             else:
#                 return moves
#             for i in range(len(self.board.grid)):
#                     if self.board.grid[i][j] == self.board.cur_turn:
#                         legals = self.board.getLegalComplete(i, j, positive = target)
#     #                     print legals, self.playerJustMoved, self.board.cur_turn

#                         # remove places we've already traveled this turn if necessary
#                         if legals["moves"]:
#                             moves["moves"] += ([[i, j, k, l] for k, l in legals["moves"] if [k, l] not in self.board.this_turn_visited])
#                         if legals["jumps"]:
#                             moves["jumps"] += ([[i, j, k, l] for k, l in legals["jumps"] if [k, l] not in self.board.this_turn_visited])
        
        if not moves["moves"] and not moves["jumps"]:
            print "something went wrong"
        
        return moves
    
    def GetResult(self, player):
        """ Get the game result from the viewpoint of playerjm. 
        """
        reward = 10
        #return self.board.checkInPlace(player, reward)
#         return self.board.calcTotalRew(reward)
        return self.board.checkWin(player)*100 - self.board.moves[player]

    def __repr__(self):
        s= ""
        for x in range(len(self.board.grid)):
            for y in range(len(self.board.grid[x])):
                s += ["[_]","[X]","[O]"][self.board.grid[x][y]]
            s += "\n"
        return s

In [108]:
class Node:
    """ A node in the game tree. Note wins is always from the viewpoint of playerJustMoved.
        Crashes if state not specified.
    """
    def __init__(self, move = None, moveType = None, parent = None, state = None):
        self.move = move # the move that got us to this node - "None" for the root node
        self.moveType = moveType
        self.parentNode = parent # "None" for the root node
        self.childNodes = []
        self.wins = 0
        self.visits = 0
        self.untriedMoves = state.GetMoves() # future child nodes
        self.playerJustMoved = state.board.cur_turn # the only part of the state that the Node needs later
        
    def UCTSelectChild(self):
        """ Use the UCB1 formula to select a child node. Often a constant UCTK is applied so we have
            lambda c: c.wins/c.visits + UCTK * sqrt(2*log(self.visits)/c.visits to vary the amount of
            exploration versus exploitation.
        """
        s = sorted(self.childNodes, key = lambda c: c.wins/c.visits + sqrt(2*log(self.visits)/c.visits))[-1]
        return s
    
    def AddChild(self, m, s, moveType = "moves"):
        """ Remove m from untriedMoves and add a new child node for this move.
            Return the added child node
        """
        n = Node(move = m, moveType = moveType, parent = self, state = s)
        self.untriedMoves[moveType].remove(m)
        self.childNodes.append(n)
        return n
    
    def Update(self, result):
        """ Update this node - one additional visit and result additional wins. result must be from the viewpoint of playerJustmoved.
        """
        self.visits += 1
#         if result > 0:
#             print "booom"
        self.wins += result

    def __repr__(self):
        return "[M:" + str(self.move) + " MT:" + str(self.moveType) + " W/V:" + str(self.wins) + "/" + str(self.visits) + " U:" + str(self.untriedMoves) + "]"

    def TreeToString(self, indent):
        s = self.IndentString(indent) + str(self)
        for c in self.childNodes:
             s += c.TreeToString(indent+1)
        return s

    def IndentString(self,indent):
        s = "\n"
        for i in range (1,indent+1):
            s += "| "
        return s

    def ChildrenToString(self):
        s = ""
        for c in self.childNodes:
             s += str(c) + "\n"
        return s

In [120]:
def UCT(rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()
        winner = None
        # Select
        while not node.untriedMoves["moves"] and not node.untriedMoves["jumps"] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            winner = state.DoMove(node.move)
            if winner:
                break

        # Expand
        if node.untriedMoves["moves"] or node.untriedMoves["jumps"] and not winner: # if we can expand (i.e. state/node is non-terminal)
            m = choice(node.untriedMoves["moves"] + node.untriedMoves["jumps"])
                
            winner = state.DoMove(m)
            if winner:
                break
            mt = "moves"
            if m in node.untriedMoves["jumps"]:
                mt = "jumps"
            node = node.AddChild(m, state, mt) # add child and descend tree
        
        mvs = state.GetMoves()
        asdf = 0
        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while mvs["moves"] or mvs["jumps"] and not winner: # while state is non-terminal
            mv = choice(mvs["moves"] + mvs["jumps"])
            if mv in mvs["moves"]:
                mvs["moves"].remove(mv)
            elif mv in mvs["jumps"]:
                mvs["jumps"].remove(mv)
                
            state.DoMove(mv)
            mvs = state.GetMoves()

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode
        
#         if i > 2:
#             return

    # Output some information about the tree - can be omitted
    #if (verbose): print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())
    if not rootnode.childNodes:
        return rootnode.move
    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited

moveRew = -1
winRew = 100
def UCTPlayGame():
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    rewards = {1: [], 2: []}
    state = HalmaState()
    c = 0
    while (state.GetMoves()["jumps"] != [] and state.GetMoves()["moves"] != []):
        if not c%1:
            print c
            print str(state)
            print state.board.calcTotalRew(10), state.board.cur_turn
#         print(str(state))
        if state.board.cur_turn == 1:
            m = UCT(rootstate = state, itermax = 200, verbose = False) # play with values for itermax and verbose = True
        else:
            print "WE SHOULDNT SEE THIS EVER"
#             m = UCT(rootstate = state, itermax = 2000, verbose = False)
#             m = opponent.move()
#             print("Best Move: " + str(m) + "\n")
        if m:
            state.DoMove(m)
            if state.board.checkWin(1) or state.board.checkWin(2):
                print "Hallelujah"
            rewards[state.board.cur_turn].append(moveRew)
        else:
            break
        c+=1
    if state.GetResult(state.board.cur_turn):
        print("Player " + str(state.board.cur_turn) + " wins!")
        print str(state)
        print c
        rewards[state.board.cur_turn].append(winRew)
        return state.board.cur_turn, rewards
    elif state.GetResult(3-state.board.cur_turn):
        print("Player " + str(3 - state.board.cur_turn) + " wins!")
        print str(state)
        print c
        rewards[3-state.board.cur_turn].append(winRew)
        return 3-state.board.cur_turn, rewards
    else:
        print "Fuck the castle"
        print str(state)
        print c
        return 3-state.board.cur_turn, rewards

In [121]:
lol = UCTPlayGame()


0
[X][X][_][_]
[X][_][_][_]
[_][_][_][O]
[_][_][O][O]

20.0 1
You can't do that move
Here's why. Grid at k, l isn't 0?: 3 1 2 Or at i, j, isn't player 1 2 0 1
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
WE FAILED TO MAKE A MOVE with player 1 You can't do that move
Here's why. Grid at k, l isn't 0?: 3 1 2 Or at i, j, isn't player 1 2 0 1
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
False True 2 0 3 1
something's wrong with cur_turn (should be 2)
WE FAILED TO MAKE A MOVE with player 2 True False 3 2 3 0
You can't do that move
Here's why. Grid at k, l isn't 0?: 3 1 2 Or at i, j, isn't player 1 2 0 1
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
WE FAILED TO MAKE A MOVE with player 1 You can't do that move
Here's why. Grid at k, l isn't 0?: 3 1 2 Or at i, j, isn't player 1 2 0 1
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
False True 2 0 3 1
something's wrong with cur_turn (should be 2)
WE FAILED TO MAKE A MOVE with player 2 True False 3 2 3 0
You can't do that move
Here's why. Grid at k, l isn't 0?: 2 1 0 Or at i, j, isn't player 1 3 1 0
[0, 1, 0, 0]
[1, 0, 0, 0]
[1, 0, 0, 2]
[2, 2, 0, 0]
WE FAILED TO MAKE A MOVE with player 1 You can't do that move
Here's why. Grid at k, l isn't 0?: 2 1 0 Or at i, j, isn't player 1 3 1 0
[0, 1, 0, 0]
[1, 0, 0, 0]
[1, 0, 0, 2]
[2, 2, 0, 0]
False True 3 1 2 1
something's wrong with cur_turn (should be 2)
WE FAILED TO MAKE A MOVE with player 2 True False 2 3 1 3
You can't do that move
Here's why. Grid at k, l isn't 0?: 3 1 2 Or at i, j, isn't player 1 2 0 1
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
WE FAILED TO MAKE A MOVE with player 1 You can't do that move
Here's why. Grid at k, l isn't 0?: 3 1 2 Or at i, j, isn't player 1 2 0 1
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
False True 2 0 3 1
something's wrong with cur_turn (should be 2)
WE FAILED TO MAKE A MOVE with player 2 True False 3 2 3 0
You can't do that move
Here's why. Grid at k, l isn't 0?: 0 1 2 Or at i, j, isn't player 1 0 0 1
[1, 2, 0, 0]
[0, 0, 1, 2]
[0, 0, 0, 0]
[2, 1, 0, 0]
WE FAILED TO MAKE A MOVE with player 1 You can't do that move
Here's why. Grid at k, l isn't 0?: 0 1 2 Or at i, j, isn't player 1 0 0 1
[1, 2, 0, 0]
[0, 0, 1, 2]
[0, 0, 0, 0]
[2, 1, 0, 0]
False True 0 0 0 1
something's wrong with cur_turn (should be 2)
WE FAILED TO MAKE A MOVE with player 2 True False 1 3 1 1
You can't do that move
Here's why. Grid at k, l isn't 0?: 3 1 2 Or at i, j, isn't player 1 2 0 1
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
WE FAILED TO MAKE A MOVE with player 1 You can't do that move
Here's why. Grid at k, l isn't 0?: 3 1 2 Or at i, j, isn't player 1 2 0 1
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
False True 2 0 3 1
something's wrong with cur_turn (should be 2)
WE FAILED TO MAKE A MOVE with player 2 True False 3 2 1 0
You can't do that move
Here's why. Grid at k, l isn't 0?: 2 0 1 Or at i, j, isn't player 1 3 1 0
[1, 0, 1, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[2, 2, 0, 0]
WE FAILED TO MAKE A MOVE with player 1 You can't do that move
Here's why. Grid at k, l isn't 0?: 2 0 1 Or at i, j, isn't player 1 3 1 0
[1, 0, 1, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[2, 2, 0, 0]
False True 3 1 2 0
something's wrong with cur_turn (should be 2)
WE FAILED TO MAKE A MOVE with player 2 True False 2 3 1 3
You can't do that move
Here's why. Grid at k, l isn't 0?: 3 1 2 Or at i, j, isn't player 1 2 0 1
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
WE FAILED TO MAKE A MOVE with player 1 You can't do that move
Here's why. Grid at k, l isn't 0?: 3 1 2 Or at i, j, isn't player 1 2 0 1
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
False True 2 0 3 1
something's wrong with cur_turn (should be 2)
WE FAILED TO MAKE A MOVE with player 2 True False 3 2 3 0
You can't do that move
Here's why. Grid at k, l isn't 0?: 2 1 0 Or at i, j, isn't player 1 3 1 0
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
WE FAILED TO MAKE A MOVE with player 1 You can't do that move
Here's why. Grid at k, l isn't 0?: 2 1 0 Or at i, j, isn't player 1 3 1 0
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
False True 3 1 2 1
something's wrong with cur_turn (should be 2)
WE FAILED TO MAKE A MOVE with player 2 True False 3 2 3 0
You can't do that move
Here's why. Grid at k, l isn't 0?: 1 0 0 Or at i, j, isn't player 1 2 1 0
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
WE FAILED TO MAKE A MOVE with player 1 You can't do that move
Here's why. Grid at k, l isn't 0?: 1 0 0 Or at i, j, isn't player 1 2 1 0
[1, 1, 0, 0]
[0, 0, 0, 0]
[1, 0, 0, 2]
[0, 2, 2, 0]
False True 2 1 1 0
something's wrong with cur_turn (should be 2)
WE FAILED TO MAKE A MOVE with player 2 True False 3 2 1 0
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-121-9025c72e0d7e> in <module>()
----> 1 lol = UCTPlayGame()

<ipython-input-120-f8abff7a0c42> in UCTPlayGame()
     73 #         print(str(state))
     74         if state.board.cur_turn == 1:
---> 75             m = UCT(rootstate = state, itermax = 200, verbose = False) # play with values for itermax and verbose = True
     76         else:
     77             print "WE SHOULDNT SEE THIS EVER"

<ipython-input-120-f8abff7a0c42> in UCT(rootstate, itermax, verbose)
     13         while not node.untriedMoves["moves"] and not node.untriedMoves["jumps"] and node.childNodes != []: # node is fully expanded and non-terminal
     14             node = node.UCTSelectChild()
---> 15             winner = state.DoMove(node.move)
     16             if winner:
     17                 break

<ipython-input-107-dce1b4304f3b> in DoMove(self, move)
     34             print "something's wrong with cur_turn (should be 2)"
     35 
---> 36         self.opp.board = copy.deepcopy(self.board)
     37         self.opp.pieces = self.opp.findPieces(self.opp.board)
     38         self.opp.move()

C:\Users\sergsb\Anaconda\lib\copy.pyc in deepcopy(x, memo, _nil)
    188                             raise Error(
    189                                 "un(deep)copyable object of type %s" % cls)
--> 190                 y = _reconstruct(x, rv, 1, memo)
    191 
    192     memo[d] = y

C:\Users\sergsb\Anaconda\lib\copy.pyc in _reconstruct(x, info, deep, memo)
    332     if state:
    333         if deep:
--> 334             state = deepcopy(state, memo)
    335         if hasattr(y, '__setstate__'):
    336             y.__setstate__(state)

C:\Users\sergsb\Anaconda\lib\copy.pyc in deepcopy(x, memo, _nil)
    161     copier = _deepcopy_dispatch.get(cls)
    162     if copier:
--> 163         y = copier(x, memo)
    164     else:
    165         try:

C:\Users\sergsb\Anaconda\lib\copy.pyc in _deepcopy_dict(x, memo)
    255     memo[id(x)] = y
    256     for key, value in x.iteritems():
--> 257         y[deepcopy(key, memo)] = deepcopy(value, memo)
    258     return y
    259 d[dict] = _deepcopy_dict

C:\Users\sergsb\Anaconda\lib\copy.pyc in deepcopy(x, memo, _nil)
    161     copier = _deepcopy_dispatch.get(cls)
    162     if copier:
--> 163         y = copier(x, memo)
    164     else:
    165         try:

C:\Users\sergsb\Anaconda\lib\copy.pyc in _deepcopy_dict(x, memo)
    254     y = {}
    255     memo[id(x)] = y
--> 256     for key, value in x.iteritems():
    257         y[deepcopy(key, memo)] = deepcopy(value, memo)
    258     return y

KeyboardInterrupt: 

In [ ]:
# Play tic-tac-toe. The first player will be always X.
# Each tic-tac-toe board is represented by a sequence of three values:
# (set of squares that have an X, set of squares that have a y, board's width)
import random
import os
import string

def TicTacToe(X, O, width=3):
    """Play a tic-tac-toe game between the two given functions. After each
    turn, yield the new board.
    Each function should get a tic-tac-toe board and a char - 'X' or 'O',
    that represents the current turn, and return the number of
    square where it wants to put a sign.
    width is the board's width and length - it's 3 as default.

    X, O -- functions
    width -- natural number
    """
    board = (set(), set(), width)
    turn = 'X'
    while result(board) == False:
        if turn == 'X':
            board[0].add(X(board, turn))
        else:
            board[1].add(O(board, turn))
        yield board
        turn = list({'X', 'O'} - set([turn]))[0]

def displayTicTacToe(X, O, width=3):
    """Play a tic-tac-toe game (see TicTacToe's docstring for explanation) and
    display the new board to the user when a player plays, and the result of
    the game after its end.

    X, O - functions
    width - natural number"""
    for board in TicTacToe(X, O, width):
        os.system('cls' if os.name == 'nt' else 'clear')  # clearscreen
        print str_board(board)
    winner = result(board)
    if winner in {'X', 'O'}: print winner + ' won!'
    elif winner == None: print 'Tie!'
    else: raise ValueError("The game didn't end")

def result(board):
    """Return 'X' if X won in the given board, 'O' if O won, None if the game
    ended with a tie, False if the game didn't end yet, and raise an exception
    if it looks like X and O won both (the board cannot be reached using a
    legal game)."""
    x_squares, o_squares, width = board
    rows = [{width*row+col+1 for col in range(width)} for row in range(width)]
    cols = [{width*row+col+1 for row in range(width)} for col in range(width)]
    diagonals = [{width*i+i+1 for i in range(width)},
                 {width*i+width-i for i in range(width)}]
    lines = rows + cols + diagonals

    x_won = any(line.issubset(x_squares) for line in lines)
    o_won = any(line.issubset(o_squares) for line in lines)
    if x_won:
        if o_won:
            raise ValueError("Illegal board")
        return 'X'
    if o_won:
        return 'O'
    if x_squares | o_squares == set(range(1, width**2+1)):
        # Nobody won, but the board is full
        return None  # Tie
    return False

def str_board(board):
    """Return the board in a string representation, to print it."""
    return_str = ''
    x_squares, o_squares, width = board
    for row in range(width):
        for col in range(width):
            square = width*row+col+1
            return_str += 'X' if square in x_squares else 'O' if square in \
                               o_squares else ' '
            if col != width-1: return_str += ' | '
        if row != width-1: return_str += '\n'+'--+-'*(width-1)+'-\n' 
    return return_str


def human_player(board, turn):
    """Display the board to the user and ask him where does he want to put a
    sign. Return the square."""
    x_squares, o_squares, width = board
    os.system('cls' if os.name == 'nt' else 'clear')  # clear screen
    print str_board(board)
    while True:
        try:
            square = int(raw_input('Where do you want to add ' + turn + '? '))
            assert 0 < square <= width**2 and \
                   square not in x_squares | o_squares
            return square  # this will happen if there were no exceptions
        except:
            print ('You should write an integer between 1 and '+str(width**2)+
                   ', that represents a blank square.')

def minimax_player(board, turn):
    """Return a square where it's worthwhile to play according to the minimax
    algorithm."""
    return minimax_best_square(board, turn)[0]

def minimax_score_board(board, turn):
    """Return 1, 0 or -1 according to the minimax algorithm -- 1 if the player
    that has the given turn has a winning strategy, 0 if he doesn't have a
    winning strategy but he has a tie strategy, and -1 if he will lose anyway
    (assuming his opponent is playing a perfect game)."""
    if result(board) == turn:
        return 1
    if result(board) == None:
        return 0
    if result(board) != False:
        return -1
    return minimax_best_square(board, turn)[1]

def minimax_best_square(board, turn):
    """Choose a square where it's worthwhile to play in the given board and
    turn, and return a tuple of the square's number and it's score according
    to the minimax algorithm."""
    x_squares, o_squares, width = board
    max_score = -2
    opponent = list({'X', 'O'} - set([turn]))[0]
    squares = list(set(range(1, width**2+1)) - (x_squares | o_squares))
    random.shuffle(squares)
    for square in squares:
        # Iterate over the blank squares, to get the best square to play
        new_board = (x_squares | set([square] if turn=='X' else []),) + \
                    (o_squares | set([square] if turn=='O' else []), width)
        score = -minimax_score_board(new_board, opponent)
        if score == 1: return (square, 1)
        if score > max_score:
            max_score, max_square = score, square
    return (max_square, max_score)

displayTicTacToe(X = minimax_player, O = human_player, width = 3)
raw_input()

In [41]:
lol = UCTPlayGame()


0
[X][X][X][X][X][_][_][_][_][_][_][_][_][_][_][_]
[X][X][X][X][X][_][_][_][_][_][_][_][_][_][_][_]
[X][X][X][X][_][_][_][_][_][_][_][_][_][_][_][_]
[X][X][X][_][_][_][_][_][_][_][_][_][_][_][_][_]
[X][X][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][O][O]
[_][_][_][_][_][_][_][_][_][_][_][_][_][O][O][O]
[_][_][_][_][_][_][_][_][_][_][_][_][O][O][O][O]
[_][_][_][_][_][_][_][_][_][_][_][O][O][O][O][O]
[_][_][_][_][_][_][_][_][_][_][_][O][O][O][O][O]

50
[X][X][X][X][_][_][_][_][_][_][_][_][_][_][_][_]
[X][X][_][_][X][X][X][X][_][_][_][_][_][_][_][_]
[X][X][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[X][X][X][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][X][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][X][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][X][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][X][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][O][_][_][O][_][O]
[_][_][_][_][_][_][_][_][_][_][O][_][O][O][O][_]
[_][_][_][_][_][_][_][_][_][_][_][O][O][O][_][_]
[_][_][_][_][_][_][_][_][_][_][O][O][_][O][_][O]
[_][_][_][_][_][_][_][_][_][_][O][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][O][O][_][_][O][_][O]

100
[X][_][_][_][X][_][_][_][_][_][_][_][_][_][_][_]
[X][X][_][_][X][_][_][X][_][_][_][_][_][_][_][_]
[X][X][X][X][_][_][_][_][_][_][_][_][_][_][_][_]
[X][X][_][X][X][_][X][X][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][X][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][X][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][X][_][_][_][_][_][_][_][_][O][O][_][_][O][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][O][O][_]
[_][_][_][_][_][_][_][_][_][_][O][O][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][O][O][O][_][_]
[_][_][_][_][_][_][_][_][_][_][O][_][O][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][O][_][_][O][O]
[_][_][_][_][_][_][_][_][_][O][O][O][_][O][_][_]

150
[X][X][_][X][_][_][_][X][_][_][_][_][_][_][_][_]
[_][_][_][_][X][_][_][_][_][_][_][_][_][_][_][_]
[_][_][X][X][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][X][X][X][_][X][_][_][_][_][_][_][_][_]
[_][X][X][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][X][X][X][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][X][_][_][_][_][_][_][_][_][O][O][_][_]
[_][_][_][_][X][_][_][_][_][_][_][O][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][O]
[_][X][_][_][_][_][_][_][_][O][_][_][O][O][_][_]
[_][_][_][_][_][_][_][_][_][_][O][_][O][O][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][O][_][_]
[_][_][_][_][_][_][_][_][_][_][O][O][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][O][_][_][_][O][O]
[_][_][_][_][_][_][_][_][_][O][_][O][_][_][O][_]

200
[_][_][X][_][_][X][_][X][_][_][_][_][_][_][_][_]
[X][_][_][_][_][_][_][X][_][_][_][_][_][_][_][_]
[_][_][X][_][X][_][_][X][_][_][_][_][_][_][_][_]
[_][_][_][X][_][_][X][X][_][_][_][_][_][_][_][_]
[_][X][_][X][_][X][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][X][_][_][_][_][_][_][_][_][_][_][_]
[_][X][_][_][_][_][_][_][_][_][_][O][_][_][_][_]
[_][_][X][X][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][O][_][_][_][_]
[_][X][_][_][_][_][_][_][_][_][_][_][_][O][O][_]
[_][_][_][_][_][_][_][_][_][O][_][_][_][O][_][_]
[_][_][_][_][_][_][_][_][_][O][_][_][O][_][_][_]
[_][_][_][_][_][_][_][_][_][O][_][_][_][_][O][_]
[_][_][_][_][_][_][_][_][_][_][O][_][O][_][_][_]
[_][_][_][_][_][_][_][_][_][_][O][O][_][O][_][O]
[_][_][_][_][_][_][_][_][_][O][_][_][_][O][O][_]

250
[_][X][_][_][_][_][_][X][_][X][_][_][_][_][_][_]
[X][_][_][_][_][_][_][X][_][_][_][_][_][_][_][_]
[X][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[X][_][X][_][_][_][_][_][_][X][_][_][_][_][_][_]
[_][X][_][X][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][X][_][X][X][_][_][_][_][_][_][_][_][_][_]
[_][_][_][X][_][X][_][_][_][_][_][O][_][_][_][_]
[_][X][_][X][_][_][_][_][_][_][_][_][_][_][O][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][O][_][_]
[_][X][_][_][_][_][_][_][_][O][O][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][O][O][_][O][_]
[_][_][_][_][_][_][_][_][_][O][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][O][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][O][O][O][_]
[_][_][_][_][_][_][_][_][O][_][_][_][_][_][_][O]
[_][_][_][_][_][_][O][O][_][O][_][_][_][O][_][_]

300
[_][X][_][_][_][_][_][_][_][X][_][_][_][_][_][_]
[_][_][X][_][_][_][_][_][X][X][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[X][_][_][X][X][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][X][_][_][_][X][_][_][_][_][_][_][_]
[_][X][_][X][_][_][_][_][_][_][_][O][_][_][_][_]
[_][_][X][_][_][_][_][_][_][_][_][_][_][_][_][_]
[X][X][X][X][_][_][_][_][_][_][_][_][_][_][_][_]
[X][_][_][_][_][_][_][_][_][_][O][_][_][_][_][_]
[X][_][_][_][_][_][_][_][_][_][_][_][_][O][_][_]
[_][_][_][_][_][_][_][_][O][O][O][O][_][O][_][_]
[_][_][_][_][_][_][_][_][O][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][O]
[_][_][_][_][_][_][_][_][_][_][_][O][O][_][_][_]
[_][_][_][_][_][_][O][_][O][_][_][_][_][_][_][O]
[_][_][_][_][_][_][O][O][_][_][_][O][_][O][_][_]

350
[_][X][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][X][_][X][_][_][_][X][X][_][_][_][_][_][_][_]
[X][_][_][_][X][X][X][_][_][_][_][_][_][_][_][_]
[X][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[X][X][_][_][X][_][_][_][X][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][O][_][_][_][_]
[_][X][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][O][_][_][_]
[_][X][X][_][_][_][_][_][_][_][_][O][_][O][_][_]
[X][_][_][_][_][_][_][_][_][_][_][O][_][O][_][_]
[_][_][_][X][_][_][_][O][_][O][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][O][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][O][_][O][O][_][_]
[_][_][_][_][_][_][O][_][O][_][_][_][_][_][_][O]
[_][_][_][_][O][_][_][O][_][_][_][O][_][O][_][_]

400
[X][_][_][X][_][_][X][_][_][_][_][_][_][_][_][_]
[_][X][_][X][_][_][X][_][_][_][_][_][_][_][_][_]
[X][_][_][_][_][_][_][_][X][_][_][_][_][_][_][_]
[_][_][_][_][_][X][_][_][_][X][_][_][_][_][_][_]
[X][X][_][_][X][_][_][_][X][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][X][_][X][_][_][_][_][_][_][_][O][_][_]
[_][X][X][_][_][_][_][_][_][_][_][_][O][O][_][_]
[_][_][_][_][X][_][_][_][_][_][_][O][O][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][O][O][_][_][_]
[_][_][_][_][_][_][_][_][_][O][_][O][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][O][_][_][_][_][O][_]
[_][_][_][_][_][O][_][_][_][_][_][_][_][_][O][_]
[_][_][_][_][O][_][_][O][_][_][O][O][_][_][O][O]

450
[X][_][X][_][_][_][X][_][_][_][_][_][_][_][_][_]
[_][X][_][_][X][_][_][X][_][_][_][_][_][_][_][_]
[X][_][_][_][X][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][X][X][_][_][_][_][_][_]
[_][_][X][_][X][_][_][_][X][_][_][_][_][_][_][_]
[_][X][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][X][_][_][X][_][_][_][_][_][_][_][_][_]
[X][X][_][_][_][_][_][_][_][_][_][_][O][_][_][_]
[_][_][_][_][X][_][_][_][_][O][_][_][_][O][_][_]
[_][_][_][_][_][_][_][_][_][_][O][O][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][O][_][_][_][_]
[_][_][_][_][_][_][_][_][O][_][O][O][_][O][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][O][_][_][_]
[_][_][_][_][_][O][_][_][O][_][_][_][_][_][O][_]
[_][_][_][_][_][O][O][_][_][_][O][_][_][O][O][_]

500
[X][X][_][_][_][_][X][X][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][X][_][_][_][_][_][_][_][_]
[X][_][_][_][X][_][_][_][_][_][X][_][_][_][_][_]
[_][X][_][_][_][_][_][X][_][_][X][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][X][_][_][X][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][X][_][_][_][_][_][_][_][_][_][O]
[_][_][_][_][X][_][X][_][_][_][_][O][_][_][_][_]
[X][_][_][_][_][_][_][_][O][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[X][_][_][_][_][X][_][_][_][_][_][O][_][_][_][_]
[_][_][_][_][_][_][_][O][O][_][_][O][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][O][_][_][_]
[_][_][_][_][_][_][_][_][_][_][O][_][O][_][_][_]
[_][_][_][_][_][O][_][_][_][_][_][O][_][_][O][_]
[_][_][_][O][O][_][_][_][_][O][O][_][_][O][O][_]

550
[X][X][_][_][_][_][X][X][_][_][_][_][_][_][_][_]
[_][_][_][_][_][X][_][_][_][_][_][_][_][_][_][_]
[_][_][X][_][_][_][_][_][_][_][X][_][_][_][_][_]
[_][_][X][_][_][_][_][X][_][_][_][_][_][_][_][_]
[_][_][_][X][X][_][_][_][X][_][_][_][_][_][_][_]
[X][_][_][_][_][X][_][_][_][_][_][_][_][_][_][O]
[_][_][_][_][_][X][_][X][_][_][O][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[X][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][_][_][O][_][_][_][_][_][_]
[X][_][_][_][_][_][_][_][O][O][O][_][_][O][_][_]
[_][_][_][_][_][X][_][O][_][_][_][_][O][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][O][_][_][O]
[_][_][_][_][_][_][_][_][_][_][O][_][_][_][_][_]
[_][_][_][_][O][_][_][_][_][_][_][_][_][O][_][_]
[_][_][_][O][_][O][_][_][_][_][_][O][O][_][O][_]

600
[X][X][_][_][_][X][X][_][X][_][_][X][_][_][_][_]
[_][_][_][X][_][X][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][X][_][_][_][_][_][_][_][_][_][_]
[X][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][X][_][X][_][X][_][_][_][_][_][O][_]
[_][X][_][_][X][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][X][_][_][_][O][_][_][_][_][_][_]
[_][_][_][_][_][O][_][_][_][_][_][_][_][_][_][_]
[X][_][_][X][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][_][_][O][_][O][_][_][O][_][_][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][_][_][X][_][O][O][_][_][_][_][O][O][_]
[_][_][O][_][_][O][O][_][_][_][_][O][_][_][_][O]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]
[_][_][_][O][_][_][_][_][_][O][_][_][_][O][O][_]
[_][_][_][_][_][_][_][_][_][_][_][_][_][_][_][_]

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-41-9025c72e0d7e> in <module>()
----> 1 lol = UCTPlayGame()

<ipython-input-40-182a3b82d4b4> in UCTPlayGame()
     80             m = UCT(rootstate = state, itermax = 500, verbose = False) # play with values for itermax and verbose = True
     81         else:
---> 82             m = UCT(rootstate = state, itermax = 500, verbose = False)
     83 #         print("Best Move: " + str(m) + "\n")
     84         if m:

<ipython-input-40-182a3b82d4b4> in UCT(rootstate, itermax, verbose)
     43             elif mv in mvs["jumps"]:
     44                 mvs["jumps"].remove(mv)
---> 45             state.DoMove(mv)
     46 #             if asdf > 20:
     47 #                 return

KeyboardInterrupt: 

In [147]:
print lol


(2, {1: [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], 2: [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 100]})

In [148]:
plays= 100

In [150]:
UCTwins = {1: 0, 2: 0}
UCTrews = {1: [], 2: []}
for i in range(plays):
    print i
    p, r = UCTPlayGame()
    UCTwins[p] += 1
    UCTrews[1].append(np.sum(r[1]))
    UCTrews[2].append(np.sum(r[2]))
UCTcumrews = {1: np.cumsum(UCTrews[1]), 2: np.cumsum(UCTrews[2])}


0
Player 2 wins!
1
Player 1 wins!
2
Player 1 wins!
3
Player 1 wins!
4
Player 1 wins!
5
Player 1 wins!
6
Player 1 wins!
7
Player 1 wins!
8
Player 1 wins!
9
Player 1 wins!
10
Player 1 wins!
11
Player 1 wins!
12
Player 1 wins!
13
Player 1 wins!
14
Player 1 wins!
15
Player 1 wins!
16
Player 1 wins!
17
Player 1 wins!
18
Player 1 wins!
19
Player 1 wins!
20
Player 2 wins!
21
Player 1 wins!
22
Player 1 wins!
23
Player 1 wins!
24
Player 1 wins!
25
Player 1 wins!
26
Player 1 wins!
27
Player 1 wins!
28
Player 1 wins!
29
Player 1 wins!
30
Player 1 wins!
31
Player 1 wins!
32
Player 2 wins!
33
Player 1 wins!
34
Player 1 wins!
35
Player 1 wins!
36
Player 2 wins!
37
Player 1 wins!
38
Player 1 wins!
39
Player 1 wins!
40
Player 1 wins!
41
Player 1 wins!
42
Player 1 wins!
43
Player 1 wins!
44
Player 2 wins!
45
Player 1 wins!
46
Player 1 wins!
47
Player 1 wins!
48
Player 1 wins!
49
Player 1 wins!
50
Player 1 wins!
51
Player 2 wins!
52
Player 2 wins!
53
Player 1 wins!
54
Player 1 wins!
55
Player 1 wins!
56
Player 1 wins!
57
Player 1 wins!
58
Player 1 wins!
59
Player 2 wins!
60
Player 2 wins!
61
Player 2 wins!
62
Player 1 wins!
63
Player 1 wins!
64
Player 1 wins!
65
Player 1 wins!
66
Player 1 wins!
67
Player 1 wins!
68
Player 1 wins!
69
Player 1 wins!
70
Player 1 wins!
71
Player 1 wins!
72
Player 1 wins!
73
Player 1 wins!
74
Player 1 wins!
75
Player 1 wins!
76
Player 1 wins!
77
Player 1 wins!
78
Player 2 wins!
79
Player 2 wins!
80
Player 1 wins!
81
Player 1 wins!
82
Player 1 wins!
83
Player 1 wins!
84
Player 1 wins!
85
Player 1 wins!
86
Player 1 wins!
87
Player 1 wins!
88
Player 1 wins!
89
Player 1 wins!
90
Player 2 wins!
91
Player 1 wins!
92
Player 1 wins!
93
Player 1 wins!
94
Player 1 wins!
95
Player 1 wins!
96
Player 1 wins!
97
Player 1 wins!
98
Player 1 wins!
99
Player 1 wins!

In [152]:
x = np.arange(0, plays, 1)
y = map(lambda x: UCTrews[1][x], x)

plt.plot(x, y)


Out[152]:
[<matplotlib.lines.Line2D at 0xb3dd278>]

In [153]:
x = np.arange(0, plays, 1)
y = map(lambda x: UCTrews[2][x], x)

plt.plot(x, y)


Out[153]:
[<matplotlib.lines.Line2D at 0xb022710>]

In [ ]:


In [ ]: