In [1]:
from datetime import datetime
In [1]:
import argparse
import socket
import sys
import numpy as np
import random as random
from random import sample
import copy as cp
from HandEvaluator import HandEvaluator
from Brains import RationalBrain
from Brains import AdaptiveBrain
#sys.path.add("/Users/sbiswas/GitHub/poker/PokerBots_2017/Johnny")
np.set_printoptions(linewidth=300)
# ----------- BRAIN ----------- #
# ----------- BRAIN ----------- #
class Johnny:
    """
    Poker bot that parses engine packets, tracks the current hand and match
    state, and maintains a temporal feature matrix for its brain.

    self.bot_name = bot_name in config file. Corresponds to PLAYER_X_NAME field.
    self.brain = instance of the brain class handed to the constructor.
    self.hand = a dictionary storing properties of the current hand. Has the following keys.
        hand_id = int. ID of hand.
        button = button token from the NEWHAND packet, stored as the raw string
                 (e.g. "true"/"false"); it is NOT converted to a bool.
        hole1 = hole card 1
        hole2 = hole card 2
        board = current known board cards
        pot_size = list of ints, one pot size per GETACTION packet seen this hand.
        action_history = a list of the last actions. action_history[i] = another list that describes
                         the previous actions up until action point i.
        winnings = int. Chips won (can be a negative integer if we lost chips).
                   Set to 0 while the hand is still in progress.
    self.state = A dictionary representing our state in the current match. Has the following keys.
        my_bank = our current bankroll
        their_bank = their current bankroll
        time_bank = cumulative time remaining in the match
    self.temporal_feature_matrix = [] until the first action of a hand has been
        processed, afterwards a (5 x n_actions) numpy array (see
        update_temporal_feature_matrix for the row layout).
    self.possible_actions = legal actions listed in the latest GETACTION packet.
    """

    def __init__(self, bot_name="P1", brain=AdaptiveBrain):
        """Create a bot called `bot_name` driven by a fresh instance of `brain`."""
        self.bot_name = bot_name
        self.brain = brain()
        self.state = {}
        self.reset_hand()

    def reset_hand(self):
        """Drop every piece of per-hand data so the next hand starts clean."""
        self.hand = {}
        self.hand['action_history'] = []
        self.hand['pot_size'] = []
        self.temporal_feature_matrix = []
        self.possible_actions = []

    ### ------- PARSING ------- ###
    def parse_data(self, data):
        """Dispatch one raw packet line to the parser for its packet type.

        Packets other than NEWHAND / GETACTION / HANDOVER, and blank lines,
        are ignored.
        """
        splits = data.split()
        if not splits:
            # Blank packet: nothing to dispatch (previously an IndexError).
            return
        packet_type = splits[0]
        if packet_type == "NEWHAND":
            self.parse_new_hand(splits)
        elif packet_type == "GETACTION":
            self.parse_get_action(splits)
        elif packet_type == "HANDOVER":
            self.parse_hand_over(splits)

    def parse_win_result(self, wr):
        """Return our signed result for a "WIN:amount:actor" token.

        The result is half of `amount`, positive when `actor` is this bot and
        negative otherwise. Integer division is applied before the sign so the
        magnitude is the same for winner and loser and the result is always an
        int (plain `/` floors -amt/2 away from zero on Python 2 and yields a
        float on Python 3).
        """
        splits = wr.split(":")
        amt = int(splits[1])
        winner = splits[2]
        half_pot = amt // 2
        if winner == self.bot_name:
            return half_pot
        return -half_pot

    def parse_hand_over(self, data_splits):
        """Record the final board, last actions, winnings and time bank."""
        # HANDOVER Stack1 Stack2 numBoardCards [boardCards] numLastActions [lastActions] timeBank
        # Can ignore Stack1 and Stack2 because we'll get it on the next NEWHAND packet.
        num_board_cards = int(data_splits[3])
        counter = 4
        self.hand['board'] = data_splits[counter:counter+num_board_cards]
        counter += num_board_cards
        num_last_actions = int(data_splits[counter])
        counter += 1
        # The final entry of the action list is treated as the win record and is
        # kept out of the action history.
        # NOTE(review): a hand ending in TIE records would be parsed the same
        # way as a WIN here -- confirm against the engine protocol.
        self.hand['action_history'].append(data_splits[counter:counter+num_last_actions-1])
        self.hand['winnings'] = self.parse_win_result(data_splits[counter+num_last_actions-1])
        self.state['time_bank'] = float(data_splits[-1])

    def check_for_hand_update_and_update_hand(self, last_actions):
        """Apply any four-field DISCARD action to our stored hole cards."""
        for last_action in last_actions:
            splits = last_action.split(":")
            if len(splits) == 4 and splits[0] == "DISCARD":
                # DISCARD:(oldcard):(newcard):PLAYER
                if self.hand['hole1'] == splits[1]:
                    self.hand['hole1'] = splits[2]
                else:
                    self.hand['hole2'] = splits[2]
                print("Updated hand ... ")
                print([self.hand['hole1'], self.hand['hole2']])

    def parse_get_action(self, data_splits):
        """Record pot size, board, last actions, legal actions and time bank."""
        # GETACTION potSize numBoardCards [boardCards] numLastActions [lastActions] numLegalActions [legalActions] timebank
        self.hand['pot_size'].append(int(data_splits[1]))
        num_board_cards = int(data_splits[2])
        counter = 3
        self.hand['board'] = data_splits[counter:counter+num_board_cards]
        counter += num_board_cards
        num_last_actions = int(data_splits[counter])
        counter += 1
        last_actions = data_splits[counter:counter+num_last_actions]
        self.check_for_hand_update_and_update_hand(last_actions)  # update hand if discard was made.
        self.hand['action_history'].append(last_actions)
        counter += num_last_actions
        self.hand['winnings'] = 0  # if we're in a get action packet, then we haven't won anything yet.
        num_legal_actions = int(data_splits[counter])
        counter += 1
        self.possible_actions = data_splits[counter:counter+num_legal_actions]
        self.state['time_bank'] = float(data_splits[-1])

    def parse_new_hand(self, data_splits):
        """Reset per-hand data and load the fields of a NEWHAND packet."""
        # NEWHAND handId button holeCard1 holeCard2 myBank otherBank timeBank
        self.reset_hand()
        self.hand['hand_id'] = int(data_splits[1])
        self.hand['button'] = data_splits[2]  # kept as the raw string token
        self.hand['hole1'] = data_splits[3]
        self.hand['hole2'] = data_splits[4]
        self.state['my_bank'] = int(data_splits[5])
        self.state['their_bank'] = int(data_splits[6])
        self.state['time_bank'] = float(data_splits[7])
    ### ----------------------- ###

    ### ------- FEATURE GENERATION ------- ###
    def update_temporal_feature_matrix(self):
        """Append one feature column for every action not yet in the matrix."""
        # Columns represent time steps in a hand.
        # Rows are as follows:
        # 0 - hero action
        # 1 - villain action
        # 2 - street
        # 3 - hero discard?
        # 4 - villain discard?
        al = [item for sublist in self.hand['action_history'] for item in sublist]  # linearize action history list
        if not al:
            return
        if len(self.temporal_feature_matrix) > 0:  # check if it's started to fill out.
            start_from_idx = self.temporal_feature_matrix.shape[1]
        else:
            self.temporal_feature_matrix = self.build_temporal_feature_vector(al[0])
            start_from_idx = 1
        # Columns are appended one at a time because each new column is derived
        # from the matrix as it stands after the previous action.
        for action in al[start_from_idx:]:
            self.temporal_feature_matrix = np.hstack((self.temporal_feature_matrix,
                                                      self.build_temporal_feature_vector(action)))

    def build_temporal_feature_vector(self, performed_action):
        """Return the (5, 1) feature column for a single performed action.

        Bet sizes are expressed as a fraction of STACKSIZE. Actions that end
        the hand (and any unrecognised action) produce an all-zero column.
        """
        # Performed actions to expect.
        # BET:amount[:actor]
        # CALL[:actor]
        # CHECK[:actor]
        # DEAL:STREET
        # FOLD[:actor]
        # POST:amount:actor
        # DISCARD[:actor]
        # RAISE:amount[:actor]
        # REFUND:amount:actor
        # SHOW:card1:card2:actor
        # TIE:amount:actor
        # WIN:amount:actor
        NFEATURES = 5
        STACKSIZE = 200.0
        street_idx = 2
        discard_offset = 3  # row 3 = hero discard, row 4 = villain discard
        street = self.get_street()
        splits = performed_action.split(":")
        fv = np.zeros((NFEATURES, 1))
        if splits[0] == "BET":
            actor_idx = self.get_actor_idx(splits[-1])
            amount = float(splits[1])
            fv[actor_idx] = amount/STACKSIZE + np.max(self.temporal_feature_matrix[actor_idx])
            fv[1-actor_idx] = self.temporal_feature_matrix[1-actor_idx, -1]
            fv[street_idx] = street
        elif splits[0] == "CALL":
            actor_idx = self.get_actor_idx(splits[-1])
            player_to_call = 1 - actor_idx
            # The caller matches the largest amount the other player has put in.
            call_amt = np.max(self.temporal_feature_matrix[player_to_call])
            fv[actor_idx] = call_amt
            fv[1-actor_idx] = self.temporal_feature_matrix[1-actor_idx, -1]
            fv[street_idx] = street
        elif splits[0] == "CHECK":
            actor_idx = self.get_actor_idx(splits[-1])
            fv[actor_idx] = self.temporal_feature_matrix[actor_idx, -1]
            fv[1-actor_idx] = self.temporal_feature_matrix[1-actor_idx, -1]
            fv[street_idx] = street
        elif splits[0] == "DEAL":
            if splits[1] == "FLOP":
                street = 1
            elif splits[1] == "TURN":
                street = 2
            elif splits[1] == "RIVER":
                street = 3
            fv[street_idx] = street
            fv[0] = self.temporal_feature_matrix[0, -1]
            fv[1] = self.temporal_feature_matrix[1, -1]
        elif splits[0] == "POST":
            actor_idx = self.get_actor_idx(splits[-1])
            amount = float(splits[1])/STACKSIZE
            fv[actor_idx] = amount
            fv[street_idx] = street
            # The very first POST of a hand has no previous column to copy from.
            if len(self.temporal_feature_matrix) > 0:
                fv[1-actor_idx] = self.temporal_feature_matrix[1-actor_idx, -1]
        elif splits[0] == "DISCARD":
            actor_idx = self.get_actor_idx(splits[-1])
            fv[actor_idx+discard_offset] = 1
            fv[street_idx] = street
            fv[actor_idx] = self.temporal_feature_matrix[actor_idx, -1]
            fv[1-actor_idx] = self.temporal_feature_matrix[1-actor_idx, -1]
        elif splits[0] == "RAISE":
            # Raise specifies the amount raised to, not the amount raised.
            # This creates some complications with respect to maintaining the
            # temporal feature matrix.
            # Basically, we need to add the raise value to the latest pot value from
            # the previous street, since multiple raises and re-raises specify
            # only the amount raised to.
            actor_idx = self.get_actor_idx(splits[-1])
            max_of_prev_street = self.get_max_of_prev_street(actor_idx)
            amount = float(splits[1])/STACKSIZE + max_of_prev_street
            fv[actor_idx] = amount
            fv[1-actor_idx] = self.temporal_feature_matrix[1-actor_idx, -1]
            fv[street_idx] = street
        # FOLD, REFUND, SHOW, TIE and WIN mean the hand is now over: nothing to
        # do, the column stays all zeros.
        # Difference betting results?
        # if True and len(self.temporal_feature_matrix) > 0:
        #     fv[0] -= self.temporal_feature_matrix[0,-1]
        #     fv[1] -= self.temporal_feature_matrix[1,-1]
        return fv

    def get_street(self):
        """Return the highest street recorded so far (0 = preflop)."""
        if len(self.temporal_feature_matrix) == 0:  # if has not been initialized
            return 0  # preflop
        return np.max(self.temporal_feature_matrix[2])

    def get_max_of_prev_street(self, actor_idx):
        """Return the largest value in `actor_idx`'s row over the previous street.

        Returns 0 while the hand is still preflop.
        """
        street = self.get_street()
        if street == 0:
            return 0
        mask = self.temporal_feature_matrix[2] == street - 1
        return np.max(self.temporal_feature_matrix[actor_idx, mask])

    def get_actor_idx(self, actor):
        """Return 0 when `actor` is this bot, 1 for anyone else."""
        return 0 if actor == self.bot_name else 1

    def check_synchrony_to_brain(self):
        """Debug check that the brain's newest state matches our feature matrix.

        Raises:
            AssertionError: if the brain holds a state that differs from the
                leading rows of the temporal feature matrix.
        """
        if len(self.brain.new_state) > 0:
            ns = self.brain.new_state[0]
            print(ns)
            print(self.temporal_feature_matrix[:ns.shape[0]])
            states_same = np.array_equal(ns, self.temporal_feature_matrix[:ns.shape[0]])
            # The original `assert()` tested an empty tuple and therefore failed
            # unconditionally; check the comparison that was actually computed.
            if not states_same:
                raise AssertionError("brain state is out of sync with the temporal feature matrix")
    ### ------------------------------- ###

    def run(self, input_socket):
        """Serve the engine over `input_socket` until it disconnects.

        Each packet is parsed, folded into the feature matrix and shown to the
        brain for learning; GETACTION packets are answered with the brain's
        decision and REQUESTKEYVALUES with FINISH. The reader and the socket
        are closed on the way out, including when an exception escapes.
        """
        # Get a file-object for reading packets from the socket.
        # Using this ensures that you get exactly one packet per read.
        f_in = input_socket.makefile()
        try:
            while True:
                # Block until the engine sends us a packet.
                data = f_in.readline().strip()
                # An empty read means the connection has closed.
                if not data:
                    print("Gameover, engine disconnected.")
                    break
                # Here is where you should implement code to parse the packets from
                # the engine and act on it.
                print("PACKET -> %s" % data)
                self.parse_data(data)
                self.update_temporal_feature_matrix()
                #self.check_synchrony_to_brain()
                # First before taking our next action, let's learn from the move we
                # made at the last decision point.
                self.brain.learn_from_last_action(self)
                # When appropriate, reply to the engine with a legal action.
                # The engine will ignore all spurious responses.
                # The engine will also check/fold for you if you return an
                # illegal action.
                # When sending responses, terminate each response with a newline
                # character (\n) or your bot will hang!
                word = data.split()[0]
                if word == "GETACTION":
                    action = self.brain.make_decision(self)
                    # Reply on the socket we were given rather than a global `s`;
                    # encoding keeps the payload valid on both Python 2 and 3.
                    input_socket.sendall((action + "\n").encode("ascii"))
                elif word == "REQUESTKEYVALUES":
                    # At the end, the engine will allow your bot save key/value pairs.
                    # Send FINISH to indicate you're done.
                    input_socket.sendall("FINISH\n".encode("ascii"))
        finally:
            # Clean up the reader and the socket.
            f_in.close()
            input_socket.close()
In [2]:
# data = []
# data.append("NEWHAND 98 false 9c 4s -111 111 9.931115")
# data.append("GETACTION 4 0 3 POST:1:P2 POST:2:P1 CALL:P2 2 CHECK RAISE:4:200 9.931115124999993")
# data.append("GETACTION 4 3 4d 5c Js 2 CHECK:P1 DEAL:FLOP 3 CHECK DISCARD:9c DISCARD:4s 9.930735672999992")
# data.append("GETACTION 4 3 4d 5c Js 2 CHECK:P1 CHECK:P2 2 CHECK BET:2:198 9.930290227999992")
# data.append("GETACTION 4 4 4d 5c Js Jh 3 CHECK:P1 CHECK:P2 DEAL:TURN 3 CHECK DISCARD:9c DISCARD:4s 9.929636328999992")
# data.append("GETACTION 4 4 4d 5c Js Jh 2 CHECK:P1 DISCARD:P2 2 CHECK BET:2:198 9.928944276999992")
# data.append("GETACTION 83 4 4d 5c Js Jh 2 CHECK:P1 BET:79:P2 3 FOLD CALL RAISE:158:198 9.928641209999991")
# data.append("HANDOVER -113 113 4 4d 5c Js Jh 3 FOLD:P1 REFUND:79:P2 WIN:4:P2 9.928236565999992")
# data = []
# data.append("NEWHAND 80 false Kd Qh -77 77 9.942149")
# data.append("GETACTION 4 0 3 POST:1:P2 POST:2:P1 CALL:P2 2 CHECK RAISE:4:200 9.942148733999993")
# data.append("GETACTION 4 3 9d 4h Ts 2 CHECK:P1 DEAL:FLOP 3 CHECK DISCARD:Kd DISCARD:Qh 9.941793586999992")
# data.append("GETACTION 4 3 9d 4h Ts 2 CHECK:P1 CHECK:P2 2 CHECK BET:2:198 9.941348511999992")
# data.append("GETACTION 56 3 9d 4h Ts 2 CHECK:P1 BET:52:P2 3 FOLD CALL RAISE:104:198 9.940922597999993")
# data.append("HANDOVER -79 79 3 9d 4h Ts 3 FOLD:P1 REFUND:52:P2 WIN:4:P2 9.940476787999993")
# #data = []
# data.append("NEWHAND 91 true 3c Ah 410 -410 9.904622")
# data.append("GETACTION 3 0 2 POST:1:P1 POST:2:P2 3 CALL FOLD RAISE:4:200 9.904621749000007")
# data.append("GETACTION 400 3 9d 5h 9s 4 RAISE:200:P1 CALL:P2 DEAL:FLOP CHECK:P2 3 CHECK DISCARD:3c DISCARD:Ah 9.904199743000007")
# data.append("GETACTION 400 4 9d 5h 9s Tc 3 CHECK:P1 DEAL:TURN DISCARD:P2 3 CHECK DISCARD:3c DISCARD:Ah 9.902036590000007")
# data.append("HANDOVER 210 -210 5 9d 5h 9s Tc 2s 5 CHECK:P1 DEAL:RIVER SHOW:3c:Ah:P1 SHOW:Ad:5d:P2 WIN:400:P2 9.901503757000008")
# # data = []
# data.append("NEWHAND 34 false Ks 2d 7 -7 9.968365")
# #data.append("NEWHAND 34 false 9d 4h 7 -7 9.968365")
# data.append("GETACTION 4 0 3 POST:1:P2 POST:2:P1 CALL:P2 2 CHECK RAISE:4:200 9.968364926000001")
# data.append("GETACTION 4 3 5s Kd Qc 2 CHECK:P1 DEAL:FLOP 3 CHECK DISCARD:9d DISCARD:4h 9.968108437000001")
# #data.append("GETACTION 4 3 5s Kd Qc 2 CHECK:P1 DISCARD:P2 2 CHECK BET:2:198 9.967825413000002")
# data.append("GETACTION 4 3 5s Kd Qc 2 DISCARD:2d:Qh:P1 DISCARD:P2 2 CHECK BET:2:198 9.967825413000002")
# data.append("GETACTION 4 4 5s Kd Qc Ah 3 CHECK:P1 CHECK:P2 DEAL:TURN 3 CHECK DISCARD:9d DISCARD:4h 9.967515728000002")
# data.append("GETACTION 4 4 5s Kd Qc Ah 2 CHECK:P1 DISCARD:P2 2 CHECK BET:2:198 9.967211695000001")
# data.append("GETACTION 4 5 5s Kd Qc Ah Tc 3 CHECK:P1 CHECK:P2 DEAL:RIVER 2 CHECK BET:2:198 9.966950326000001")
# data.append("HANDOVER 5 -5 5 5s Kd Qc Ah Tc 5 CHECK:P1 CHECK:P2 SHOW:Qh:9c:P2 SHOW:9d:4h:P1 WIN:4:P2 9.966612284000002")
#data = "GETACTION 4 3 5d Kd 6h 2 CHECK:P1 DEAL:FLOP 3 CHECK DISCARD:Jc DISCARD:9s 9.995735496999998"
# data = []
# data.append("NEWHAND 83 true Qc 2d -1813 1813 6.639722")
# data.append("GETACTION 3 0 2 POST:1:P1 POST:2:P2 3 CALL FOLD RAISE:4:200 6.639721898000003")
# data.append("GETACTION 4 3 7d Th Qd 4 CALL:P1 CHECK:P2 DEAL:FLOP CHECK:P2 3 CHECK DISCARD:Qc DISCARD:2d 6.629336931000003")
# data.append("GETACTION 140 3 7d Th Qd 2 CHECK:P1 BET:136:P2 3 FOLD CALL RAISE:198:198 6.600063579000003")
# data.append("GETACTION 400 4 7d Th Qd 6c 4 RAISE:198:P1 CALL:P2 DEAL:TURN CHECK:P2 3 CHECK DISCARD:Qc DISCARD:2d 6.588907597000003")
# data.append("HANDOVER -1613 1613 5 7d Th Qd 6c 9d 5 DISCARD:2d:8h:P1 DEAL:RIVER SHOW:Qc:8h:P1 SHOW:2h:Td:P2 WIN:400:P1 6.558397055000003")
# Replay fixture: the raw engine packets of one complete hand (hand 19),
# in the order they were received -- NEWHAND, six GETACTIONs, HANDOVER.
data = [
    "NEWHAND 19 true 4s Ks -342 342 9.451623",
    "GETACTION 3 0 2 POST:1:P1 POST:2:P2 3 CALL FOLD RAISE:4:200 9.451623012",
    "GETACTION 134 3 4d 9s Ah 4 RAISE:67:P1 CALL:P2 DEAL:FLOP DISCARD:P2 3 CHECK DISCARD:4s DISCARD:Ks 9.436031113",
    "GETACTION 134 3 4d 9s Ah 2 DISCARD:Ks:3s:P1 CHECK:P2 2 CHECK BET:2:133 9.406581291",
    "GETACTION 332 4 4d 9s Ah Jh 4 BET:99:P1 CALL:P2 DEAL:TURN CHECK:P2 3 CHECK DISCARD:4s DISCARD:3s 9.397651566",
    "GETACTION 352 4 4d 9s Ah Jh 2 DISCARD:3s:9h:P1 BET:20:P2 3 FOLD CALL RAISE:34:34 9.350131563",
    "GETACTION 378 5 4d 9s Ah Jh Js 3 CALL:P1 DEAL:RIVER BET:6:P2 3 FOLD CALL RAISE:12:14 9.335313411",
    "HANDOVER -150 150 5 4d 9s Ah Jh Js 4 CALL:P1 SHOW:4s:9h:P1 SHOW:Ts:8d:P2 WIN:384:P1 9.323017606999999",
]
#data.append("HANDOVER -150 150 5 4d 9s Ah Jh Js 4 CALL:P1 SHOW:4s:9h:P1 SHOW:Ts:8d:P2 WIN:384:P1 9.323017606999999")
In [3]:
# Bot under test; its name has to match the actor tag ("P1") used in the
# replayed packets so that its own actions are attributed to it.
jp = Johnny(bot_name="P1")
In [4]:
# Replay the first eight recorded packets through the bot, refreshing the
# temporal feature matrix after each one, then ask the brain for a decision.
# NOTE(review): the export stripped this cell's indentation. The loop is
# assumed to cover the three replay statements, with the decision taken once
# after the loop -- confirm against the original notebook.
for i in range(8):
    print(data[i])
    jp.parse_data(data[i])
    jp.update_temporal_feature_matrix()
#jp.temporal_feature_matrix[:,:17]
#inputs, action_strs = jp.brain.enumerate_next_action_vectors(jp)
#Qvals, possible_states = jp.brain.evaluate_Q_function(jp.temporal_feature_matrix.T, inputs)
action = jp.brain.make_decision(jp)
In [5]:
# Bare expression: in the notebook this echoes the brain's `new_state` as the
# cell output; run as a plain script statement it has no visible effect.
jp.brain.new_state
Out[5]:
In [9]:
# Manually trigger the brain's learning step for the bot's current state
# (the same call Johnny.run makes after every packet).
jp.brain.learn_from_last_action(jp)
In [ ]:
# Stack the first candidate action vector under the transposed feature
# history, add a leading axis of size 1, and feed it to the brain's Q model.
# NOTE(review): `inputs` is only assigned in a commented-out line of an
# earlier cell -- presumably it came from interactive state; confirm.
history = jp.temporal_feature_matrix.T
n_features = history.shape[1]
stacked = np.vstack((history, inputs[0][0]))
T = stacked.reshape((1, -1, n_features))
print(T.shape)
new_input = [T, inputs[0][1]]
#print(new_input)
yhat = jp.brain.Q.predict(new_input)
In [ ]:
In [ ]:
# Replay every recorded packet, printing the legal actions (when there are
# any) and the feature matrix after each one, then report the elapsed time.
# NOTE(review): the export stripped this cell's indentation. The nesting below
# (everything but the final timing print inside the loop, one print under the
# `if`) is the assumed original layout -- confirm.
startTime = datetime.now()
for packet in data:
    print(packet)
    jp.parse_data(packet)
    if jp.possible_actions:
        print(jp.possible_actions)
    jp.update_temporal_feature_matrix()
    print(jp.temporal_feature_matrix)
# print() call instead of the Python 2 print statement, matching the rest of
# the file; the output is the same on Python 2 for a single argument.
print(datetime.now() - startTime)
In [ ]:
# Scratch call: three board cards, one hole card and a simulation count are
# passed to jp.evaluate_showdown_probabilities and the result is printed.
# NOTE(review): the Johnny class visible in this file defines no
# evaluate_showdown_probabilities method -- presumably this cell targeted an
# earlier version of the class; confirm before running.
board = ['Th', 'Js', 'Qh']
hand = ['Kh']
nsim = 100
print(jp.evaluate_showdown_probabilities(hand, board, nsim))
In [ ]:
In [ ]:
# NOTE(review): card_evaluator, board_sim and hand_sim are not defined in the
# visible code; this cell presumably relied on earlier interactive state --
# confirm before running.
card_evaluator.evaluate(board_sim,hand_sim)
In [ ]:
# Flatten the per-packet action lists of the current hand into one flat list,
# preserving order.
al = []
for packet_actions in jp.hand['action_history']:
    al.extend(packet_actions)
In [ ]:
# Bare expression: echoes the list `al` as the notebook cell output; it has no
# effect when run as a script statement.
al
In [ ]:
# Scratch experiment: stack a 1-D array of shape (3,) with a 2-D column of
# shape (3, 1). np.hstack needs inputs with the same number of dimensions, so
# this call raises ValueError rather than returning an array.
np.hstack((np.zeros((3,)), np.zeros((3,1))))
In [ ]:
# NOTE(review): card_evaluator, board_sim and villain_hand are not defined in
# the visible code; this cell presumably relied on earlier interactive state --
# confirm before running.
card_evaluator.evaluate(board_sim,villain_hand)
In [ ]:
# Scratch check of the conditional-expression syntax: the comparison is
# between two identical literals, so street is always 1.
street = 1 if "FLOP" == "FLOP" else 0
# print() call instead of the Python 2 print statement, matching the rest of
# the file; the output is the same on Python 2 for a single argument.
print(street)
In [ ]:
# Scratch check that cp.copy gives an independent list: growing the copy must
# leave the source list untouched.
yo = list("ABCDE")
bo = cp.copy(yo)
bo += "F"  # in-place extend with the one-character iterable "F"
print(bo)
print(yo)
In [ ]:
# First two elements of bo.
print(bo[:2])
In [ ]:
# Three elements of bo starting at index 2.
print(bo[2:5])
In [ ]:
# Everything in bo from index 5 onwards.
print(bo[5:])
In [ ]:
# Scratch check that booleans add to a float as 0 and 1.
v = 0.0 + False + True
print(v)
In [ ]:
# Bare expression: echoes the legal actions stored by the most recent
# GETACTION packet as the notebook cell output; no effect as a script statement.
jp.possible_actions
In [ ]:
# Scratch check of a stepped range: prints 2, 7, 12, ... up to 197.
# The loop body is re-indented here; the export had flattened it, which made
# the loop a syntax error.
for i in range(2, 201, 5):
    print(i)
In [ ]:
# NOTE(review): `b` is not assigned anywhere in the visible code; this bare
# expression presumably echoed a value from earlier interactive state and will
# raise NameError on a fresh run -- confirm or remove.
b
In [4]:
# Show the class name. Written as a print() call instead of the Python 2 print
# statement, matching the rest of the file; the output is the same on Python 2
# for a single argument.
print(Johnny.__name__)
In [ ]: