Game scores are useful for identifying which team is likely to win a given game against another team. Point scores are useful for saying which team is likely to win a given point against another team.
This approach ran into the same problems as my original effort, which was based on simple wins and losses. I resolved those problems with help from the PyMC3 developers, and my original model now works well. For now, I am holding off on further development of this point-score approach.
In [1]:
import pandas as pd
import os
import numpy as np
import pymc3 as pm
from pymc3.math import invlogit
from tqdm import tqdm
from theano import tensor as tt
%matplotlib inline
In [2]:
# Root of the local project checkout (machine-specific absolute path).
project_dir = '/Users/rbussman/Projects/BUDA/buda-ratings'
# Directory holding the raw per-league score CSVs: <project>/data/raw/game_scores
scores_subpath = ('data', 'raw', 'game_scores')
scores_dir = os.path.join(project_dir, *scores_subpath)
In [3]:
# League to analyse: summer club league 2016.
league_id = 40264
# Scores for a league are stored as scores_<league_id>.csv inside scores_dir.
scores_filename = 'scores_{}.csv'.format(league_id)
game_scores = pd.read_csv(os.path.join(scores_dir, scores_filename))
In [4]:
# Preview the first five rows of the raw score table.
game_scores.head(5)
Out[4]:
In [5]:
# List the distinct team names found in the 'Team A' column.
game_scores.loc[:, 'Team A'].unique()
Out[5]:
In [6]:
# List the distinct division names found in the 'divname' column.
game_scores.loc[:, 'divname'].unique()
Out[6]:
In [7]:
# Canonical team ordering for the rest of the notebook: the distinct
# 'Team A' names, in order of first appearance.
# NOTE(review): a team that only ever appears in 'Team B' would be missing
# from this array -- confirm every team is listed as 'Team A' at least once.
teams = game_scores.loc[:, 'Team A'].unique()
In [8]:
# Build two lookups keyed by team name:
#   team2div   -> the team's division name (taken from its 'Team A' rows)
#   team2index -> the team's row in the model's skill vector
#
# The per-team priors and the skill vector used later in the notebook are
# laid out in `teams` order, so a team's index has to be its position in
# `teams`. Numbering teams from 0 inside each division (as this cell used to)
# hands the same index to one team per division, which makes those teams
# share a single skill parameter and pairs them with the wrong prior.
#
# The five *_index counters are kept and still end up holding the number of
# teams seen in each division group.
div1_index = 0
div2_index = 0
div3_index = 0
open1_index = 0
open2_index = 0
team2index = {}
team2div = {}
for i, team in enumerate(teams):
    row = game_scores['Team A'] == team
    # A team's division is read from the first distinct 'divname' among the
    # games where it is listed as 'Team A'.
    div_team = game_scores.loc[row, 'divname'].unique()[0]
    team2div[team] = div_team
    # Global index: position of the team in `teams`.
    team2index[team] = i
    # Division names are mutually exclusive, so count with an elif chain.
    if div_team in ('4/3 Div 1', '5/2 Div 1'):
        div1_index += 1
    elif div_team in ('4/3 Div 2', '5/2 Div 2'):
        div2_index += 1
    elif div_team in ('4/3 Div 3', '5/2 Div 3'):
        div3_index += 1
    elif div_team == 'Open Div 1':
        open1_index += 1
    elif div_team == 'Open Div 2':
        open2_index += 1
In [9]:
# Expand every game into one record per point scored.
#
# A game that finished `Score A` - `Score B` contributes `Score A` records
# with 'Team A Scores' == True followed by `Score B` records with
# 'Team A Scores' == False. Each record also carries both teams' names,
# divisions and indices so later cells can filter and index without
# looking anything up again.
point_records = []
for game_row in game_scores.index:
    name_a = game_scores.loc[game_row, 'Team A']
    name_b = game_scores.loc[game_row, 'Team B']
    division_a = team2div[name_a]
    division_b = team2div[name_b]
    position_a = team2index[name_a]
    position_b = team2index[name_b]
    tally_a = game_scores.loc[game_row, 'Score A']
    tally_b = game_scores.loc[game_row, 'Score B']
    # Team A's points first, then Team B's.
    for a_scored, n_points in ((True, tally_a), (False, tally_b)):
        for _ in range(n_points):
            point_records.append({
                'Team A': name_a,
                'Team B': name_b,
                'Team A Scores': a_scored,
                'Div A': division_a,
                'Div B': division_b,
                'Index A': position_a,
                'Index B': position_b,
            })
game_points = pd.DataFrame(point_records)
In [10]:
# Preview the first five per-point records.
game_points.head(5)
Out[10]:
In [11]:
# Number of distinct teams; `teams` is the 1-D array returned by .unique().
n_teams = teams.shape[0]
In [12]:
# Display the number of teams.
n_teams
Out[12]:
In [13]:
# Remove point records that repeat a match-up in the opposite orientation.
#
# Walking the records in order, each (Team A, Team B) pair is remembered;
# a record is dropped when its reversed pair (Team B, Team A) has already
# been seen. The result is the same frame the row-by-row version produced,
# but membership is tested against a set and the rows are dropped in one
# call, so the pass is linear instead of quadratic in the number of points.
#
# NOTE(review): pairs are remembered for the whole pass. Once both
# orientations of a match-up have been seen, every later record for that
# pair -- in either orientation -- is dropped. If two teams can meet more
# than once in a league, their later games are discarded; confirm that is
# intended.
pair_list = []          # pairs in encounter order (kept for inspection)
seen_pairs = set()      # same pairs, for O(1) membership tests
rows_to_drop = []
for row in tqdm(game_points.index):
    team_A = game_points.loc[row, 'Team A']
    team_B = game_points.loc[row, 'Team B']
    new_pair = (team_A, team_B)
    pair_list.append(new_pair)
    seen_pairs.add(new_pair)
    reverse_pair = (team_B, team_A)
    if reverse_pair in seen_pairs:
        rows_to_drop.append(row)
game_points = game_points.drop(rows_to_drop)
In [14]:
# Number of point records that remain.
game_points.shape[0]
Out[14]:
In [15]:
# Renumber the remaining records 0..n-1. `drop=True` discards the old index
# directly instead of first materialising it as an 'index' column and then
# dropping that column.
game_points = game_points.reset_index(drop=True)
In [16]:
# Inspect the last fifteen point records.
game_points.tail(n=15)
Out[16]:
In [17]:
# Keep only the points played between two '4/3 Div 1' teams.
# NOTE(review): '5/2 Div 1' teams are not included by this filter -- confirm
# that restricting to the 4/3 division is intended.
team_a_in_d1 = game_points['Div A'] == '4/3 Div 1'
team_b_in_d1 = game_points['Div B'] == '4/3 Div 1'
d1_index = team_a_in_d1 & team_b_in_d1
d1_points = game_points[d1_index]
In [18]:
# Display (rows, columns) of the Div 1 point table.
d1_points.shape
Out[18]:
In [19]:
# Count the distinct teams that appear on either side of a Div 1 point.
d1_names_a = set(d1_points['Team A'].unique())
d1_names_b = set(d1_points['Team B'].unique())
nteams = len(d1_names_a | d1_names_b)
In [20]:
# Display the number of distinct teams in the Div 1 subset.
nteams
Out[20]:
In [21]:
# Inspect the first thirty Div 1 point records.
d1_points.head(n=30)
Out[21]:
In [22]:
# Display (rows, columns) of the Div 1 point table again.
d1_points.shape
Out[22]:
In [36]:
# Prior location of a team's skill, keyed by the division it plays in.
skill_prior_div = {
    '4/3 Div 1': 0.62,
    '4/3 Div 2': 0.0,
    '4/3 Div 3': -0.28,
    '5/2 Div 1': 0.62,
    '5/2 Div 2': -0.05,
    '5/2 Div 3': -0.35,
    'Open Div 1': 0.0,
    'Open Div 2': -0.28,
}
# One prior location per team, in the same order as `teams`.
alphas = [skill_prior_div[team2div[team]] for team in teams]
In [37]:
# Display how many prior locations were built (one per entry of `teams`).
len(alphas)
Out[37]:
In [39]:
# Point-level model: every team has a latent skill, and the probability that
# Team A wins a point is a logistic function of (skill_A - skill_B).
#
# NOTE(review): 'Index A' / 'Index B' select entries of `skill`, whose prior
# locations come from `alphas`; the indices therefore need to follow the same
# team ordering that `alphas` was built in -- confirm.
idx_a = game_points['Index A'].values
idx_b = game_points['Index B'].values
a_won_point = game_points['Team A Scores'].values

with pm.Model() as model:
    # Heavy-tailed prior on each team's skill, located at its division prior.
    skill = pm.Cauchy('skill', alpha=alphas, beta=0.5, shape=n_teams)
    skill_gap = skill[idx_b] - skill[idx_a]
    # Squeeze the logistic output into [1e-6, 1 - 1e-6] so the Bernoulli
    # probability never reaches exactly 0 or 1.
    p_floor = 1e-6
    p_ceiling = 1 - 1e-6
    probability_A_beats_B = p_floor + (p_ceiling - p_floor) * 1 / (1 + tt.exp(skill_gap))
    observation = pm.Bernoulli('observation', probability_A_beats_B, observed=a_won_point)
In [40]:
# Draw 1000 posterior samples from the model.
with model:
    trace = pm.sample(draws=1000)
In [27]:
# Plot the sampled trace with PyMC3's traceplot.
pm.traceplot(trace)
Out[27]:
In [41]:
# Display the names of the variables stored in the trace.
trace.varnames
Out[41]:
In [42]:
# Posterior mean of each team's skill: average the 'skill' samples over the
# sample axis (axis 0).
skill_samples = trace.get_values('skill')
meanskills = skill_samples.mean(axis=0)
In [31]:
# Sorted, de-duplicated names of every team on either side of a Div 1 point.
d1_side_a = d1_points['Team A'].unique()
d1_side_b = d1_points['Team B'].unique()
teamlist = np.unique(np.append(d1_side_a, d1_side_b))
In [44]:
# Display the posterior-mean skills.
meanskills
Out[44]:
In [45]:
# Display the prior locations for comparison with the posterior means.
alphas
Out[45]:
In [43]:
# Print each posterior-mean skill next to the name of the team that owns
# that skill index.
#
# The index -> name map is built from both sides of `game_points`, so no
# index needs special-casing: 'Index B'/'Team B' is consulted first and
# 'Index A'/'Team A' fills in indices that never occur as Team B. An index
# with no point records at all is reported with a placeholder instead of
# raising IndexError.
index2name = {}
for index_col, name_col in (('Index B', 'Team B'), ('Index A', 'Team A')):
    for team_index, team_name in zip(game_points[index_col], game_points[name_col]):
        # Keep the first name seen for each index.
        index2name.setdefault(team_index, team_name)

for i, meanskill in enumerate(meanskills):
    name = index2name.get(i, '<no games for index {}>'.format(i))
    print("{}: {:.3f}".format(name, meanskill))
In [52]:
# Display the last index reached by the skill-printing loop.
i
Out[52]:
In [ ]: