Summary

Game scores are useful for identifying which team is likely to win a given game against another team. Point scores are useful for saying which team is likely to win a given point against another team.

The problems with this approach were the same problems with my original effort based on simple wins and losses. I resolved those problems with help from PyMC3 developers, and now my original model works well. I am holding off on further development of this approach using point scores for now.



In [1]:

    
import pandas as pd
import os
import numpy as np
import pymc3 as pm
from pymc3.math import invlogit
from tqdm import tqdm
from theano import tensor as tt

%matplotlib inline



In [2]:

    
project_dir = '/Users/rbussman/Projects/BUDA/buda-ratings'
scores_dir = os.path.join(project_dir, 'data', 'raw', 'game_scores')



In [3]:

    
# summer club league 2016
league_id = 40264
game_scores = pd.read_csv(os.path.join(scores_dir, 'scores_{}.csv'.format(league_id)))



In [4]:

    
game_scores.head()









    Out[4]:







  
    
      
      Team A
      Team B
      Tourney Qualifying games*
      divname
      Score A
      Score B
    
  
  
    
      0
      AHOC
      Gothrilla
      -99
      4/3 Div 1
      15
      12
    
    
      1
      AHOC
      BBN
      -99
      4/3 Div 1
      15
      1
    
    
      2
      AHOC
      Stonecutters
      -99
      4/3 Div 1
      15
      8
    
    
      3
      AHOC
      FlowChart
      -99
      4/3 Div 1
      15
      10
    
    
      4
      AHOC
      Lady and the BAMF
      -99
      4/3 Div 1
      15
      5



In [5]:

    
game_scores['Team A'].unique()









    Out[5]:





array(['AHOC', 'Gothrilla', 'FlowChart', 'GrassBurner', 'Zerg Rush!',
       'Swingers', 'TuneSquad', 'Turtle Boy',
       'Live Poultry, Fresh Killed (LPFK)', 'BBN', 'Upstream',
       'Injustice League', 'Puddingstone', 'Flaming Croissants',
       "Jack's Abby HAOS Lager",
       'License to Kilt (fka Scoobers in Scotland)', 'Toads', 'JHOP',
       'Rubs The Duckie', 'Disc Envy', 'Store Bought Dirt', 'Fetch',
       'Booooooosh!', 'Dominion', "Rogues' Gallery", 'A Lil Bit Sticky',
       'Nerd Alert', 'Moose Lightning', 'Lady and the BAMF', 'Tubbs',
       'Hipster Kitties', 'TuneSquad Squared', 'Too Drunk to Fail',
       'Oddjob', 'Batman and the Robins', 'Stack to the Future',
       'Underwater Monkey Cowboys', 'Baboon Heart', 'Top Shelf',
       '123 Trap!', 'SnakeCountryBromance', 'Stonecutters',
       'Shake and Bake', 'JuJu Hex', 'Pink Flamingos', 'Baba Yaga',
       'Maverick', 'Hippos', 'Bacon', 'Get Flat Ultimate',
       'Hucky McThrowandcatch', 'SHRedline', 'The Funky Bunch',
       'Iron Squids', 'Rabbit Disc', 'Granite State Grass Stains',
       'Flying Salsa', 'Demons', 'Alpha No Beta', 'Tofu Wolf', 'Spam',
       'Killer Bees', 'THEM!', 'Jiggy', 'SIR!', 'M.O.B.',
       'Spawning Alewives', 'Crossroads', '215 Needham Street', 'Stall 9',
       'Fat Bass Ultimate', 'Squid Squad', 'Hammered', 'Desert Horizon',
       'DISCtinguished Gentlemen', 'Savage Strike',
       "I'm not gonna not play Flong", 'Sky Zone'], dtype=object)



In [6]:

    
game_scores['divname'].unique()









    Out[6]:





array(['4/3 Div 1', '4/3 Div 2', '4/3 Div 3', '5/2 Div 1', '5/2 Div 2',
       '5/2 Div 3', 'Open Div 1', 'Open Div 2'], dtype=object)



In [7]:

    
teams = game_scores['Team A'].unique()



In [8]:

    
div1_index = 0
div2_index = 0
div3_index = 0
open1_index = 0
open2_index = 0
team2index = {}
team2div = {}
for i, team in enumerate(teams):
    row = game_scores['Team A'] == team
    div_team = game_scores.loc[row, 'divname'].unique()[0]
    team2div[team] = div_team
    
    if div_team == '4/3 Div 1' or div_team == '5/2 Div 1':
        team2index[team] = div1_index
        div1_index += 1
        
    if div_team == '4/3 Div 2' or div_team == '5/2 Div 2':
        team2index[team] = div2_index
        div2_index += 1   
        
    if div_team == '4/3 Div 3' or div_team == '5/2 Div 3':
        team2index[team] = div3_index
        div3_index += 1
        
    if div_team == 'Open Div 1':
        team2index[team] = open1_index
        open1_index += 1
        
    if div_team == 'Open Div 2':
        team2index[team] = open2_index
        open2_index += 1



In [9]:

    
game_points = []
for row in game_scores.index:
    
    team_A = game_scores.loc[row, 'Team A']
    team_B = game_scores.loc[row, 'Team B']
    div_A = team2div[team_A]
    div_B = team2div[team_B]
    index_A = team2index[team_A]
    index_B = team2index[team_B]
    
    scores_A = game_scores.loc[row, 'Score A']
    scores_B = game_scores.loc[row, 'Score B']
    for i in range(scores_A):
        game_point = {
            'Team A': team_A,
            'Team B': team_B,
            'Team A Scores': True,
            'Div A': div_A,
            'Div B': div_B,
            'Index A': index_A,
            'Index B': index_B
        }
        game_points.append(game_point)
        
    for i in range(scores_B):
        game_point = {
            'Team A': team_A,
            'Team B': team_B,
            'Team A Scores': False,
            'Div A': div_A,
            'Div B': div_B,
            'Index A': index_A,
            'Index B': index_B
        }
        game_points.append(game_point)
        
game_points = pd.DataFrame(game_points)



In [10]:

    
game_points.head()









    Out[10]:







  
    
      
      Div A
      Div B
      Index A
      Index B
      Team A
      Team A Scores
      Team B
    
  
  
    
      0
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      1
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      2
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      3
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      4
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla



In [11]:

    
n_teams = len(teams)



In [12]:

    
n_teams









    Out[12]:





78



In [13]:

    
pair_list = []
for row in tqdm(game_points.index):
    team_A = game_points.loc[row, 'Team A']
    team_B = game_points.loc[row, 'Team B']
    new_pair = (team_A, team_B)
    pair_list.append(new_pair)
    reverse_pair = (team_B, team_A)
    if reverse_pair in pair_list:
        game_points = game_points.drop(row)









    



100%|██████████| 27362/27362 [01:02<00:00, 435.54it/s]



In [14]:

    
len(game_points)









    Out[14]:





13681



In [15]:

    
game_points = game_points.reset_index().drop('index', axis=1)



In [16]:

    
game_points.tail(15)









    Out[16]:







  
    
      
      Div A
      Div B
      Index A
      Index B
      Team A
      Team A Scores
      Team B
    
  
  
    
      13666
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      True
      Sky Zone
    
    
      13667
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      True
      Sky Zone
    
    
      13668
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      True
      Sky Zone
    
    
      13669
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      True
      Sky Zone
    
    
      13670
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      True
      Sky Zone
    
    
      13671
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      True
      Sky Zone
    
    
      13672
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      True
      Sky Zone
    
    
      13673
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      True
      Sky Zone
    
    
      13674
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      True
      Sky Zone
    
    
      13675
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      True
      Sky Zone
    
    
      13676
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      False
      Sky Zone
    
    
      13677
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      False
      Sky Zone
    
    
      13678
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      False
      Sky Zone
    
    
      13679
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      False
      Sky Zone
    
    
      13680
      Open Div 2
      Open Div 2
      0
      1
      I'm not gonna not play Flong
      False
      Sky Zone



In [17]:

    
d1_index = (game_points['Div A'] == '4/3 Div 1') & (game_points['Div B'] == '4/3 Div 1')
# true_index = (mixed_scores['Index B'] < 30) & (mixed_scores['ID A'] < 30)
d1_points = game_points[d1_index]



In [18]:

    
d1_points.shape









    Out[18]:





(1162, 7)



In [19]:

    
nteams = len(set(np.append(d1_points['Team A'].unique(), d1_points['Team B'].unique())))



In [20]:

    
nteams









    Out[20]:





10



In [21]:

    
d1_points.head(30)









    Out[21]:







  
    
      
      Div A
      Div B
      Index A
      Index B
      Team A
      Team A Scores
      Team B
    
  
  
    
      0
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      1
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      2
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      3
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      4
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      5
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      6
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      7
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      8
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      9
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      10
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      11
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      12
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      13
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      14
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      True
      Gothrilla
    
    
      15
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      False
      Gothrilla
    
    
      16
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      False
      Gothrilla
    
    
      17
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      False
      Gothrilla
    
    
      18
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      False
      Gothrilla
    
    
      19
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      False
      Gothrilla
    
    
      20
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      False
      Gothrilla
    
    
      21
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      False
      Gothrilla
    
    
      22
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      False
      Gothrilla
    
    
      23
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      False
      Gothrilla
    
    
      24
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      False
      Gothrilla
    
    
      25
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      False
      Gothrilla
    
    
      26
      4/3 Div 1
      4/3 Div 1
      0
      1
      AHOC
      False
      Gothrilla
    
    
      27
      4/3 Div 1
      4/3 Div 1
      0
      9
      AHOC
      True
      BBN
    
    
      28
      4/3 Div 1
      4/3 Div 1
      0
      9
      AHOC
      True
      BBN
    
    
      29
      4/3 Div 1
      4/3 Div 1
      0
      9
      AHOC
      True
      BBN



In [22]:

    
d1_points.shape









    Out[22]:





(1162, 7)



In [36]:

    
skill_prior_div = {
    '4/3 Div 1': 0.62,
    '4/3 Div 2': 0.0,
    '4/3 Div 3': -0.28,
    '5/2 Div 1': 0.62,
    '5/2 Div 2': -0.05,
    '5/2 Div 3': -0.35,
    'Open Div 1': 0.0,
    'Open Div 2': -0.28
}
alphas = []
for team in teams:
    div = team2div[team]
    alpha = skill_prior_div[div]
    alphas.append(alpha)



In [37]:

    
len(alphas)









    Out[37]:





78



In [39]:

    
with pm.Model() as model:
    skill = pm.Cauchy('skill', alpha=alphas, beta=0.5, shape=n_teams)

    B_minus_A = skill[game_points['Index B'].values] - skill[game_points['Index A'].values]

    lower = 1e-6
    upper = 1 - 1e-6
    probability_A_beats_B = lower + (upper - lower) * 1 / (1 + tt.exp(B_minus_A))
    
#     probability_A_beats_B = pm.math.invlogit(A_minus_B)
#     probability_A_beats_B = 1. / (1 + pm.math.exp(B_minus_A))
    
    observation = pm.Bernoulli('observation', probability_A_beats_B, observed=game_points['Team A Scores'].values)



In [40]:

    
with model:
    trace = pm.sample(1000)









    



Auto-assigning NUTS sampler...
Initializing NUTS using ADVI...
Average Loss = 9,378.9:   7%|▋         | 14643/200000 [00:26<05:15, 587.42it/s]
Convergence archived at 14700
Interrupted at 14,700 [7%]: Average Loss = 9,669.2
100%|██████████| 1500/1500 [09:20<00:00,  6.54it/s]



In [27]:

    
pm.traceplot(trace)









    Out[27]:





array([[<matplotlib.axes._subplots.AxesSubplot object at 0x113db0a50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x113760a50>]], dtype=object)



In [41]:

    
trace.varnames









    Out[41]:





['skill']



In [42]:

    
meanskills = trace.get_values('skill').mean(axis=0)



In [31]:

    
teamlist = np.unique(np.append(d1_points['Team A'].unique(), d1_points['Team B'].unique()))



In [44]:

    
meanskills









    Out[44]:





array([ 0.65301083,  0.44547386,  0.42931924,  0.33030892,  0.37333916,
        0.24718034,  0.16499764,  0.12679952,  0.22163357,  0.24839653,
        0.27370629,  0.32321066,  0.08341891,  0.09051705,  0.05448306,
       -0.07004865, -0.13926281, -0.08893653, -0.07052182, -0.29699215,
       -0.40601642, -0.46361128,  0.6869104 ,  0.67144911,  0.56612181,
        0.58125229,  0.358072  ,  0.36400688,  0.21634662,  0.27169866,
        0.22357252,  0.06636634,  0.03481271, -0.34979988, -0.46372342,
       -0.10313796,  0.01004278,  0.03618744,  0.1109608 , -0.24434164,
        0.57749495,  0.8656889 ,  1.37702863,  2.16685353,  0.09309964,
        0.04399854, -0.43583996,  0.01278415,  0.15011313,  0.10308817,
       -0.07928212, -1.11573001,  0.10926464, -0.09345529, -0.70139447,
       -0.06994451, -0.30991279, -0.72070356, -0.26859341, -0.54762425,
       -0.28048939, -0.28969197, -0.27292897, -0.58404238, -6.59882065,
        0.10637341, -0.84579763,  0.64191211, -0.30014218,  0.07160027,
       -8.57884636, -0.2079323 ,  0.08633631,  0.52967533, -0.02584353,
        1.80206409,  0.12429495, -2.50220117])



In [45]:

    
alphas









    Out[45]:





[0.62,
 0.62,
 0.62,
 0.62,
 0.62,
 0.62,
 0.62,
 0.62,
 0.62,
 0.62,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 -0.28,
 -0.28,
 -0.28,
 -0.28,
 -0.28,
 -0.28,
 -0.28,
 -0.28,
 0.62,
 0.62,
 0.62,
 -0.05,
 -0.05,
 -0.05,
 -0.05,
 -0.05,
 -0.05,
 -0.05,
 -0.05,
 -0.05,
 -0.05,
 -0.05,
 -0.35,
 -0.35,
 -0.35,
 -0.35,
 -0.35,
 -0.35,
 -0.35,
 -0.35,
 -0.35,
 -0.35,
 -0.35,
 -0.35,
 -0.35,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 -0.28,
 -0.28]



In [43]:

    
for i, meanskill in enumerate(meanskills):
    if i == 0 or i == 13:
        index = game_points['Index A'] == i
        name = game_points.loc[index, 'Team A'].unique()[0]
    else:
        index = game_points['Index B'] == i
        name = game_points.loc[index, 'Team B'].unique()[0]
    print("{}: {:.3f}".format(name, meanskill))









    



AHOC: 0.653
Gothrilla: 0.445
FlowChart: 0.429
GrassBurner: 0.330
Zerg Rush!: 0.373
Swingers: 0.247
TuneSquad: 0.165
Turtle Boy: 0.127
Live Poultry, Fresh Killed (LPFK): 0.222
BBN: 0.248
SnakeCountryBromance: 0.274
Stonecutters: 0.323
Shake and Bake: 0.083
Dominion: 0.091
Rogues' Gallery: 0.054
A Lil Bit Sticky: -0.070
Nerd Alert: -0.139
Moose Lightning: -0.089
Lady and the BAMF: -0.071
Tubbs: -0.297
Hipster Kitties: -0.406
TuneSquad Squared: -0.464
JuJu Hex: 0.687
Pink Flamingos: 0.671
Baba Yaga: 0.566
Maverick: 0.581
Hippos: 0.358
Bacon: 0.364
Get Flat Ultimate: 0.216
Hucky McThrowandcatch: 0.272
SHRedline: 0.224
The Funky Bunch: 0.066
Iron Squids: 0.035






    



---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-43-90cb87ec1470> in <module>()
      5     else:
      6         index = game_points['Index B'] == i
----> 7         name = game_points.loc[index, 'Team B'].unique()[0]
      8     print("{}: {:.3f}".format(name, meanskill))

IndexError: index 0 is out of bounds for axis 0 with size 0



In [52]:

    
i









    Out[52]:





13



In [ ]:

	Team A	Team B	Tourney Qualifying games*	divname	Score A	Score B
0	AHOC	Gothrilla	-99	4/3 Div 1	15	12
1	AHOC	BBN	-99	4/3 Div 1	15	1
2	AHOC	Stonecutters	-99	4/3 Div 1	15	8
3	AHOC	FlowChart	-99	4/3 Div 1	15	10
4	AHOC	Lady and the BAMF	-99	4/3 Div 1	15	5

	Div A	Div B	Index B	Team A	Team A Scores	Team B
13666	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	True	Sky Zone
13667	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	True	Sky Zone
13668	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	True	Sky Zone
13669	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	True	Sky Zone
13670	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	True	Sky Zone
13671	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	True	Sky Zone
13672	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	True	Sky Zone
13673	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	True	Sky Zone
13674	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	True	Sky Zone
13675	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	True	Sky Zone
13676	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	False	Sky Zone
13677	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	False	Sky Zone
13678	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	False	Sky Zone
13679	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	False	Sky Zone
13680	Open Div 2	Open Div 2	1	I'm not gonna not play Flong	False	Sky Zone