In [1]:
import os
import pandas as pd
import numpy as np
import pymc3 as pm
from sklearn.metrics import log_loss, accuracy_score

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
# Root of the local buda-ratings checkout. Set the BUDA_RATINGS_DIR
# environment variable to run the notebook on another machine; when it is
# unset the original hard-coded location is used.
base_dir = os.environ.get(
    "BUDA_RATINGS_DIR",
    os.path.join("/Users", "sbussmann", "Development", "buda", "buda-ratings"),
)
# Directory holding the intermediate CSV files read by the cells below.
interim_dir = os.path.join(base_dir, 'data', 'interim')

In [3]:
# League whose results are being rated; it selects which interim files are read.
league_id = 40264
winloss_filename = "winloss_divprior_{}.csv".format(league_id)
# One row per game, with team names, team indices, divisions and the outcome.
winloss = pd.read_csv(os.path.join(interim_dir, winloss_filename))

In [4]:
# Preview the first rows of the win/loss table.
winloss.head()


Out[4]:
Team A Team B Team A Wins Index A Index B Div A Div B
0 AHOC Gothrilla True 0 1 4/3 Div 1 4/3 Div 1
1 AHOC BBN True 0 9 4/3 Div 1 4/3 Div 1
2 AHOC Stonecutters True 0 41 4/3 Div 1 5/2 Div 1
3 AHOC FlowChart True 0 2 4/3 Div 1 4/3 Div 1
4 AHOC Lady and the BAMF True 0 28 4/3 Div 1 4/3 Div 2

In [5]:
# (rows, columns) of the win/loss table.
winloss.shape


Out[5]:
(522, 7)

In [6]:
# Every distinct team name that appears on either side of a game.
teams = set(winloss['Team A'].unique()) | set(winloss['Team B'].unique())

In [7]:
# Prior mean rating assigned to a team according to the division it plays in.
rating_prior_div = {
    '4/3 Div 1': 3.0,
    '4/3 Div 2': 0.0,
    '4/3 Div 3': -1.0,
    '5/2 Div 1': 3.0,
    '5/2 Div 2': 0.0,
    '5/2 Div 3': -1.0
}

# Build a team-index -> division lookup in a single pass over the table
# instead of rescanning both index columns for every team. The "A" columns
# are visited first and setdefault keeps the first value seen, so a team's
# first "Div A" entry wins over any "Div B" entry, as in the original lookup.
team_divisions = {}
for idx_col, div_col in (('Index A', 'Div A'), ('Index B', 'Div B')):
    for team_idx, team_div in zip(winloss[idx_col].tolist(), winloss[div_col].tolist()):
        team_divisions.setdefault(team_idx, team_div)

# Fail with an explicit message if the indices are not the contiguous range
# 0..len(teams)-1 that the list construction below relies on.
missing_indices = [i for i in range(len(teams)) if i not in team_divisions]
if missing_indices:
    raise KeyError(
        "no division found for team indices {}".format(missing_indices))

# alphas[i] is the prior mean rating for the team with index i.
alphas = [rating_prior_div[team_divisions[i]] for i in range(len(teams))]

In [9]:
# Integer team indices for each game; they select each side's latent rating.
teamA = winloss['Index A'].values
teamB = winloss['Index B'].values

with pm.Model() as model:

    # Disabled experiment: a scale parameter on the rating difference.
    # sharpness = pm.HalfStudentT('sharpness', sd=2.5, nu=3)

    # One latent rating per team, centred on that team's division prior mean.
    # No sd/tau is passed, so the distribution's default spread applies.
    ratings = pm.Normal('ratings', mu=alphas, shape=len(teams))

    # Rating gap between the two sides of every game.
    deltaRating = ratings[teamA] - ratings[teamB]

    # Logistic link from rating gap to P(Team A wins). pm.math.sigmoid is the
    # tensor-native form of 1 / (1 + exp(-x)) and avoids applying a NumPy
    # ufunc to a symbolic variable.
    p = pm.math.sigmoid(deltaRating)
    win = pm.Bernoulli('win', p, observed=winloss['Team A Wins'].values)

In [11]:
# Draw posterior samples. The seed is fixed so that a fresh
# Restart & Run All reproduces the same draws and downstream numbers.
with model:
    trace = pm.sample(1000, random_seed=42)


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ratings]
100%|██████████| 1500/1500 [00:02<00:00, 512.73it/s]

In [44]:
# Point estimate per team: the 50th percentile (i.e. the median) of the
# sampled ratings along axis 0. The variable is still called "meanratings"
# although the mean-based version on the next line is disabled.
# meanratings = trace.get_values('ratings').mean(axis=0)
meanratings = np.percentile(trace.get_values('ratings'), 50, axis=0)

In [13]:
# Build a team-index -> team-name lookup in a single pass over the table
# instead of rescanning both index columns for every rating. The "A" columns
# are visited first and setdefault keeps the first value seen, so a team's
# first "Team A" entry wins over any "Team B" entry, as in the original lookup.
team_names = {}
for idx_col, name_col in (('Index A', 'Team A'), ('Index B', 'Team B')):
    for team_idx, team_label in zip(winloss[idx_col].tolist(), winloss[name_col].tolist()):
        team_names.setdefault(team_idx, team_label)

# Fail with an explicit message if some rating position has no team.
unnamed_indices = [i for i in range(len(meanratings)) if i not in team_names]
if unnamed_indices:
    raise KeyError(
        "no team name found for rating indices {}".format(unnamed_indices))

# names[i] is the team whose rating estimate is meanratings[i].
names = [team_names[i] for i in range(len(meanratings))]

In [14]:
# Rating estimates as a one-column frame indexed by team name.
rating_columns = {'mean_rating': meanratings}
ratingsdf = pd.DataFrame(rating_columns, index=names)

In [15]:
# Load the plus/minus table for this league, indexed by its 'Team Name' column.
plusminus = pd.read_csv(os.path.join(interim_dir, 'plusminus_{}.csv'.format(league_id)), index_col='Team Name')

In [16]:
# Inner join on the index (team name): only teams present in both the
# plus/minus table and the ratings frame are kept.
totaldf = plusminus.join(ratingsdf, how='inner')

In [18]:
# Teams ranked from highest to lowest rating estimate.
totaldf.sort_values('mean_rating', ascending=False)


Out[18]:
PlusMinus divname Wins Losses Ties mean_rating
AHOC 100 4/3 Div 1 14 0 0 4.579735
SnakeCountryBromance 33 5/2 Div 1 5 0 0 4.035764
Zerg Rush! 22 4/3 Div 1 10 4 0 3.394396
Gothrilla 47 4/3 Div 1 12 4 1 3.305285
FlowChart 29 4/3 Div 1 12 4 0 3.156678
Stonecutters 24 5/2 Div 1 8 4 1 2.897754
GrassBurner 29 4/3 Div 1 11 8 2 2.380597
Injustice League 100 4/3 Div 2 14 1 0 2.267182
Swingers -6 4/3 Div 1 5 7 1 2.152308
TuneSquad -9 4/3 Div 1 6 7 1 1.999032
Puddingstone 98 4/3 Div 2 18 6 0 1.808960
Upstream 119 4/3 Div 2 20 6 0 1.754423
Turtle Boy -12 4/3 Div 1 5 10 0 1.721448
Pink Flamingos 58 5/2 Div 2 10 1 0 1.709737
Maverick 42 5/2 Div 2 9 2 1 1.697760
Jack's Abby HAOS Lager 46 4/3 Div 2 10 2 0 1.688323
JuJu Hex 72 5/2 Div 2 11 3 1 1.596737
BBN -47 4/3 Div 1 2 8 1 1.552703
Shake and Bake -34 5/2 Div 1 3 7 3 1.506293
Live Poultry, Fresh Killed (LPFK) -39 4/3 Div 1 6 13 0 1.292651
Baba Yaga 47 5/2 Div 2 9 3 0 1.155027
License to Kilt (fka Scoobers in Scotland) 46 4/3 Div 2 13 6 0 0.986257
Rubs The Duckie 30 4/3 Div 2 9 4 0 0.853583
Flaming Croissants 66 4/3 Div 2 15 5 0 0.782853
Hippos 26 5/2 Div 2 9 4 0 0.692536
Bacon 18 5/2 Div 2 7 4 0 0.649694
Hucky McThrowandcatch -1 5/2 Div 2 5 5 0 0.631008
JHOP 32 4/3 Div 2 12 8 1 0.582493
Toads 40 4/3 Div 2 15 8 0 0.414964
SHRedline -6 5/2 Div 2 6 8 0 0.359391
... ... ... ... ... ... ...
Fetch 9 4/3 Div 2 8 11 0 -0.060038
Flying Salsa 13 5/2 Div 3 12 8 0 -0.193452
Moose Lightning -65 4/3 Div 2 4 11 0 -0.223091
The Funky Bunch -29 5/2 Div 2 4 6 0 -0.246488
Oddjob 11 4/3 Div 3 13 6 0 -0.286594
A Lil Bit Sticky -43 4/3 Div 2 5 10 0 -0.294299
Granite State Grass Stains 22 5/2 Div 3 6 4 0 -0.441294
Nerd Alert -54 4/3 Div 2 7 12 0 -0.501667
Rabbit Disc 37 5/2 Div 3 12 8 0 -0.720814
Demons 12 5/2 Div 3 5 6 0 -0.904073
Lady and the BAMF -74 4/3 Div 2 5 15 0 -0.931480
Alpha No Beta -7 5/2 Div 3 10 9 0 -1.069364
Iron Squids -33 5/2 Div 2 1 10 0 -1.087758
Baboon Heart -35 4/3 Div 3 5 7 0 -1.123359
Stack to the Future -7 4/3 Div 3 9 12 0 -1.143376
Spam -36 5/2 Div 3 9 12 0 -1.245399
Batman and the Robins -3 4/3 Div 3 6 6 0 -1.326137
Tofu Wolf -15 5/2 Div 3 11 14 0 -1.336264
Hipster Kitties -98 4/3 Div 2 3 15 0 -1.338426
Underwater Monkey Cowboys -24 4/3 Div 3 6 11 1 -1.448993
Tubbs -89 4/3 Div 2 2 15 0 -1.455769
SIR! -53 5/2 Div 3 4 12 0 -1.642587
Top Shelf -35 4/3 Div 3 2 8 0 -1.657297
Jiggy -48 5/2 Div 3 5 14 1 -1.975657
M.O.B. -58 5/2 Div 3 2 10 0 -2.071873
TuneSquad Squared -130 4/3 Div 2 0 17 0 -2.155828
Killer Bees -35 5/2 Div 3 3 12 0 -2.263183
123 Trap! -81 4/3 Div 3 1 9 0 -2.335371
THEM! -35 5/2 Div 3 2 12 0 -2.468607
Spawning Alewives -84 5/2 Div 3 1 11 0 -2.704123

67 rows × 6 columns


In [19]:
# Per-division averages of every remaining column, including the rating estimate.
totaldf.groupby('divname').mean()


Out[19]:
PlusMinus Wins Losses Ties mean_rating
divname
4/3 Div 1 11.400000 8.300000 6.500000 0.600000 2.553483
4/3 Div 2 2.727273 9.136364 8.818182 0.090909 0.221902
4/3 Div 3 -14.375000 6.625000 7.625000 0.125000 -1.135663
5/2 Div 1 7.666667 5.333333 3.666667 1.333333 2.813270
5/2 Div 2 17.818182 7.363636 4.909091 0.181818 0.664419
5/2 Div 3 -22.076923 6.307692 10.153846 0.076923 -1.464361

In [20]:
# Dimensions of the stored rating samples.
trace['ratings'].shape


Out[20]:
(4000, 67)

In [21]:
def get_index(team_name, df):
    """Return the value in the 'Index' column of ``df`` at row label ``team_name``."""
    return df.loc[team_name, 'Index']

In [22]:
def calc_prob(indexA, indexB, trace):
    """Average probability that team ``indexA`` beats team ``indexB``.

    ``trace['ratings']`` is indexed as ``[:, team_index]``. For every row the
    rating gap is passed through the logistic function, and the resulting
    probabilities are averaged over all rows.
    """
    sampled_ratings = trace['ratings']
    rating_gap = sampled_ratings[:, indexA] - sampled_ratings[:, indexB]
    return (1. / (1 + np.exp(-rating_gap))).mean()

In [23]:
# Sanity check: averaged probability that team index 0 beats team index 1.
simprobs = calc_prob(0, 1, trace)

In [24]:
# Display the probability computed in the previous cell.
simprobs


Out[24]:
0.75424894710448576

In [25]:
# Load the game table used for evaluation. Judging by the file name it
# presumably keeps duplicate match-ups; unlike the other inputs, the file name
# does not include league_id.
winloss_with_dup = pd.read_csv(os.path.join(interim_dir, 'winloss_with_duplicates.csv'))

In [26]:
# Add a 'predicted' column: for every game row, calc_prob's averaged
# probability that Team A wins. Note that this mutates winloss_with_dup in place.
winloss_with_dup['predicted'] = winloss_with_dup.apply(lambda x: calc_prob(x['Index A'], x['Index B'], trace), axis=1)

In [27]:
# Preview the table with the new 'predicted' column.
winloss_with_dup.head()


Out[27]:
Team A Team B divname Team A Wins Index A Index B Div A Div B predicted
0 AHOC Gothrilla 4/3 Div 1 True 0 1 4/3 Div 1 4/3 Div 1 0.754249
1 AHOC BBN 4/3 Div 1 True 0 9 4/3 Div 1 4/3 Div 1 0.939300
2 AHOC Stonecutters 4/3 Div 1 True 0 41 4/3 Div 1 5/2 Div 1 0.815719
3 AHOC FlowChart 4/3 Div 1 True 0 2 4/3 Div 1 4/3 Div 1 0.779735
4 AHOC Lady and the BAMF 4/3 Div 1 True 0 28 4/3 Div 1 4/3 Div 2 0.994371

In [28]:
# Bin each prediction by rounding it to one decimal place.
winloss_with_dup['predBin'] = winloss_with_dup['predicted'].round(1)

In [29]:
# Preview the table with the new 'predBin' column.
winloss_with_dup.head()


Out[29]:
Team A Team B divname Team A Wins Index A Index B Div A Div B predicted predBin
0 AHOC Gothrilla 4/3 Div 1 True 0 1 4/3 Div 1 4/3 Div 1 0.754249 0.8
1 AHOC BBN 4/3 Div 1 True 0 9 4/3 Div 1 4/3 Div 1 0.939300 0.9
2 AHOC Stonecutters 4/3 Div 1 True 0 41 4/3 Div 1 5/2 Div 1 0.815719 0.8
3 AHOC FlowChart 4/3 Div 1 True 0 2 4/3 Div 1 4/3 Div 1 0.779735 0.8
4 AHOC Lady and the BAMF 4/3 Div 1 True 0 28 4/3 Div 1 4/3 Div 2 0.994371 1.0

In [30]:
# Aggregate only the columns that are meaningful per bin. Grouping the whole
# frame also tries to average team names and team indices, which raises a
# TypeError on the string columns in recent pandas releases. Outcomes are cast
# to float so that mean/std are well defined for the boolean column.
calibration = winloss_with_dup[['predBin', 'predicted', 'Team A Wins']].astype(float)
calibration_by_bin = calibration.groupby('predBin')

# Per-bin mean and standard deviation, plus the mean +/- one std band.
binned = calibration_by_bin.mean()
binnedstd = calibration_by_bin.std()
binnedhi = binned + binnedstd
binnedlo = binned - binnedstd

In [31]:
# Two-panel calibration figure.
sns.set_context('talk')
f, axes = plt.subplots(1, 2, figsize=(13, 6))

# Left panel: raw outcome against predicted probability, one
# semi-transparent marker per game row.
ax = axes[0]
ax.plot(winloss_with_dup['predicted'], winloss_with_dup['Team A Wins'], 'o', alpha=0.1)
ax.set_ylabel('Actual Result for Team A')
ax.set_xlabel('Predicted Team A Winning Percentage')

# Right panel: observed win fraction per prediction bin (the x values come
# from the Series index, i.e. the bin value), with a shaded band between the
# per-bin mean minus and plus one standard deviation.
ax = axes[1]
ax.plot(binned['Team A Wins'], 'o-', color='salmon')
ax.fill_between(binned.index, binnedlo['Team A Wins'], binnedhi['Team A Wins'], alpha=0.3, color='salmon')
ax.set_ylim([0, 1])
ax.set_ylabel('Actual Team A Winning Percentage by Bin')
ax.set_xlabel('Predicted Team A Winning Percentage by Bin')
plt.tight_layout()



In [32]:
def _log_loss_for_team(games):
    """Log loss of the predictions over one team's game rows.

    Both labels are declared explicitly so a group containing only wins or
    only losses can still be scored.
    """
    outcomes = games['Team A Wins'].astype('int')
    return log_loss(outcomes, games['predicted'], labels=[0, 1])


# One log-loss value per 'Team A' group.
team_log_loss = winloss_with_dup.groupby('Team A').apply(_log_loss_for_team)

In [33]:
def _accuracy_for_team(games):
    """Fraction of one team's game rows whose rounded prediction matches the outcome."""
    outcomes = games['Team A Wins'].astype('int')
    predicted_labels = np.round(games['predicted'])
    return accuracy_score(outcomes, predicted_labels)


# One accuracy value per 'Team A' group.
team_accuracy = winloss_with_dup.groupby('Team A').apply(_accuracy_for_team)

In [34]:
# Histogram of the per-team accuracy values. The axes are labelled so the
# figure stands alone; the trailing semicolon suppresses the repr output.
ax = sns.distplot(team_accuracy, kde=False, bins=15)
ax.set_xlabel('Per-team prediction accuracy')
ax.set_ylabel('Number of teams');


Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c1a030950>

In [35]:
# Histogram of the per-team log-loss values. The axes are labelled so the
# figure stands alone; the trailing semicolon suppresses the repr output.
ax = sns.distplot(team_log_loss, kde=False, bins=15)
ax.set_xlabel('Per-team log loss')
ax.set_ylabel('Number of teams');


Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c1d634790>

In [37]:
# Log loss of the predictions over every game row at once.
log_loss(winloss_with_dup['Team A Wins'].astype('int'), winloss_with_dup['predicted'])


Out[37]:
0.42382729656670926

In [38]:
# Accuracy over every game row, using the prediction rounded to 0 or 1 as the label.
accuracy_score(winloss_with_dup['Team A Wins'].astype('int'), np.round(winloss_with_dup['predicted']))


Out[38]:
0.84482758620689657

In [39]:
def inspect(full_outcomes, team_name):
    """Return the rows of ``full_outcomes`` in which ``team_name`` is 'Team A'.

    The 'divname', 'Index A', 'Index B' and 'predBin' columns are removed from
    the returned frame; ``full_outcomes`` itself is left untouched.
    """
    hidden_columns = ['divname', 'Index A', 'Index B', 'predBin']
    is_team_a = full_outcomes['Team A'] == team_name
    return full_outcomes[is_team_a].drop(hidden_columns, axis=1)

In [40]:
# Game rows and predictions where 'Injustice League' is listed as Team A.
ins = inspect(winloss_with_dup, 'Injustice League')
ins


Out[40]:
Team A Team B Team A Wins Div A Div B predicted
180 Injustice League Turtle Boy True 4/3 Div 2 4/3 Div 1 0.622337
181 Injustice League A Lil Bit Sticky True 4/3 Div 2 4/3 Div 2 0.912463
182 Injustice League Jack's Abby HAOS Lager True 4/3 Div 2 4/3 Div 2 0.623212
183 Injustice League Hipster Kitties True 4/3 Div 2 4/3 Div 2 0.965832
184 Injustice League Moose Lightning True 4/3 Div 2 4/3 Div 2 0.906266
185 Injustice League Tubbs True 4/3 Div 2 4/3 Div 2 0.969202
186 Injustice League Booooooosh! True 4/3 Div 2 4/3 Div 2 0.886604
187 Injustice League Live Poultry, Fresh Killed (LPFK) True 4/3 Div 2 4/3 Div 1 0.710943
188 Injustice League Zerg Rush! False 4/3 Div 2 4/3 Div 1 0.273595
189 Injustice League Dominion True 4/3 Div 2 4/3 Div 2 0.858528
190 Injustice League Fetch True 4/3 Div 2 4/3 Div 2 0.893406
191 Injustice League BBN True 4/3 Div 2 4/3 Div 1 0.657330
192 Injustice League Hipster Kitties True 4/3 Div 2 4/3 Div 2 0.965832
193 Injustice League Rogues' Gallery True 4/3 Div 2 4/3 Div 2 0.870767
194 Injustice League Puddingstone True 4/3 Div 2 4/3 Div 2 0.604934

In [41]:
# Game rows and predictions where 'Gothrilla' is listed as Team A.
ins = inspect(winloss_with_dup, 'Gothrilla')
ins


Out[41]:
Team A Team B Team A Wins Div A Div B predicted
14 Gothrilla AHOC False 4/3 Div 1 4/3 Div 1 0.245751
15 Gothrilla Swingers False 4/3 Div 1 4/3 Div 1 0.739885
16 Gothrilla FlowChart True 4/3 Div 1 4/3 Div 1 0.537681
17 Gothrilla Hucky McThrowandcatch True 4/3 Div 1 5/2 Div 2 0.918727
18 Gothrilla SnakeCountryBromance False 4/3 Div 1 5/2 Div 1 0.350740
19 Gothrilla Zerg Rush! False 4/3 Div 1 4/3 Div 1 0.483638
20 Gothrilla GrassBurner True 4/3 Div 1 4/3 Div 1 0.702533
21 Gothrilla Turtle Boy True 4/3 Div 1 4/3 Div 1 0.810338
22 Gothrilla Live Poultry, Fresh Killed (LPFK) True 4/3 Div 1 4/3 Div 1 0.866007
23 Gothrilla Upstream True 4/3 Div 1 4/3 Div 2 0.807113
24 Gothrilla Live Poultry, Fresh Killed (LPFK) True 4/3 Div 1 4/3 Div 1 0.866007
25 Gothrilla TuneSquad True 4/3 Div 1 4/3 Div 1 0.765051
26 Gothrilla BBN True 4/3 Div 1 4/3 Div 1 0.832201
27 Gothrilla Swingers True 4/3 Div 1 4/3 Div 1 0.739885
28 Gothrilla Zerg Rush! True 4/3 Div 1 4/3 Div 1 0.483638
29 Gothrilla GrassBurner False 4/3 Div 1 4/3 Div 1 0.702533
30 Gothrilla Shake and Bake True 4/3 Div 1 5/2 Div 1 0.837132

In [42]:
# Game rows and predictions where 'SnakeCountryBromance' is listed as Team A.
ins = inspect(winloss_with_dup, 'SnakeCountryBromance')
ins


Out[42]:
Team A Team B Team A Wins Div A Div B predicted
666 SnakeCountryBromance Shake and Bake True 5/2 Div 1 5/2 Div 1 0.901068
667 SnakeCountryBromance Zerg Rush! True 5/2 Div 1 4/3 Div 1 0.634575
668 SnakeCountryBromance GrassBurner True 5/2 Div 1 4/3 Div 1 0.808528
669 SnakeCountryBromance Gothrilla True 5/2 Div 1 4/3 Div 1 0.649260
670 SnakeCountryBromance SHRedline True 5/2 Div 1 5/2 Div 2 0.964925

In [43]:
# Game rows and predictions where 'Store Bought Dirt' is listed as Team A.
ins = inspect(winloss_with_dup, 'Store Bought Dirt')
ins


Out[43]:
Team A Team B Team A Wins Div A Div B predicted
344 Store Bought Dirt Upstream False 4/3 Div 2 4/3 Div 2 0.177924
345 Store Bought Dirt M.O.B. True 4/3 Div 2 5/2 Div 3 0.878151
346 Store Bought Dirt Disc Envy False 4/3 Div 2 4/3 Div 2 0.483084
347 Store Bought Dirt Underwater Monkey Cowboys True 4/3 Div 2 4/3 Div 3 0.807090
348 Store Bought Dirt Toads True 4/3 Div 2 4/3 Div 2 0.424291
349 Store Bought Dirt JHOP False 4/3 Div 2 4/3 Div 2 0.389224
350 Store Bought Dirt Nerd Alert True 4/3 Div 2 4/3 Div 2 0.634531
351 Store Bought Dirt Dominion False 4/3 Div 2 4/3 Div 2 0.462300
352 Store Bought Dirt Fetch True 4/3 Div 2 4/3 Div 2 0.535141
353 Store Bought Dirt Flaming Croissants True 4/3 Div 2 4/3 Div 2 0.348007
354 Store Bought Dirt License to Kilt (fka Scoobers in Scotland) False 4/3 Div 2 4/3 Div 2 0.305461
355 Store Bought Dirt Rubs The Duckie False 4/3 Div 2 4/3 Div 2 0.336771
356 Store Bought Dirt Jack's Abby HAOS Lager False 4/3 Div 2 4/3 Div 2 0.189217
357 Store Bought Dirt Top Shelf True 4/3 Div 2 4/3 Div 3 0.830439
358 Store Bought Dirt Tubbs True 4/3 Div 2 4/3 Div 2 0.805852
359 Store Bought Dirt Hipster Kitties True 4/3 Div 2 4/3 Div 2 0.789625
360 Store Bought Dirt Toads True 4/3 Div 2 4/3 Div 2 0.424291
361 Store Bought Dirt 123 Trap! True 4/3 Div 2 4/3 Div 3 0.901872
362 Store Bought Dirt Puddingstone False 4/3 Div 2 4/3 Div 2 0.169493
363 Store Bought Dirt TuneSquad Squared True 4/3 Div 2 4/3 Div 2 0.888095
364 Store Bought Dirt Stack to the Future False 4/3 Div 2 4/3 Div 3 0.758086
365 Store Bought Dirt A Lil Bit Sticky False 4/3 Div 2 4/3 Div 2 0.586980
366 Store Bought Dirt Flying Salsa True 4/3 Div 2 5/2 Div 3 0.563853
367 Store Bought Dirt Upstream False 4/3 Div 2 4/3 Div 2 0.177924

In [ ]: