In [1]:
import os
import pandas as pd
import numpy as np
import pymc3 as pm

In [2]:
# Project root. Defaults to the original checkout location, but can be
# redirected without editing the notebook by setting the BUDA_RATINGS_DIR
# environment variable before starting the kernel.
base_dir = os.environ.get(
    "BUDA_RATINGS_DIR",
    os.path.join("/Users", "sbussmann", "Development", "buda", "buda-ratings"),
)
# Directory that the CSV inputs of this notebook are read from.
interim_dir = os.path.join(base_dir, 'data', 'interim')

In [3]:
# Id substituted into the input file names read by this notebook.
league_id = 40264
# Game-by-game results: team names, the boolean 'Team A Wins' outcome and
# the integer index assigned to each team.
winloss_file = "winloss_simpleprior_{}.csv".format(league_id)
winloss = pd.read_csv(os.path.join(interim_dir, winloss_file))

In [4]:
# Peek at the first 20 games to sanity-check the columns.
winloss.head(n=20)


Out[4]:
Team A Team B Team A Wins Index A Index B
0 AHOC Gothrilla True 0 1
1 AHOC BBN True 0 9
2 AHOC Stonecutters True 0 41
3 AHOC FlowChart True 0 2
4 AHOC Lady and the BAMF True 0 28
5 AHOC Swingers True 0 5
6 AHOC Upstream True 0 10
7 AHOC Tubbs True 0 29
8 AHOC Stonecutters True 0 41
9 AHOC Zerg Rush! True 0 4
10 AHOC Turtle Boy True 0 7
11 AHOC Live Poultry, Fresh Killed (LPFK) True 0 8
12 AHOC TuneSquad True 0 6
13 AHOC JuJu Hex True 0 43
14 Gothrilla Swingers False 1 5
15 Gothrilla FlowChart True 1 2
16 Gothrilla Hucky McThrowandcatch True 1 50
17 Gothrilla SnakeCountryBromance False 1 40
18 Gothrilla Zerg Rush! False 1 4
19 Gothrilla GrassBurner True 1 3

In [5]:
# (number of rows, number of columns) of the loaded results table.
winloss.shape


Out[5]:
(522, 5)

In [6]:
# Every distinct team name that appears on either side of a game.
teams = set(winloss['Team A'].unique()) | set(winloss['Team B'].unique())

In [17]:
# Integer team indices for the two sides of every game; they select each
# side's entry from the `ratings` vector below.
# NOTE(review): this assumes every index lies in range(len(teams)) -- confirm
# against the step that produced the 'Index A' / 'Index B' columns.
team1 = winloss['Index A'].values
team2 = winloss['Index B'].values

with pm.Model() as model:

    # One latent rating per team with a zero-mean Normal prior. No scale is
    # passed, so the library's default prior width applies.
    ratings = pm.Normal('ratings', mu=0, shape=len(teams))

    # Rating advantage of Team A over Team B, one value per game.
    deltaRating = ratings[team1] - ratings[team2]

    # Logistic link: probability that Team A wins given the rating gap.
    # pm.math.sigmoid is the numerically stable form of 1 / (1 + exp(-x)) and
    # keeps the expression inside the symbolic graph instead of routing a
    # tensor through a NumPy ufunc.
    p = pm.math.sigmoid(deltaRating)
    win = pm.Bernoulli('win', p, observed=winloss['Team A Wins'].values)

In [18]:
# Sample the posterior with 1000 draws. The seed is fixed so that a fresh
# "Restart & Run All" reproduces the same trace and the same ratings table.
with model:
    trace = pm.sample(1000, random_seed=42)


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ratings]
100%|██████████| 1500/1500 [00:02<00:00, 528.58it/s]

In [19]:
# Average the sampled `ratings` values over axis 0 (presumably the sample
# axis), leaving one mean rating per team index.
meanratings = np.mean(trace.get_values('ratings'), axis=0)

In [11]:
# Recover the team name behind every rating index. A team's name is taken
# from the first row where it appears as Team A, falling back to the first
# row where it appears as Team B. The lookup table is built in a single pass
# over the games instead of re-scanning the whole frame for every team.
index_to_name = {}
for index_col, name_col in (('Index A', 'Team A'), ('Index B', 'Team B')):
    for team_index, team_name in zip(winloss[index_col], winloss[name_col]):
        # setdefault keeps the first name seen, so 'Team A' rows take priority.
        index_to_name.setdefault(team_index, team_name)

# One name per entry of `meanratings`, in index order. A rating index that
# never appears in the games raises KeyError here.
names = [index_to_name[i] for i in range(len(meanratings))]

In [12]:
# Single-column table of mean ratings, labelled by team name.
ratingsdf = pd.DataFrame(meanratings, index=names, columns=['mean_rating'])

In [14]:
# Per-team table for the same league, indexed by the 'Team Name' column.
plusminus_path = os.path.join(interim_dir, 'plusminus_{}.csv'.format(league_id))
plusminus = pd.read_csv(plusminus_path, index_col='Team Name')

In [15]:
# Attach the mean ratings to the per-team table, keeping only the teams
# whose name is present in both indexes.
totaldf = plusminus.join(other=ratingsdf, how='inner')

In [16]:
# Show the combined table ordered from highest to lowest mean rating.
totaldf.sort_values(by='mean_rating', ascending=False)


Out[16]:
PlusMinus divname Wins Losses Ties mean_rating
AHOC 100 4/3 Div 1 14 0 0 2.390247
Injustice League 100 4/3 Div 2 14 1 0 1.743978
Pink Flamingos 58 5/2 Div 2 10 1 0 1.605038
SnakeCountryBromance 33 5/2 Div 1 5 0 0 1.582016
Jack's Abby HAOS Lager 46 4/3 Div 2 10 2 0 1.543060
Maverick 42 5/2 Div 2 9 2 1 1.423889
JuJu Hex 72 5/2 Div 2 11 3 1 1.396716
Zerg Rush! 22 4/3 Div 1 10 4 0 1.338057
Upstream 119 4/3 Div 2 20 6 0 1.329346
Puddingstone 98 4/3 Div 2 18 6 0 1.246406
FlowChart 29 4/3 Div 1 12 4 0 1.246399
Gothrilla 47 4/3 Div 1 12 4 1 1.220385
Baba Yaga 47 5/2 Div 2 9 3 0 1.157522
Stonecutters 24 5/2 Div 1 8 4 1 1.112005
Too Drunk to Fail 59 4/3 Div 3 11 2 0 0.821511
Flaming Croissants 66 4/3 Div 2 15 5 0 0.816797
License to Kilt (fka Scoobers in Scotland) 46 4/3 Div 2 13 6 0 0.807341
Hippos 26 5/2 Div 2 9 4 0 0.780549
Bacon 18 5/2 Div 2 7 4 0 0.643980
Rubs The Duckie 30 4/3 Div 2 9 4 0 0.636779
GrassBurner 29 4/3 Div 1 11 8 2 0.633880
Toads 40 4/3 Div 2 15 8 0 0.416524
JHOP 32 4/3 Div 2 12 8 1 0.356692
Hucky McThrowandcatch -1 5/2 Div 2 5 5 0 0.285291
Flying Salsa 13 5/2 Div 3 12 8 0 0.275635
Oddjob 11 4/3 Div 3 13 6 0 0.228262
Get Flat Ultimate 2 5/2 Div 2 10 8 0 0.223953
Granite State Grass Stains 22 5/2 Div 3 6 4 0 0.184862
Store Bought Dirt 8 4/3 Div 2 13 11 0 0.092143
Disc Envy 31 4/3 Div 2 9 8 0 0.077977
... ... ... ... ... ... ...
Rabbit Disc 37 5/2 Div 3 12 8 0 -0.152927
Fetch 9 4/3 Div 2 8 11 0 -0.200392
Shake and Bake -34 5/2 Div 1 3 7 3 -0.207253
Demons 12 5/2 Div 3 5 6 0 -0.297527
The Funky Bunch -29 5/2 Div 2 4 6 0 -0.317369
Live Poultry, Fresh Killed (LPFK) -39 4/3 Div 1 6 13 0 -0.317664
A Lil Bit Sticky -43 4/3 Div 2 5 10 0 -0.373500
Alpha No Beta -7 5/2 Div 3 10 9 0 -0.419407
Moose Lightning -65 4/3 Div 2 4 11 0 -0.488076
Nerd Alert -54 4/3 Div 2 7 12 0 -0.514968
Baboon Heart -35 4/3 Div 3 5 7 0 -0.582783
Batman and the Robins -3 4/3 Div 3 6 6 0 -0.638839
BBN -47 4/3 Div 1 2 8 1 -0.652875
Spam -36 5/2 Div 3 9 12 0 -0.689189
Stack to the Future -7 4/3 Div 3 9 12 0 -0.698244
Tofu Wolf -15 5/2 Div 3 11 14 0 -0.760735
Underwater Monkey Cowboys -24 4/3 Div 3 6 11 1 -0.923311
Lady and the BAMF -74 4/3 Div 2 5 15 0 -0.973020
SIR! -53 5/2 Div 3 4 12 0 -1.048813
Top Shelf -35 4/3 Div 3 2 8 0 -1.150598
Iron Squids -33 5/2 Div 2 1 10 0 -1.172247
Hipster Kitties -98 4/3 Div 2 3 15 0 -1.282922
Jiggy -48 5/2 Div 3 5 14 1 -1.344969
M.O.B. -58 5/2 Div 3 2 10 0 -1.403709
Tubbs -89 4/3 Div 2 2 15 0 -1.550409
Killer Bees -35 5/2 Div 3 3 12 0 -1.580143
123 Trap! -81 4/3 Div 3 1 9 0 -1.704208
THEM! -35 5/2 Div 3 2 12 0 -1.764171
Spawning Alewives -84 5/2 Div 3 1 11 0 -1.944581
TuneSquad Squared -130 4/3 Div 2 0 17 0 -2.150498

67 rows × 6 columns