In [1]:
import os
import pandas as pd
import numpy as np
import pymc3 as pm
In [2]:
# Root of the buda-ratings checkout. Defaults to the original local path; set
# the BUDA_RATINGS_DIR environment variable to run the notebook elsewhere
# without editing this cell.
base_dir = os.environ.get(
    "BUDA_RATINGS_DIR",
    os.path.join("/Users", "sbussmann", "Development", "buda", "buda-ratings"),
)
# The CSV inputs read by this notebook live under <base_dir>/data/interim.
interim_dir = os.path.join(base_dir, 'data', 'interim')
In [3]:
# League whose win/loss table is analysed in this notebook.
league_id = 40264
# Build the file path first, then load it, so the path can be inspected.
winloss_path = os.path.join(
    interim_dir,
    "winloss_simpleprior_{}.csv".format(league_id),
)
winloss = pd.read_csv(winloss_path)
In [4]:
# Preview the first 20 rows of the win/loss table.
winloss.head(n=20)
Out[4]:
In [5]:
# (number of rows, number of columns) of the win/loss table.
winloss.shape
Out[5]:
In [6]:
# Every distinct team name that appears on either side of a game.
teams = set(winloss['Team A'].unique()).union(winloss['Team B'].unique())
In [17]:
# Team indices for the two sides of every game; they are used below to index
# into the per-team `ratings` vector.
team1 = winloss['Index A'].values
team2 = winloss['Index B'].values

with pm.Model() as model:
    # One latent rating per team, centred on zero. No sd/tau is passed, so the
    # library's default scale applies -- TODO confirm this is the intended prior.
    ratings = pm.Normal('ratings', mu=0, shape=len(teams))
    # Rating gap for each game, Team A minus Team B.
    deltaRating = ratings[team1] - ratings[team2]
    # Logistic link from rating gap to the probability passed to the Bernoulli
    # likelihood. The library sigmoid replaces the hand-written
    # 1 / (1 + exp(-x)), which can overflow for large gaps and relies on NumPy
    # dispatching `np.exp` onto a symbolic tensor.
    p = pm.math.sigmoid(deltaRating)
    # Observed outcome of every game, taken from the 'Team A Wins' column.
    win = pm.Bernoulli('win', p, observed=winloss['Team A Wins'].values)
In [18]:
# Fixed seed so that Restart & Run All reproduces the same posterior draws.
RANDOM_SEED = 42

with model:
    # Draw 1000 posterior samples for the model defined above.
    trace = pm.sample(1000, random_seed=RANDOM_SEED)
In [19]:
# Average the sampled 'ratings' values over axis 0 (presumably the draw axis),
# leaving one mean value per rating entry.
rating_draws = trace.get_values('ratings')
meanratings = np.mean(rating_draws, axis=0)
In [11]:
# Map every rating index to its team name.
#
# The two lookup tables are built once instead of re-scanning the whole
# win/loss table for every team. Semantics match the per-index scan: the first
# occurrence of an index supplies the name, and a name found via
# 'Index A'/'Team A' takes precedence over one found via 'Index B'/'Team B'.
names_from_a = winloss.drop_duplicates('Index A').set_index('Index A')['Team A']
names_from_b = winloss.drop_duplicates('Index B').set_index('Index B')['Team B']
index_to_name = names_from_b.to_dict()
index_to_name.update(names_from_a.to_dict())

# Fail with a clear message if some rating index never appears in either
# index column (the per-index scan would fail on an empty lookup here too).
unnamed = [i for i in range(len(meanratings)) if i not in index_to_name]
if unnamed:
    raise IndexError(
        "no team name found for rating index(es): {}".format(unnamed)
    )

# names[i] is the team name belonging to meanratings[i].
names = [index_to_name[i] for i in range(len(meanratings))]
In [12]:
# Single-column frame of mean ratings, labelled by team name.
ratingsdf = pd.DataFrame(meanratings, index=names, columns=['mean_rating'])
In [14]:
# Load the plus/minus table for the same league, indexed by its 'Team Name'
# column.
plusminus_path = os.path.join(
    interim_dir,
    'plusminus_{}.csv'.format(league_id),
)
plusminus = pd.read_csv(plusminus_path, index_col='Team Name')
In [15]:
# Inner join on the index: keeps only the rows whose index label appears in
# both plusminus and ratingsdf.
totaldf = plusminus.join(ratingsdf, how='inner')
In [16]:
# Show the combined table ordered from highest to lowest mean rating.
totaldf.sort_values(by='mean_rating', ascending=False)
Out[16]: