In [1]:
import pandas as pd
import numpy as np
import datetime as dt
from scipy.stats import norm, bernoulli
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from spcl_case import *
plt.style.use('fivethirtyeight')
Obtain results of teams within the past year
In [2]:
h_matches = pd.read_csv('hltv_csv/matchResults.csv')
h_matches['Date'] = pd.to_datetime(h_matches['Date'])
h_teams = pd.read_csv('hltv_csv/teams_w_ranking.csv')
h_teams = fix_teams(h_teams.set_index('ID'))
In [3]:
MIN_DATE = dt.datetime(2017,1,1)
EVENT_SET = 'eslpl'
FILTER_TEAMS = {'eslpl': ['OpTic', 'SK', 'Cloud9', 'Liquid', 'Luminosity', 'Misfits', 'Renegades', 'Immortals',
'Splyce', 'compLexity', 'Rogue', 'Ghost', 'CLG', 'NRG', 'FaZe', 'North',
'BIG', 'LDLC', 'mousesports', 'EnVyUs', 'NiP', 'Virtus.pro',
'Astralis', 'G2', 'GODSENT', 'Heroic', 'fnatic'],
'mdleu': ['Virtus.pro', 'FlipSid3', 'eXtatus', 'AGO', 'Fragsters', 'Gambit', 'PRIDE', '1337HUANIA',
'VITALIS', 'Epsilon', 'CHAOS', 'Crowns', 'MK', 'Japaleno', 'Not Academy', 'aAa', 'Space Soldiers',
'Singularity', 'Nexus', 'Invictus Aquilas', 'Spirit', 'Kinguin', 'Seed', 'Endpoint', 'iGame.com', 'TEAM5',
'ALTERNATE aTTaX'],
'mdlna': ['Gale Force', 'FRENCH CANADIANS', 'Mythic', 'GX', 'Beacon', 'Torqued', 'Rise Nation', 'Denial', 'subtLe',
'SoaR', 'Muffin Lightning', 'Iceberg', 'ex-Nitrious', 'Adaptation', 'Morior Invictus', 'Naventic', 'CheckSix', 'Good People',
'LFAO', 'CLG Academy', 'Ambition', 'Mostly Harmless', 'Gorilla Core', 'ANTI ECO'],
'mdlau': ['Grayhound', 'Tainted Minds', 'Kings', 'Chiefs', 'Dark Sided', 'seadoggs', 'Athletico', 'Legacy',
'SIN', 'Noxide', 'Control', 'SYF', 'Corvidae', 'Funkd', 'Masterminds', 'Conspiracy', 'AVANT']
}
h_matches = h_matches[h_matches['Date'] >= MIN_DATE]
In [4]:
maps_played = pd.DataFrame([h_matches.groupby('Team 1 ID')['Map'].count(), h_matches.groupby('Team 2 ID')['Map'].count()]).T.fillna(0).sum(axis=1)
maps_played.hist(bins=30)
h_teams['maps played 2017'] = maps_played
np.mean(maps_played > 200)
Out[4]:
In [5]:
#h_filter_teams = h_teams[h_teams['Name'].isin(FILTER_TEAMS[EVENT_SET])]
#h_filter_teams = h_teams.dropna().sort_values('Ranking').iloc[:250]
h_filter_teams = h_teams[h_teams['maps played 2017']> 10].dropna()
print(len(h_filter_teams))
In [6]:
h_matches = h_matches[h_matches['Team 1 ID'].isin(h_filter_teams.index) | h_matches['Team 2 ID'].isin(h_filter_teams.index)]
h_matches['winner'] = h_matches.apply(lambda x: x['Team 1 ID'] if x['Team 1 Score'] > x['Team 2 Score'] else x['Team 2 ID'], axis=1)
h_matches['score_diff'] = h_matches['Team 1 Score'] - h_matches['Team 2 Score']
In [7]:
obs = h_matches[['Date', 'Map', 'Team 1 ID', 'Team 2 ID', 'score_diff', 'winner']]
obs = obs[obs.Map != 'Default']
obs.Date = obs.Date.dt.to_period('M') # date period
obs = obs.sort_values('Date')
obs.head()
Out[7]:
In [8]:
teams = np.sort(np.unique(np.concatenate([h_matches['Team 1 ID'], h_matches['Team 2 ID']])))
maps = obs.Map.unique()
periods = obs.Date.unique()
tmap = {v:k for k,v in dict(enumerate(teams)).items()}
mmap = {v:k for k,v in dict(enumerate(maps)).items()}
pmap = {v:k for k,v in dict(enumerate(periods)).items()}
n_teams = len(teams)
n_maps = len(maps)
n_periods = len(periods)
print('Number of Teams: %i ' % n_teams)
print('Number of Plotted Teams: %i' % len(FILTER_TEAMS[EVENT_SET]))
print('Number of Matches: %i ' % len(h_matches))
print('Number of Maps: %i '% n_maps)
print('Number of Periods: %i '% n_periods)
Determining Binary Win/Loss $wl_{m,i,j}$: $$ \omega, \tau \sim HC(0.5) \\ R_{k} \sim N(0, \omega^2) \\ \tilde{\theta}_{m,k} \sim N(0,1) \\ R_{m,k} = R_{k} + \tau\tilde{\theta}_{m,k} \\ wl_{m,i,j} \sim B(p = \text{Sig}(R_{m,i}-R_{m,j})) $$
and score difference $sc_{m,i,j}$:
$$ \alpha \sim Gamma(10,5) \\ \kappa_{m,i,j} = 32\,\text{Sig}(\alpha(R_{m,i}-R_{m,j}))-16 \\ \sigma_{m} \sim HC(0.5) \\ sc_{m,i,j} \sim N(\kappa_{m,i,j}, \sigma_{m}^2) $$
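To make these link functions concrete, the short sketch below (illustration only, with the scale $\alpha$ fixed to 1, which is what the model code further down effectively uses) evaluates the win probability $\text{Sig}(d)$ and the expected score difference $\kappa$ for a few hypothetical rating gaps $d$, and checks that $32\,\text{Sig}(d)-16$ is the same curve as the $16\tanh(d/2)$ form used in the code.
In [ ]:
# Illustration only: map a rating gap d = R_i - R_j to a win probability and
# an expected score difference kappa in (-16, 16); alpha is fixed to 1 here.
d = np.linspace(-4, 4, 9)              # hypothetical rating differences
sig = 1. / (1. + np.exp(-d))           # Sig(d), the modelled win probability
kappa_sigmoid = 32. * sig - 16.        # 32*Sig(d) - 16, as in the formula above
kappa_tanh = 16. * np.tanh(0.5 * d)    # equivalent form used in the model code
assert np.allclose(kappa_sigmoid, kappa_tanh)
pd.DataFrame({'rating diff': d, 'win prob': sig, 'expected score diff': kappa_tanh})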
In [11]:
import pymc3 as pm
import theano
import theano.tensor as tt
In [12]:
obs_map = obs['Map'].map(mmap).values
obs_team_1 = obs['Team 1 ID'].map(tmap).values
obs_team_2 = obs['Team 2 ID'].map(tmap).values
obs_period = obs['Date'].map(pmap).values
In [34]:
a = np.arange(1,n_periods)
with pm.Model() as rating_model:
    rho = pm.Normal('rho', 0, 1)                       # AR(1) coefficient of the rating random walk
    omega = tt.sqrt(pm.InverseGamma('omega', 4, 2))    # sd of the initial ratings
    sigma = tt.sqrt(pm.HalfNormal('sigma', 1.5))       # sd of the period-to-period innovations
    tau = pm.HalfCauchy('tau', 0.5)                    # scale of the per-map offsets
    theta_tilde = pm.Normal('rate_t', mu=0, sd=1, shape=(n_maps, n_teams))

    # Latent team rating per period, plus a map-specific offset
    time_rating = [pm.Normal('rating_0', 0, omega, shape=n_teams)]
    time_rating_map = [pm.Deterministic('rating_0 | map', time_rating[0] + tau * theta_tilde)]
    for i in a:
        time_rating.append(pm.Normal('rating_'+str(i), rho*time_rating[i-1], sigma, shape=n_teams))
        time_rating_map.append(pm.Deterministic('rating_'+str(i)+' | map', time_rating[i] + tau * theta_tilde))

    # Rating difference on the map actually played, for the matches in each period
    diff = [time_rating_map[i][obs_map[obs_period == i], obs_team_1[obs_period == i]] - time_rating_map[i][obs_map[obs_period == i], obs_team_2[obs_period == i]] for i in range(n_periods)]
    diff = tt.concatenate(diff)

    #p = 0.5*pm.math.tanh(diff)+0.5
    kappa = 16.*pm.math.tanh(0.5*diff)                 # expected score difference in (-16, 16)
    gamma = pm.HalfNormal('gamma', 10)
    sc = pm.Normal('observed score diff', kappa, gamma, observed=obs['score_diff'])
    #wl = pm.Bernoulli('observed wl', p=p, observed=(obs['Team 1 ID'] == obs['winner']).values)
In [ ]:
# The full NUTS run on this model is slow, so fit a cheaper ADVI approximation instead
with rating_model:
    approx = pm.fit(20000, method='advi')
    ap_trace = approx.sample(500)
In [35]:
with rating_model:
    trace = pm.sample(1000, init='jitter+adapt_diag', n_init=20000, tune=250, nuts_kwargs={'target_accept': 0.9, 'max_treedepth': 25}) # tune=1000, nuts_kwargs={'target_accept': 0.95}
In [36]:
team_names = h_teams.loc[teams]
filt = team_names[team_names.Name.isin(FILTER_TEAMS[EVENT_SET])]
sns.set_palette('Paired', n_teams)
f, ax = plt.subplots(figsize=(16,10))
ax.set_ylim(0,6.0)
[sns.kdeplot(trace['rating_%s'%max(obs_period-1)][:,tmap[i]], shade=True, alpha=0.55, legend=True, ax=ax, label=v['Name']) for i,v in filt.iterrows()]
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
Out[36]:
In [39]:
num_rows = int(np.ceil(len(filt)/4))
f, ax = plt.subplots(num_rows, 4, figsize=(16,30), sharex=True, sharey=True)
ax = ax.flatten()
condensed_ratings = {j: np.vstack([trace['rating_'+str(i)][:,tmap[j]] for i in range(n_periods)]).T for j,v in filt.iterrows()}
for i,(j,v) in enumerate(filt.iterrows()):
    ax[i].set_title(v['Name'])
    sns.tsplot(condensed_ratings[j], color='black', ci='sd', ax=ax[i], marker='s', linewidth=1)
In [40]:
EVENT_SET_SAVED = 'all_time_sc'
pm.backends.text.dump('saved_model/'+EVENT_SET_SAVED+'/trace', trace)
np.save('saved_model/'+EVENT_SET_SAVED+'/teams.npy', teams)
np.save('saved_model/'+EVENT_SET_SAVED+'/maps.npy', maps)
np.save('saved_model/'+EVENT_SET_SAVED+'/periods.npy', periods)
np.save('saved_model/'+EVENT_SET_SAVED+'/filter_teams.npy', FILTER_TEAMS[EVENT_SET])
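The saved trace and lookup arrays can be reloaded later; a minimal sketch, assuming the same rating_model has been rebuilt in the session so the text backend can attach its variables:
In [ ]:
# Sketch: reload the artifacts written above (assumes rating_model is defined).
teams = np.load('saved_model/'+EVENT_SET_SAVED+'/teams.npy')
maps = np.load('saved_model/'+EVENT_SET_SAVED+'/maps.npy')
periods = np.load('saved_model/'+EVENT_SET_SAVED+'/periods.npy')
with rating_model:
    trace = pm.backends.text.load('saved_model/'+EVENT_SET_SAVED+'/trace')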
In [7]:
obs[['Date', 'Team 1 ID', 'Team 2 ID', 'winner']].to_csv('data.csv')
In [ ]:
with rating_model:
    approx = pm.fit(15000)
    ap_trace = approx.sample(5000)
In [ ]:
print('Gelman Rubin: %s' % pm.diagnostics.gelman_rubin(trace))
print('Effective N: %s' % pm.diagnostics.effective_n(trace))
print('Accept Prob: %.4f' % trace.get_sampler_stats('mean_tree_accept').mean())
print('Fraction of Divergent Samples: %.5f' % (trace['diverging'].nonzero()[0].size/float(len(trace['diverging']))))
In [37]:
pm.traceplot(trace, varnames=['rho', 'sigma', 'omega', 'tau', 'gamma'])
Out[37]:
In [38]:
rating_model.profile(pm.gradient(rating_model.logpt, rating_model.vars), n=100).summary()
In [ ]:
rating_model.profile(rating_model.logpt, n=100).summary()
In [ ]:
sns.set_palette('Paired', n_teams)
f, ax = plt.subplots(figsize=(16,10))
ax.set_ylim(0,2.0)
# sigma is a scalar innovation sd in this model, so plot its marginal posterior directly
sns.kdeplot(trace['sigma'], shade=True, alpha=0.55, legend=True, ax=ax, label='sigma')
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
In [ ]:
f, axes = plt.subplots(n_maps,1,figsize=(12,34), sharex=True)
for m, ax in enumerate(axes):
    ax.set_title(maps[m])
    ax.set_ylim(0,2.0)
    # map-specific ratings for the most recent period
    [sns.kdeplot(trace['rating_%i | map' % max(obs_period)][:,m,tmap[i]], shade=True, alpha=0.55, legend=False,
                 ax=ax, label=v['Name']) for i,v in filt.iterrows()]
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
In [ ]:
filt
In [ ]:
i = np.where(teams==7880)
j = np.where(teams==7924)
# rating difference between the two teams in the most recent period
diff = (trace['rating_%i' % max(obs_period)][:,j] - trace['rating_%i' % max(obs_period)][:,i]).flatten()
kappa = 32./(1+np.exp(-diff))-16.   # same link as the fitted model: 16*tanh(diff/2)
fig, (ax1,ax2) = plt.subplots(1,2,figsize=(10,6))
sns.kdeplot(kappa, ax=ax2)
sns.kdeplot(diff, ax=ax1)
In [18]:
a = pd.Series({v['Name']: np.percentile(ap_trace['rating_%s'%max(obs_period)][:,tmap[i]], 75) for i,v in filt.iterrows()}).sort_values(ascending=False)
a = pd.Series(np.arange(1,len(filt)+1), a.index)
b = pd.Series({v['Name']: np.percentile(trace['rating_%s'%max(obs_period)][:,tmap[i]], 75) for i,v in filt.iterrows()}).sort_values(ascending=False)
b = pd.Series(np.arange(1,len(filt)+1), b.index)
pd.DataFrame([a,b], index=['approx', 'nuts']).T.sort_values('nuts')
Out[18]:
In [ ]:
a = np.arange(1,n_periods)
# Alternative model: binary win/loss likelihood only, with a per-team lognormal
# random walk on the innovation sd (stochastic volatility of ratings).
with pm.Model() as rating_model:
    rho = pm.Normal('rho', 0, 1)
    omega = pm.HalfCauchy('omega', 0.5)
    tau = pm.HalfCauchy('tau', 0.5)
    gamma = pm.HalfCauchy('gamma', 0.5)
    theta_tilde = pm.Normal('rate_t', mu=0, sd=1, shape=(n_maps, n_teams))

    time_rating = [pm.Normal('rating_0', 0, omega, shape=n_teams)]
    time_variance = [pm.Lognormal('sd_0', tt.log(omega), tau, shape=n_teams)]
    time_rating_map = [pm.Deterministic('rating_0 | map', time_rating[0] + gamma * theta_tilde)]
    for i in a:
        time_variance.append(pm.Lognormal('sd_'+str(i), tt.log(time_variance[i-1]), tau, shape=n_teams))
        time_rating.append(pm.Normal('rating_'+str(i), rho*time_rating[i-1], time_variance[i], shape=n_teams))
        time_rating_map.append(pm.Deterministic('rating_'+str(i)+' | map', time_rating[i] + gamma * theta_tilde))

    diff = [time_rating_map[i][obs_map[obs_period == i], obs_team_1[obs_period == i]] - time_rating_map[i][obs_map[obs_period == i], obs_team_2[obs_period == i]] for i in range(n_periods)]
    diff = tt.concatenate(diff)

    p = 0.5*pm.math.tanh(diff)+0.5
    #alpha = 0.31
    #kappa = 16.*pm.math.tanh(alpha*diff)
    #tau = pm.HalfNormal('tau', 10)
    #sc = pm.Normal('observed score diff', kappa, tau, observed=obs['score_diff'])
    wl = pm.Bernoulli('observed wl', p=p, observed=(obs['Team 1 ID'] == obs['winner']).values)