Win/Loss Betting Model

Same as other one but I now filter by teams that have a ranking


In [1]:
import pandas as pd
import numpy as np
import datetime as dt
from scipy.stats import norm, bernoulli
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from spcl_case import *
plt.style.use('fivethirtyeight')

Obtain results of teams within the past year


In [2]:
h_matches = pd.read_csv('hltv_csv/matchResults.csv')
h_matches['Date'] = pd.to_datetime(h_matches['Date'])
h_teams = pd.read_csv('hltv_csv/teams_w_ranking.csv')
h_teams = fix_teams(h_teams.set_index('ID'))
h_teams = h_teams.dropna()

In [3]:
FILTER_TEAMS = {'eslpl': ['OpTic', 'SK', 'Cloud9', 'Liquid', 'Luminosity', 'Misfits', 'Renegades', 'Immortals', 
                    'Splyce', 'compLexity', 'Rogue', 'Ghost', 'CLG', 'NRG', 'FaZe', 'North',
                    'BIG', 'LDLC', 'mousesports', 'EnVyUs', 'NiP', 'Virtus.pro', 
                    'Astralis', 'G2', 'GODSENT', 'Heroic', 'fnatic', 'NiP', 'Heroic'],
                'mdleu': ['Virtus.pro', 'FlipSid3', 'eXtatus', 'AGO', 'Fragsters', 'Gambit', 'PRIDE', '1337HUANIA', 
                    'VITALIS', 'Epsilon', 'CHAOS', 'Crowns', 'MK', 'Japaleno', 'Not Academy', 'aAa', 'Space Soldiers',
                    'Singularity', 'Nexus', 'Invictus Aquilas', 'Spirit', 'Kinguin', 'Seed', 'Endpoint', 'iGame.com', 'TEAM5',
                    'ALTERNATE aTTaX'],
                'mdlna': ['Gale Force', 'FRENCH CANADIANS', 'Mythic', 'GX', 'Beacon', 'Torqued', 'Rise Nation', 'Denial', 'subtLe', 
                   'SoaR', 'Muffin Lightning', 'Iceberg', 'ex-Nitrious', 'Adaptation', 'Morior Invictus', 'Naventic', 'CheckSix', 'Good People'
                   , 'LFAO', 'CLG Academy', 'Ambition', 'Mostly Harmless', 'Gorilla Core', 'ex-Nitrious', 'ANTI ECO'],
                'mdlau': ['Grayhound', 'Tainted Minds', 'Kings', 'Chiefs', 'Dark Sided', 'seadoggs', 'Athletico', 'Legacy',
                    'SIN', 'Noxide', 'Control', 'SYF', 'Corvidae', 'Funkd', 'Masterminds', 'Conspiracy', 'AVANT']
               }

In [4]:
MIN_DATE = dt.datetime(2017,1,1)
MAX_DATE = dt.datetime.today()
h_matches = h_matches[(h_matches['Date'] >= MIN_DATE) & (h_matches['Date'] <= MAX_DATE)]
h_matches = h_matches[h_matches['Team 1 ID'].isin(h_teams.index) | h_matches['Team 2 ID'].isin(h_teams.index)]
h_matches['winner'] = h_matches.apply(lambda x: x['Team 1 ID'] if x['Team 1 Score'] > x['Team 2 Score'] else x['Team 2 ID'], axis=1)
h_matches['score_diff'] = h_matches['Team 1 Score'] - h_matches['Team 2 Score']

In [5]:
obs = h_matches[['Map', 'Team 1 ID', 'Team 2 ID', 'score_diff', 'winner']]
obs = obs[obs.Map != 'Default']
obs.head()


Out[5]:
Map Team 1 ID Team 2 ID score_diff winner
23834 Cobblestone 6134 4674 14 6134
23835 Overpass 6134 4674 7 6134
23846 Overpass 7220 6134 -10 6134
23848 Train 7517 7104 10 7517
23849 Cache 7517 6134 -7 6134

In [6]:
teams = np.sort(np.unique(np.concatenate([h_matches['Team 1 ID'], h_matches['Team 2 ID']])))
maps = obs.Map.unique()
tmap = {v:k for k,v in dict(enumerate(teams)).items()}
mmap = {v:k for k,v in dict(enumerate(maps)).items()}
n_teams = len(teams)
n_maps = len(maps)
print('Number of Teams: %i ' % n_teams)
print('Number of Filtered Teams: %i' % len(h_teams))
print('Number of Matches: %i ' % len(h_matches))
print('Number of Maps: %i '% n_maps)


Number of Teams: 972 
Number of Filtered Teams: 360
Number of Matches: 14231 
Number of Maps: 8 

Pymc Model

Determining Binary Win Loss: $wl_{m,i,j}$ $$ \omega, \tau, \sim HC(0.5) \\ R_{k} \sim N(0, \omega^2) \\ \tilde{\theta}_{m,k} \sim N(0,1) \\ R_{m,k} = R_{k} + \tau\tilde{\theta} \\ wl_{m,i,j} \sim B(p = \text{Sig}(R_{m,i}-R_{m,j})) \\ $$

and score difference: $sc_{m,i,j}$

$$ \alpha \sim Gamma(10,5) \\ \kappa_{m,i,j} = 32\text{Sig}(\alpha(R_{m,i}-R_{m,j}))-16 \\ \sigma_{m} \sim HC(0.5) \\ sc_{m,i,j} \sim N(\kappa, \sigma_{m}^2) $$

In [7]:
import pymc3 as pm
import theano.tensor as tt

In [43]:
obs_map = obs['Map'].map(mmap).values
obs_team_1 = obs['Team 1 ID'].map(tmap).values
obs_team_2 = obs['Team 2 ID'].map(tmap).values

with pm.Model() as rating_model:
    
    omega = pm.HalfCauchy('omega', 0.5)
    tau = pm.HalfCauchy('tau', 0.5)
    rating = pm.Normal('rating', 0, omega, shape=n_teams)
    theta_tilde = pm.Normal('rate_t', mu=0, sd=1, shape=(n_maps,n_teams))
    rating_map = pm.Deterministic('rating | map', rating + tau * theta_tilde)
    
    r = rating_map.flatten()
    diff = r[obs_map*n_teams+obs_team_1] - r[obs_map*n_teams+obs_team_2]
    
    p = 0.5*tt.tanh(diff)+0.5
    beta = pm.Normal('beta', 0.5, 0.2)
    kappa = 16*tt.tanh(beta*diff)
    sigma = pm.HalfCauchy('sigma', 0.5)
    
    sc = pm.Normal('observed score diff', kappa, sigma, observed=obs['score_diff'])
    wl = pm.Bernoulli('observed wl', p=p, observed=(obs['Team 1 ID'] == obs['winner']).values)

In [44]:
with rating_model:
    #start = approx.sample(1)[0]
    #trace = pm.sample(5000, init='advi', nuts_kwargs={'target_accept': 0.99}, tune=0)
    trace = pm.sample(5000, n_init=20000, init='jitter+adapt_diag', nuts_kwargs={'target_accept': 0.90}, tune=500) # tune=1000, nuts_kwargs={'target_accept': 0.95}


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████████████████████████████████████████████████████████████████████████| 5500/5500 [26:59<00:00,  3.40it/s]

In [59]:
filt = h_teams[h_teams.Name.isin(FILTER_TEAMS['mdleu'])]
sns.set_palette('Paired', n_teams)

f, ax = plt.subplots(figsize=(16,10))
ax.set_ylim(0,5.0)
curr_trace = trace['rating']
[sns.kdeplot(curr_trace[:,tmap[i]], shade=True, alpha=0.55, legend=True, ax=ax, label=v['Name']) for i,v in filt.iterrows()]
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))


Out[59]:
<matplotlib.legend.Legend at 0x29d740d5be0>

In [65]:
set(filt.Name).symmetric_difference(FILTER_TEAMS['mdleu'])


Out[65]:
{'Japaleno', 'MK', 'VITALIS'}

In [33]:
get_prior_rating = lambda i: [curr_trace[:, tmap[i]].mean(), curr_trace[:, tmap[i]].std()] if i in tmap else [0,1]
params = {v['Name']: get_prior_rating(i) for i,v in h_teams.iterrows()}

In [40]:
pd.DataFrame(params, index=['mu', 'sig']).T.sort_values('mu', ascending=False)


Out[40]:
mu sig
G2 2.278251 0.431518
Astralis 2.145072 0.412375
FaZe 2.113995 0.414242
SK 2.103455 0.410561
fnatic 2.065059 0.395136
North 2.056154 0.409819
Cloud9 1.936638 0.390896
Natus Vincere 1.901194 0.382928
Immortals 1.853441 0.379074
mousesports 1.774430 0.353021
Gambit 1.773562 0.361253
NiP 1.726414 0.352745
Space Soldiers 1.709186 0.349242
HellRaisers 1.697832 0.341155
OpTic 1.693548 0.350950
Heroic 1.689978 0.346352
Liquid 1.684302 0.349802
EnVyUs 1.683816 0.349454
BIG 1.626329 0.333417
CLG 1.609057 0.347207
Virtus.pro 1.575830 0.339758
TyLoo 1.575189 0.344628
Renegades 1.548888 0.327963
AGO 1.454879 0.325261
Rogue 1.436819 0.322271
Misfits 1.428212 0.315122
Flash 1.426636 0.325260
Luminosity 1.420550 0.311942
GODSENT 1.394119 0.311952
Kinguin 1.388395 0.306598
... ... ...
NaCL -0.888746 0.695743
Glacon -0.897788 0.413685
Space Jam -0.903306 0.448765
MISO -0.904796 0.437109
Born in Mongolia -0.909230 0.438271
BBSP -0.911476 0.588445
DreamEaters -0.935184 0.453072
Shooting Monkeys -0.937302 0.428997
BlessE -0.939224 0.726999
Wolfpack -0.950900 0.612001
Viboras -0.960354 0.622581
MER IL ET FOU -0.961572 0.740138
Elite Wolves -0.964729 0.389813
TNC -0.970226 0.664898
Awaken Militia -0.983502 0.711434
SHIPACHI -1.005757 0.587244
Kolding -1.048096 0.696867
Brutality -1.078134 0.411807
Born -1.083320 0.693989
Zeta Strike -1.104110 0.658481
Riot fe -1.110233 0.440967
Beastly -1.140722 0.727858
Nova -1.180048 0.667489
myRevenge -1.217665 0.648058
eLixir fe -1.318123 0.589741
iYATi -1.325809 0.515885
BattleOn -1.464783 0.695559
Wargods -1.566826 0.560227
Hyper -1.609159 0.515736
Aztek -1.817754 0.696969

357 rows × 2 columns

Save Model


In [66]:
EVENT_SET = 'all'
pm.backends.text.dump('saved_model/'+EVENT_SET+'/trace', trace)
np.save('saved_model/'+EVENT_SET+'/teams.npy', teams)
np.save('saved_model/'+EVENT_SET+'/maps.npy', maps)
#np.save('saved_model/'+EVENT_SET+'/filter_teams.npy', FILTER_TEAMS[EVENT_SET])

Diagnostics


In [ ]:
with rating_model:
    approx = pm.fit(15000)
    ap_trace = approx.sample(5000)

In [ ]:
print('Gelman Rubin: %s' % pm.diagnostics.gelman_rubin(trace))
print('Effective N: %s' % pm.diagnostics.effective_n(trace))
print('Accept Prob: %.4f' % trace.get_sampler_stats('mean_tree_accept').mean())
print('Percentage of Divergent %.5f' % (trace['diverging'].nonzero()[0].size/float(len(trace))))

In [45]:
pm.traceplot(trace, varnames=['beta'])


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-45-90dff28bd6c7> in <module>()
----> 1 pm.traceplot(trace, varnames=['alpha', 'beta'])

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pymc3\plots\traceplot.py in traceplot(trace, varnames, transform, figsize, lines, combined, plot_transformed, grid, alpha, priors, prior_alpha, prior_style, ax, live_plot, skip_first, refresh_every, roll_over)
     88             prior = None
     89         first_time = True
---> 90         for d in trace.get_values(v, combine=combined, squeeze=False):
     91             d = np.squeeze(transform(d))
     92             d = make_2d(d)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pymc3\backends\base.py in get_values(self, varname, burn, thin, combine, chains, squeeze)
    410         try:
    411             results = [self._straces[chain].get_values(varname, burn, thin)
--> 412                        for chain in chains]
    413         except TypeError:  # Single chain passed.
    414             results = [self._straces[chains].get_values(varname, burn, thin)]

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pymc3\backends\base.py in <listcomp>(.0)
    410         try:
    411             results = [self._straces[chain].get_values(varname, burn, thin)
--> 412                        for chain in chains]
    413         except TypeError:  # Single chain passed.
    414             results = [self._straces[chains].get_values(varname, burn, thin)]

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pymc3\backends\ndarray.py in get_values(self, varname, burn, thin)
    140         A NumPy array
    141         """
--> 142         return self.samples[varname][burn::thin]
    143 
    144     def _slice(self, idx):

KeyError: 'alpha'

In [ ]:
rating_model.profile(pm.gradient(rating_model.logpt, rating_model.vars), n=100).summary()

In [ ]:
rating_model.profile(rating_model.logpt, n=100).summary()

Moar Plots


In [ ]:
sns.set_palette('Paired', n_teams)

f, ax = plt.subplots(figsize=(16,10))
ax.set_ylim(0,2.0)
[sns.kdeplot(trace['sigma'][:,i], shade=True, alpha=0.55, legend=True, ax=ax, label=m) for i,m in enumerate(maps)]
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

In [ ]:
f, axes = plt.subplots(n_maps,1,figsize=(12,34), sharex=True)
for m, ax in enumerate(axes):
    ax.set_title(dict(enumerate(maps))[m])
    ax.set_ylim(0,2.0)
    [sns.kdeplot(trace['rating | map'][:,m,tmap[i]], shade=True, alpha=0.55, legend=False ,
                 ax=ax, label=v['Name']) for i,v in filt.iterrows()]
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

In [ ]:
filt

In [ ]:
i = np.where(teams==7880)
j = np.where(teams==7924)

diff = (trace['rating'][:,j] - trace['rating'][:,i]).flatten()
kappa = 32./(1+np.exp(-1.*trace['alpha']*diff))-16.
fig, (ax1,ax2) = plt.subplots(1,2,figsize=(10,6))
sns.kdeplot(kappa, ax=ax2)
sns.kdeplot(diff, ax=ax1)

Non-MCMC Model


In [ ]:
def vec2dict(s, n_teams):
    return {
        'mu': np.array(s[:n_teams]),
        'sigma': np.array(s[n_teams:n_teams*2]),
        'beta': s[-1],
    }

def dict2vec(s):
    return s['mu'] + s['sigma'] + [s['beta']]

In [ ]:
skills_0 = dict2vec({
    'mu': [1000]*n_teams,
    'sigma': [300]*n_teams,
    'beta': 50
})

In [ ]:
from scipy.optimize import minimize


def loglike(y,p):
    return -1.*(np.sum(y*np.log(p)+(1-y)*np.log(1.-p)))

def obj(skills):
    s = vec2dict(skills, n_teams)
    mean_diff = s['mu'][obs['Team 1 ID'].map(tmap).values] - s['mu'][obs['Team 2 ID'].map(tmap).values]
    var_diff = s['sigma'][obs['Team 1 ID'].map(tmap).values]**2 + s['sigma'][obs['Team 2 ID'].map(tmap).values]**2 + skills[-1]**2
    p = 1.-norm.cdf(0., loc=mean_diff, scale = np.sqrt(var_diff))
    return loglike((obs['Team 1 ID'] == obs['winner']).values, p)

In [ ]:
obj(skills_0)

In [ ]:
opt_skill = g.x
print(opt_skill)
plots = norm.rvs(opt_skill[:5], opt_skill[5:-1], size=(2000,5))

f, ax = plt.subplots(figsize=(12,8))
[sns.kdeplot(plots[:,i], shade=True, alpha=0.55, legend=True, ax=ax, label=i) for i in range(5)]