Win/Loss Betting Model


In [1]:
import pandas as pd
import numpy as np
import datetime as dt
from scipy.stats import norm, bernoulli
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from spcl_case import *
plt.style.use('fivethirtyeight')

Obtain results of teams within the past year


In [2]:
h_matches = pd.read_csv('hltv_csv/matchResults.csv')
h_matches['Date'] = pd.to_datetime(h_matches['Date'])
h_teams = pd.read_csv('hltv_csv/teams.csv')
h_teams = fix_teams(h_teams.set_index('ID'))

In [3]:
MIN_DATE = dt.datetime(2017,1,1)
EVENT_SET = 'eslpl'
FILTER_TEAMS = {'eslpl': ['OpTic', 'SK', 'Cloud9', 'Liquid', 'Luminosity', 'Misfits', 'Renegades', 'Immortals', 
                    'Splyce', 'compLexity', 'Rogue', 'Ghost', 'CLG', 'NRG', 'FaZe', 'North',
                    'BIG', 'LDLC', 'mousesports', 'EnVyUs', 'NiP', 'Virtus.pro', 
                    'Astralis', 'G2', 'GODSENT', 'Heroic', 'fnatic', 'NiP', 'Heroic'],
                'mdleu': ['Virtus.pro', 'FlipSid3', 'eXtatus', 'AGO', 'Fragsters', 'Gambit', 'PRIDE', '1337HUANIA', 
                    'VITALIS', 'Epsilon', 'CHAOS', 'Crowns', 'MK', 'Japaleno', 'Not Academy', 'aAa', 'Space Soldiers',
                    'Singularity', 'Nexus', 'Invictus Aquilas', 'Spirit', 'Kinguin', 'Seed', 'Endpoint', 'iGame.com', 'TEAM5',
                    'ALTERNATE aTTaX'],
                'mdlna': ['Gale Force', 'FRENCH CANADIANS', 'Mythic', 'GX', 'Beacon', 'Torqued', 'Rise Nation', 'Denial', 'subtLe', 
                   'SoaR', 'Muffin Lightning', 'Iceberg', 'ex-Nitrious', 'Adaptation', 'Morior Invictus', 'Naventic', 'CheckSix', 'Good People'
                   , 'LFAO', 'CLG Academy', 'Ambition', 'Mostly Harmless', 'Gorilla Core', 'ex-Nitrious', 'ANTI ECO'],
                'mdlau': ['Grayhound', 'Tainted Minds', 'Kings', 'Chiefs', 'Dark Sided', 'seadoggs', 'Athletico', 'Legacy',
                    'SIN', 'Noxide', 'Control', 'SYF', 'Corvidae', 'Funkd', 'Masterminds', 'Conspiracy', 'AVANT']
               }

h_matches = h_matches[h_matches['Date'] >= MIN_DATE]
h_filter_teams = h_teams[h_teams['Name'].isin(FILTER_TEAMS[EVENT_SET])]
h_matches = h_matches[h_matches['Team 1 ID'].isin(h_filter_teams.index) | h_matches['Team 2 ID'].isin(h_filter_teams.index)]
h_matches['winner'] = h_matches.apply(lambda x: x['Team 1 ID'] if x['Team 1 Score'] > x['Team 2 Score'] else x['Team 2 ID'], axis=1)
h_matches['score_diff'] = h_matches['Team 1 Score'] - h_matches['Team 2 Score']

In [4]:
obs = h_matches[['Map', 'Team 1 ID', 'Team 2 ID', 'score_diff', 'winner']]
obs = obs[obs.Map != 'Default']
obs.head()


Out[4]:
               Map  Team 1 ID  Team 2 ID  score_diff  winner
23834  Cobblestone       6134       4674          14    6134
23835     Overpass       6134       4674           7    6134
23850     Overpass       5158       4674         -12    4674
23852     Overpass       7157       4674         -11    4674
23858        Train       5158       7157          -9    7157

In [5]:
teams = np.sort(np.unique(np.concatenate([h_matches['Team 1 ID'], h_matches['Team 2 ID']])))
maps = obs.Map.unique()
tmap = {v:k for k,v in dict(enumerate(teams)).items()}
mmap = {v:k for k,v in dict(enumerate(maps)).items()}
n_teams = len(teams)
n_maps = len(maps)
print('Number of Teams: %i ' % n_teams)
print('Number of Filtered Teams: %i' % len(FILTER_TEAMS[EVENT_SET]))
print('Number of Matches: %i ' % len(h_matches))
print('Number of Maps: %i '% n_maps)


Number of Teams: 236 
Number of Filtered Teams: 29
Number of Matches: 4444 
Number of Maps: 8 

PyMC3 Model

Determining the binary win/loss outcome $wl_{m,i,j}$: $$ \omega, \tau \sim HC(0.5) \\ R_{k} \sim N(0, \omega^2) \\ \tilde{\theta}_{m,k} \sim N(0,1) \\ R_{m,k} = R_{k} + \tau\tilde{\theta}_{m,k} \\ wl_{m,i,j} \sim B(p = \text{Sig}(R_{m,i}-R_{m,j})) $$

and the score difference $sc_{m,i,j}$:

$$ \alpha \sim Gamma(10,5) \\ \kappa_{m,i,j} = 32\,\text{Sig}(\alpha(R_{m,i}-R_{m,j}))-16 \\ \sigma_{m} \sim HC(0.5) \\ sc_{m,i,j} \sim N(\kappa_{m,i,j}, \sigma_{m}^2) $$
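
As a quick sanity check on the two link functions, here is a small standalone sketch (using the fixed alpha = 0.3485 from the model cell below; the example rating differences are arbitrary) showing how a per-map rating difference maps to a win probability and an expected round difference:

In [ ]:
# Illustrative only: map a rating difference to P(win) and expected score
# difference, using the fixed alpha from the model cell below.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

alpha = 0.3485
for diff in [-1.0, 0.0, 0.5, 1.0]:
    p_win = sigmoid(diff)                          # P(team i beats team j)
    exp_margin = 32 * sigmoid(alpha * diff) - 16   # expected score difference, in (-16, 16)
    print('diff=%+.1f  P(win)=%.3f  E[score diff]=%+.2f' % (diff, p_win, exp_margin))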

In [14]:
import pymc3 as pm
import theano.tensor as tt

In [15]:
obs_map = obs['Map'].map(mmap).values
obs_team_1 = obs['Team 1 ID'].map(tmap).values
obs_team_2 = obs['Team 2 ID'].map(tmap).values
# +1/-1 encoding of (map, team) pairs; used only by the commented-out
# dot-product formulation inside the model below.
bin_enc = np.zeros((len(obs), n_maps*n_teams))
bin_enc[np.arange(len(obs)), obs_map*n_teams+obs_team_1] = 1
bin_enc[np.arange(len(obs)), obs_map*n_teams+obs_team_2] = -1
bin_enc = tt.as_tensor_variable(bin_enc)

with pm.Model() as rating_model:
    
    # Hierarchical ratings: a global rating per team plus a per-map offset,
    # in a non-centered parameterization.
    omega = pm.HalfCauchy('omega', 0.5)
    tau = pm.HalfCauchy('tau', 0.5)
    rating = pm.Normal('rating', 0, omega, shape=n_teams)
    theta_tilde = pm.Normal('rate_t', mu=0, sd=1, shape=(n_maps, n_teams))
    rating_map = pm.Deterministic('rating | map', rating + tau * theta_tilde)
    
    # Alternative formulation via the +/-1 design matrix (unused):
    #r = rating_map.flatten()
    #diff = tt.dot(bin_enc, r)
    # Per-map rating difference between the two teams in each observed match
    diff = rating_map[obs_map, obs_team_1] - rating_map[obs_map, obs_team_2]
    p = pm.math.sigmoid(diff)
    alpha = 0.3485  # fixed here; the write-up above places a Gamma(10, 5) prior on alpha
    kappa = 32*pm.math.sigmoid(alpha*diff)-16.  # expected score difference, bounded to (-16, 16)
    sigma = pm.HalfCauchy('sigma', 0.5, shape=n_maps)  # per-map score-difference noise
    
    sc = pm.Normal('observed score diff', kappa, sigma[obs_map], observed=obs['score_diff'])
    wl = pm.Bernoulli('observed wl', p=p, observed=(obs['Team 1 ID'] == obs['winner']).values)
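
Before sampling, it can be worth confirming that every variable has a finite log-probability at the model's test point; a minimal check using PyMC3's `Model.check_test_point`:

In [ ]:
# Sanity check: -inf for any term usually points to a shape or observed-data problem.
print(rating_model.check_test_point())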

In [18]:
with rating_model:
    #start = approx.sample(1)[0]
    #trace = pm.sample(5000, init='advi', nuts_kwargs={'target_accept': 0.99}, tune=0)
    trace = pm.sample(5000, n_init=20000, init='advi+adapt_diag', nuts_kwargs={'target_accept': 0.90}, tune=1000)


Auto-assigning NUTS sampler...
Initializing NUTS using advi+adapt_diag...
Average Loss = 15,809: 100%|████████████████████████████████████████████████████| 20000/20000 [01:22<00:00, 243.26it/s]
Finished [100%]: Average Loss = 15,809
100%|██████████████████████████████████████████████████████████████████████████████| 6000/6000 [09:07<00:00, 10.96it/s]

In [19]:
team_names = h_teams.loc[teams]
filt = team_names[team_names.Name.isin(FILTER_TEAMS[EVENT_SET])]
sns.set_palette('Paired', n_teams)

f, ax = plt.subplots(figsize=(16,10))
ax.set_ylim(0,2.0)
[sns.kdeplot(trace['rating'][:,tmap[i]], shade=True, alpha=0.55, legend=True, ax=ax, label=v['Name']) for i,v in filt.iterrows()]
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
{v['Name']: trace['rating'][:,tmap[i]].std() for i,v in filt.iterrows()}


Out[19]:
{'Astralis': 0.42359437905941555,
 'BIG': 0.37573225889936712,
 'CLG': 0.42131664899794241,
 'Cloud9': 0.40964035783782338,
 'EnVyUs': 0.39260952789655112,
 'FaZe': 0.39925701892944493,
 'G2': 0.40710454184767692,
 'GODSENT': 0.40817736269331356,
 'Ghost': 0.42305976359025477,
 'Heroic': 0.39886293462752104,
 'Immortals': 0.41961195249935962,
 'LDLC': 0.38757805637087878,
 'Liquid': 0.38894119307475561,
 'Luminosity': 0.37699396156700771,
 'Misfits': 0.38452212688861293,
 'NRG': 0.40475666462213655,
 'NiP': 0.38530972213118553,
 'North': 0.40944078913412751,
 'OpTic': 0.3931722692102253,
 'Renegades': 0.40420502325494267,
 'Rogue': 0.41172721757536801,
 'SK': 0.40624911829608051,
 'Splyce': 0.38873319225957126,
 'Virtus.pro': 0.38984014895580615,
 'compLexity': 0.41832652854313418,
 'fnatic': 0.3967574721301505,
 'mousesports': 0.3876985751121958}

Save Model


In [20]:
pm.backends.text.dump('saved_model/'+EVENT_SET+'/trace', trace)
np.save('saved_model/'+EVENT_SET+'/teams.npy', teams)
np.save('saved_model/'+EVENT_SET+'/maps.npy', maps)
np.save('saved_model/'+EVENT_SET+'/filter_teams.npy', FILTER_TEAMS[EVENT_SET])

In [7]:
obs.to_csv('data.csv')
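
To reuse a fitted run later without re-sampling, the trace and index arrays can be reloaded; a minimal sketch using PyMC3's text backend (it assumes `rating_model` has been re-declared in the session):

In [ ]:
# Illustrative reload of a saved run; the model definition must exist first.
teams = np.load('saved_model/'+EVENT_SET+'/teams.npy')
maps = np.load('saved_model/'+EVENT_SET+'/maps.npy')
with rating_model:
    trace = pm.backends.text.load('saved_model/'+EVENT_SET+'/trace')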

Diagnostics


In [ ]:
with rating_model:
    approx = pm.fit(15000)
    ap_trace = approx.sample(5000)

In [ ]:
print('Gelman Rubin: %s' % pm.diagnostics.gelman_rubin(trace))
print('Effective N: %s' % pm.diagnostics.effective_n(trace))
print('Accept Prob: %.4f' % trace.get_sampler_stats('mean_tree_accept').mean())
print('Percentage of Divergent %.5f' % (trace['diverging'].nonzero()[0].size/float(len(trace))))

In [ ]:
pm.traceplot(trace, varnames=['tau', 'sigma'])

In [ ]:
rating_model.profile(pm.gradient(rating_model.logpt, rating_model.vars), n=100).summary()

In [ ]:
rating_model.profile(rating_model.logpt, n=100).summary()

Moar Plots


In [ ]:
sns.set_palette('Paired', n_teams)

f, ax = plt.subplots(figsize=(16,10))
ax.set_ylim(0,2.0)
[sns.kdeplot(trace['sigma'][:,i], shade=True, alpha=0.55, legend=True, ax=ax, label=m) for i,m in enumerate(maps)]
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

In [ ]:
f, axes = plt.subplots(n_maps,1,figsize=(12,34), sharex=True)
for m, ax in enumerate(axes):
    ax.set_title(maps[m])
    ax.set_ylim(0,2.0)
    [sns.kdeplot(trace['rating | map'][:,m,tmap[i]], shade=True, alpha=0.55, legend=False ,
                 ax=ax, label=v['Name']) for i,v in filt.iterrows()]
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

In [ ]:
filt

In [ ]:
i = np.where(teams == 7880)[0]
j = np.where(teams == 7924)[0]

diff = (trace['rating'][:,j] - trace['rating'][:,i]).flatten()
alpha = 0.3485  # alpha is a fixed constant in the model above, so it is not stored in the trace
kappa = 32./(1+np.exp(-1.*alpha*diff))-16.
fig, (ax1,ax2) = plt.subplots(1,2,figsize=(10,6))
sns.kdeplot(kappa, ax=ax2)
sns.kdeplot(diff, ax=ax1)

Non-MCMC Model


In [ ]:
def vec2dict(s, n_teams):
    return {
        'mu': np.array(s[:n_teams]),
        'sigma': np.array(s[n_teams:n_teams*2]),
        'beta': s[-1],
    }

def dict2vec(s):
    return s['mu'] + s['sigma'] + [s['beta']]

In [ ]:
skills_0 = dict2vec({
    'mu': [1000]*n_teams,
    'sigma': [300]*n_teams,
    'beta': 50
})

In [ ]:
from scipy.optimize import minimize


def loglike(y, p):
    # negative Bernoulli log-likelihood, to be minimized
    return -1.*(np.sum(y*np.log(p)+(1-y)*np.log(1.-p)))

def obj(skills):
    # P(Team 1 wins) when the skill difference is Gaussian:
    # skill_1 - skill_2 ~ N(mu_1 - mu_2, sigma_1^2 + sigma_2^2 + beta^2)
    s = vec2dict(skills, n_teams)
    mean_diff = s['mu'][obs['Team 1 ID'].map(tmap).values] - s['mu'][obs['Team 2 ID'].map(tmap).values]
    var_diff = s['sigma'][obs['Team 1 ID'].map(tmap).values]**2 + s['sigma'][obs['Team 2 ID'].map(tmap).values]**2 + skills[-1]**2
    p = 1. - norm.cdf(0., loc=mean_diff, scale=np.sqrt(var_diff))
    return loglike((obs['Team 1 ID'] == obs['winner']).values, p)

In [ ]:
obj(skills_0)
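
The next cell reads `g.x`, but the optimization call itself does not appear in the notebook; a minimal sketch of that missing step, assuming an off-the-shelf scipy solver (method and options are guesses):

In [ ]:
# Hypothetical fill-in for the missing optimization step; `g` is what the
# following cell's `g.x` refers to. Solver choice and options are assumptions.
g = minimize(obj, skills_0, method='L-BFGS-B', options={'maxiter': 500})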

In [ ]:
opt_skill = g.x
print(opt_skill)
# Sample from the fitted Gaussians for the first 5 teams. The vector layout is
# [mu_0..mu_{n-1}, sigma_0..sigma_{n-1}, beta], so the matching sigmas start at n_teams.
plots = norm.rvs(opt_skill[:5], opt_skill[n_teams:n_teams+5], size=(2000,5))

f, ax = plt.subplots(figsize=(12,8))
[sns.kdeplot(plots[:,i], shade=True, alpha=0.55, legend=True, ax=ax, label=i) for i in range(5)]