Win/Loss Rating Model Prediction

Load the fitted win/loss ratings model and use it to predict match outcomes on a held-out sample.


In [2]:
import requests
import pandas as pd
import numpy as np
import pymc3 as pm
import seaborn as sns
import datetime as dt
import matplotlib.pyplot as plt
from scipy.stats import norm
from spcl_case import *
plt.style.use('fivethirtyeight')
%matplotlib inline


Load Ratings Model


In [4]:
TEAM_SET = 'all'

teams = np.load('saved_model/'+TEAM_SET+'/teams.npy')
maps = np.load('saved_model/'+TEAM_SET+'/maps.npy')
# Use the ESL Pro League team filter regardless of TEAM_SET
filt = np.load('saved_model/eslpl/filter_teams.npy')
h_teams = pd.read_csv('hltv_csv/teams_w_ranking.csv')
h_teams = fix_teams(h_teams.set_index('ID').loc[teams])
h_teams_filt = h_teams[h_teams.Name.isin(filt)]

rating_model = prep_pymc_model(len(teams), len(maps))
trace = pm.backends.text.load('saved_model/'+TEAM_SET+'/trace', model=rating_model)
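
A quick shape check before predicting helps confirm the trace lines up with the team and map arrays (a minimal sketch; 'rating', 'rating | map' and 'alpha' are the trace variables the prediction cell below pulls from):


In [ ]:
# Each rating draw should have one entry per team; the per-map ratings
# one entry per (map, team) pair.
print(trace['rating'].shape)        # (n_draws, len(teams))
print(trace['rating | map'].shape)  # (n_draws, len(maps), len(teams))
print(trace['alpha'].shape)         # (n_draws,)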

Sample a test set


In [4]:
h_matches = pd.read_csv('hltv_csv/matchResults.csv').set_index('Match ID')
h_matches['Date'] = pd.to_datetime(h_matches.Date)
# Keep matches from 2017 onward where both teams are in the filtered set
h_matches = h_matches[(h_matches.Date >= dt.datetime(2017, 1, 1)) &
                      (h_matches['Team 1 ID'].isin(h_teams_filt.index)) &
                      (h_matches['Team 2 ID'].isin(h_teams_filt.index))]
sample_valid_set = h_matches.sample(1000, replace=False, random_state=200)
# Winner is True when Team 1 took the map
sample_valid_set['Winner'] = sample_valid_set['Team 1 Score'] >= sample_valid_set['Team 2 Score']

In [6]:
# Reload a previously saved sample (written out at the end of this notebook)
# so the validation set is identical across runs
sample_valid_set = pd.read_csv('sample_valid_set.csv')
sample_valid_set = sample_valid_set[['Date', 'Team 1 ID', 'Team 2 ID', 'Map', 'Winner']]
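
A quick look at the class balance guards against reading too much into the confusion matrices later (a minimal check; 'Winner' is the Team 1 win flag built above):


In [ ]:
# A Team 1 win rate far from ~0.5 would skew the row-normalized
# confusion matrices computed below.
print(len(sample_valid_set), 'matches')
print('Team 1 win rate: %.3f' % sample_valid_set['Winner'].mean())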

Generate Predictions


In [9]:
trace['rating']


Out[9]:
array([[ 1.580634  ,  0.71641272,  0.42534006, ..., -1.01790679,
         1.23343289, -0.56411511],
       [ 1.45174956,  0.69875753,  0.52441144, ..., -0.46399644,
         0.53907162,  0.59899414],
       [ 1.2747215 ,  1.09224677,  0.37941131, ..., -1.40974236,
         0.21294175, -0.51318467],
       ..., 
       [ 1.56102753,  0.65489405,  0.40283862, ..., -0.98159516,
         1.15702963,  0.00461877],
       [ 1.44642639,  0.75952077,  0.70230359, ..., -0.51775032,
         0.42669931,  0.19037142],
       [ 1.38919735,  0.84982759,  0.84162086, ..., -0.08346162,
         1.40910721,  0.46087247]])
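
The link used in the prediction loop below, 0.5*tanh(alpha*diff) + 0.5, is a logistic curve with twice the slope: since tanh(x) = 2/(1 + exp(-2x)) - 1, the expression 0.5*tanh(x) + 0.5 equals the sigmoid of 2x. A quick numerical check of that identity:


In [ ]:
# 0.5*tanh(d) + 0.5 == 1 / (1 + exp(-2*d)) for all d
d = np.linspace(-3, 3, 13)
print(np.allclose(0.5*np.tanh(d) + 0.5, 1.0/(1.0 + np.exp(-2*d))))  # True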

In [10]:
def sig(x):
    # Logistic function; note 0.5*tanh(a*d) + 0.5 == sig(2*a*d)
    return 1 / (1 + np.exp(-x))

from tqdm import tqdm_notebook

valid_set = []
t_rating = trace['rating']
t_ratingmap = trace['rating | map']
t_alpha = trace['alpha']
for i, v in tqdm_notebook(sample_valid_set.iterrows(), total=len(sample_valid_set)):
    t1_ind = np.where(teams == v['Team 1 ID'])[0][0]
    t2_ind = np.where(teams == v['Team 2 ID'])[0][0]
    m_ind = np.where(maps == v['Map'])[0][0]

    # Unconditional (U): overall ratings, ignoring the map
    diff = t_rating[:, t1_ind] - t_rating[:, t2_ind]
    p_wl = 0.5*np.tanh(t_alpha*diff) + 0.5
    wr_uncond = np.mean(p_wl)

    # Conditional (C): map-specific ratings
    diff = t_ratingmap[:, m_ind, t1_ind] - t_ratingmap[:, m_ind, t2_ind]
    p_wl = 0.5*np.tanh(t_alpha*diff) + 0.5
    wr_cond = np.mean(p_wl)

    valid_set.append([int(v['Winner']), wr_uncond, wr_cond])
validation = np.array(valid_set)
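
The row-by-row loop is easy to follow but slow for large samples. Below is a hedged, vectorized sketch of the same computation; like the loop, it assumes every team ID and map in the sample appears in teams and maps (validation_vec is an illustrative name, not used elsewhere):


In [ ]:
# Vectorized equivalent of the loop above: build (n_draws, n_matches)
# arrays of rating differences and average over draws in one shot.
team_ix = {t: i for i, t in enumerate(teams)}
map_ix = {m: i for i, m in enumerate(maps)}
i1 = sample_valid_set['Team 1 ID'].map(team_ix).values
i2 = sample_valid_set['Team 2 ID'].map(team_ix).values
im = sample_valid_set['Map'].map(map_ix).values

diff_u = t_rating[:, i1] - t_rating[:, i2]
wr_u = (0.5*np.tanh(t_alpha[:, None]*diff_u) + 0.5).mean(axis=0)
diff_c = t_ratingmap[:, im, i1] - t_ratingmap[:, im, i2]
wr_c = (0.5*np.tanh(t_alpha[:, None]*diff_c) + 0.5).mean(axis=0)

validation_vec = np.column_stack(
    [sample_valid_set['Winner'].astype(int).values, wr_u, wr_c])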




In [19]:
from sklearn.metrics import roc_curve, confusion_matrix, log_loss

def print_validation_results(validation):
    print('Log Loss U: %.5f' % log_loss(validation[:,0], validation[:,1]))
    print('Log Loss C: %.5f' % log_loss(validation[:,0], validation[:,2]))

    # ROC curves for the unconditional (U) and map-conditional (C) predictions
    fpr, tpr, thresh = roc_curve(validation[:,0], validation[:,1])
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve - U')
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    fpr, tpr, thresh = roc_curve(validation[:,0], validation[:,2])
    ax.plot(fpr, tpr, color='orangered', lw=2, label='ROC curve - C')
    ax.legend(loc="lower right")

    # Row-normalized confusion matrices at a 0.5 threshold
    validation_binary = validation.copy()
    validation_binary[:,1] = (validation_binary[:,1] > 0.5).astype(int)
    validation_binary[:,2] = (validation_binary[:,2] > 0.5).astype(int)

    cm = confusion_matrix(validation_binary[:,0], validation_binary[:,1])
    print("CM Unconditional")
    print(cm.astype('float') / cm.sum(axis=1)[:, np.newaxis])
    cm = confusion_matrix(validation_binary[:,0], validation_binary[:,2])
    print("CM Conditional")
    print(cm.astype('float') / cm.sum(axis=1)[:, np.newaxis])

    # Calibration: empirical win rate within each predicted-probability decile
    # (a decile with no predictions yields a NaN and a "Mean of empty slice" warning)
    bins = np.vstack([np.arange(0, 1, 0.1), np.arange(0.1, 1.1, 0.1)]).T
    binned = {'U': [], 'C': []}
    for r in bins:
        binned['U'].append(validation[(validation[:,1] >= r[0]) & (validation[:,1] < r[1]), 0].mean())
        binned['C'].append(validation[(validation[:,2] >= r[0]) & (validation[:,2] < r[1]), 0].mean())

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(bins.sum(axis=1)/2, binned['U'], lw=2, label='U')
    ax.plot(bins.sum(axis=1)/2, binned['C'], lw=2, label='C')
    ax.plot(bins.sum(axis=1)/2, bins.sum(axis=1)/2, lw=2, linestyle='--')  # perfect calibration
    ax.legend(loc="lower right")
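
If a single number is easier to compare across runs than the ROC plot, scikit-learn's roc_auc_score summarizes each curve (a minimal sketch, run after validation has been built):


In [ ]:
from sklearn.metrics import roc_auc_score
# Area under the ROC curves above: 0.5 is chance, 1.0 is perfect
print('AUC U: %.4f' % roc_auc_score(validation[:,0], validation[:,1]))
print('AUC C: %.4f' % roc_auc_score(validation[:,0], validation[:,2]))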


In [20]:
print_validation_results(validation)


Log Loss U: 0.63456
Log Loss C: 0.58294
CM Unconditional
[[ 0.62096774  0.37903226]
 [ 0.32539683  0.67460317]]
CM Conditional
[[ 0.63709677  0.36290323]
 [ 0.29166667  0.70833333]]

In [32]:
from scipy.stats import beta
print("rand guessing")
# Baseline: draw "predictions" from Beta(10, 10), which concentrates near 0.5.
# Evaluate on a copy so the model's predictions are not overwritten in place.
rand_guess = beta.rvs(a=10, b=10, size=(validation.shape[0], 2))
validation_rand = validation.copy()
validation_rand[:, [1, 2]] = rand_guess
print_validation_results(validation_rand)


rand guessing
Log Loss U: 0.72371
Log Loss C: 0.71387
CM Unconditional
[[ 0.52419355  0.47580645]
 [ 0.49007937  0.50992063]]
CM Conditional
[[ 0.51814516  0.48185484]
 [ 0.49404762  0.50595238]]
C:\Users\kevin.pei\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:28: RuntimeWarning: Mean of empty slice.
C:\Users\kevin.pei\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:29: RuntimeWarning: Mean of empty slice.
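
For context, a constant prediction of 0.5 scores log loss ln 2 ≈ 0.6931, and the Beta(10, 10) guesses land right around it, so the model's ~0.58-0.63 above is a real improvement. (The "Mean of empty slice" warnings come from calibration deciles that received no random guesses.) A one-line check of the constant baseline:


In [ ]:
# Log loss of always predicting 0.5 is -log(0.5) = ln 2, regardless of labels
print('Constant-0.5 log loss: %.5f' % np.log(2))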

In [ ]:
# Persist the sampled validation set so future runs reuse the same matches
sample_valid_set.reset_index(drop=True).to_csv('sample_valid_set.csv', index=False)
