In [2]:
import requests
import pandas as pd
import numpy as np
import pymc3 as pm
import seaborn as sns
import datetime as dt
import matplotlib.pyplot as plt
from scipy.stats import norm
from wl_model.spcl_case import *
plt.style.use('fivethirtyeight')
%matplotlib inline
In [20]:
# load the saved player/map index arrays and the fitted posterior trace
TEAM_SET = 'all_player_sc'
players = np.load('wl_model/saved_model/'+TEAM_SET+'/players.npy')
maps = np.load('wl_model/saved_model/'+TEAM_SET+'/maps.npy')
filt = np.load('wl_model/saved_model/eslpl/filter_teams.npy')
rating_model = prep_pymc_model(len(players), len(maps))
trace = pm.backends.text.load('wl_model/saved_model/'+TEAM_SET+'/trace', model=rating_model)
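The model itself is built by `prep_pymc_model` from `wl_model.spcl_case`, whose source is not shown in this notebook. Judging only from the trace variables accessed below (`rating`, `rating | map`, `sigma`) and their shapes, it presumably looks something like the following. This is a hypothetical sketch under those assumptions, not the actual `spcl_case` implementation:

# Hypothetical sketch only -- the real model comes from
# wl_model.spcl_case.prep_pymc_model and may differ in priors and structure.
def prep_pymc_model_sketch(n_players, n_maps):
    with pm.Model() as model:
        # global per-player skill rating: shape (n_players,)
        rating = pm.Normal('rating', mu=0., sd=1., shape=n_players)
        # per-map deviation around the global rating: shape (n_maps, n_players)
        rating_map = pm.Normal('rating | map', mu=rating, sd=0.5,
                               shape=(n_maps, n_players))
        # noise on the observed round-score difference
        sigma = pm.HalfCauchy('sigma', beta=0.5)
    return model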
In [4]:
# HLTV reference data: teams (with rankings), match lineups, and player names
h_teams = pd.read_csv('wl_model/hltv_csv/teams_w_ranking.csv')
h_teams = fix_teams(h_teams.set_index('ID'))
h_teams_filt = h_teams[h_teams.Name.isin(filt)]
h_players = pd.read_csv('wl_model/hltv_csv/matchLineups.csv').set_index('Match ID')
h_player_names = pd.read_csv('wl_model/hltv_csv/players.csv').set_index('ID')
In [5]:
h_matches = pd.read_csv('wl_model/hltv_csv/matchResults.csv').set_index('Match ID')
h_matches['Date'] = pd.to_datetime(h_matches.Date)
# keep 2017+ matches where both teams pass the eslpl team filter
h_matches = h_matches[(h_matches.Date >= dt.datetime(2017, 1, 1)) &
                      (h_matches['Team 1 ID'].isin(h_teams_filt.index)) &
                      (h_matches['Team 2 ID'].isin(h_teams_filt.index))]
sample_valid_set = h_matches.sample(1000, replace=False, random_state=200)
sample_valid_set['Winner'] = sample_valid_set['Team 1 Score'] >= sample_valid_set['Team 2 Score']
In [5]:
# (alternative path) reload a previously saved validation sample
sample_valid_set = pd.read_csv('sample_valid_set.csv')
sample_valid_set = sample_valid_set[['Date', 'Team 1 ID', 'Team 2 ID', 'Map', 'Winner']]
In [6]:
# attach each match's ten-player lineup (joined on the Match ID index)
sample_valid_set = sample_valid_set.join(h_players)
In [7]:
sample_valid_set.head()
Out[7]: (table output not shown: first rows of the joined validation set)
In [30]:
from tqdm import tqdm_notebook

def sig(x):
    return 1 / (1 + np.exp(-x))

valid_set = []
t_rating = trace['rating']            # posterior draws: (n_draws, n_players)
t_ratingmap = trace['rating | map']   # posterior draws: (n_draws, n_maps, n_players)
t_alpha = 0.5                         # fixed scale on the rating difference
t_sigma = trace['sigma']              # posterior draws of the score-diff noise
n_players = len(players)
player_col_names = ['Team 1 Player 1', 'Team 1 Player 2', 'Team 1 Player 3', 'Team 1 Player 4', 'Team 1 Player 5',
                    'Team 2 Player 1', 'Team 2 Player 2', 'Team 2 Player 3', 'Team 2 Player 4', 'Team 2 Player 5']
for i, v in tqdm_notebook(sample_valid_set.iterrows(), total=len(sample_valid_set)):
    # look up lineup and map indices (players is sorted, so searchsorted works)
    t1_ind = np.searchsorted(players, v[player_col_names[:5]])
    t2_ind = np.searchsorted(players, v[player_col_names[-5:]])
    m_ind = np.where(maps == v['Map'])[0][0]

    # unconditional (map-agnostic) prediction from the global ratings
    trace_1 = np.sum(t_rating[:, t1_ind], axis=1)
    trace_2 = np.sum(t_rating[:, t2_ind], axis=1)
    diff = trace_1 - trace_2
    p_sc = 16 * np.tanh(t_alpha * diff)  # implied round-score difference
    wr_uncond = np.percentile(1. - norm.cdf(0, loc=p_sc, scale=t_sigma), 45)
    p_wl = 0.5 * np.tanh(diff) + 0.5     # sigmoid-style alternative: sig(diff)
    # wr_uncond = p_wl.mean()
    # wr_uncond = np.percentile(p_wl, 40)

    # conditional prediction from the map-specific ratings
    trace_1 = np.sum(t_ratingmap[:, m_ind, t1_ind], axis=1)
    trace_2 = np.sum(t_ratingmap[:, m_ind, t2_ind], axis=1)
    diff = trace_1 - trace_2
    p_sc = 16 * np.tanh(t_alpha * diff)
    wr_cond = np.percentile(1. - norm.cdf(0, loc=p_sc, scale=t_sigma), 45)
    p_wl = 0.5 * np.tanh(diff) + 0.5     # sigmoid-style alternative: sig(diff)
    # wr_cond = p_wl.mean()
    # wr_cond = np.percentile(p_wl, 40)

    valid_set.append([int(v['Winner']), wr_uncond, wr_cond])
validation = np.array(valid_set)
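The per-match computation above is easier to read factored into a standalone helper. This is only a refactoring of the unconditional branch of the loop, with no new logic; the 45th-percentile posterior summary is kept as the notebook's choice:

def predict_match_uncond(t1_players, t2_players, alpha=0.5, pct=45):
    """Map-agnostic win-rate estimate for Team 1; mirrors the loop above.

    t1_players, t2_players: iterables of five player IDs present in `players`.
    """
    t1_ind = np.searchsorted(players, t1_players)
    t2_ind = np.searchsorted(players, t2_players)
    # posterior distribution of the team rating difference
    diff = t_rating[:, t1_ind].sum(axis=1) - t_rating[:, t2_ind].sum(axis=1)
    p_sc = 16 * np.tanh(alpha * diff)                   # expected round-score edge
    p_win = 1. - norm.cdf(0, loc=p_sc, scale=t_sigma)   # P(score diff > 0) per draw
    return np.percentile(p_win, pct)                    # conservative posterior summary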
In [31]:
plt.hist(validation[:,[1,2]])  # distributions of the U and C predictions
Out[31]: (histogram not shown)
In [32]:
from sklearn.metrics import roc_curve, confusion_matrix, log_loss

def print_validation_results(validation):
    # columns: 0 = actual winner, 1 = unconditional pred, 2 = map-conditional pred
    print('Log Loss U: %.5f' % log_loss(validation[:,0], validation[:,1]))
    print('Log Loss C: %.5f' % log_loss(validation[:,0], validation[:,2]))

    # ROC curves for both predictors
    fpr, tpr, thresh = roc_curve(validation[:,0], validation[:,1])
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve - U')
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    fpr, tpr, thresh = roc_curve(validation[:,0], validation[:,2])
    ax.plot(fpr, tpr, color='orangered', lw=2, label='ROC curve - C')
    ax.legend(loc="lower right")

    # confusion matrices at a 0.5 threshold, row-normalized
    validation_binary = validation.copy()
    validation_binary[:,1] = (validation_binary[:,1] > 0.5).astype(int)
    validation_binary[:,2] = (validation_binary[:,2] > 0.5).astype(int)
    cm = confusion_matrix(validation_binary[:,0], validation_binary[:,1])
    print("CM Unconditional")
    print(cm.astype('float') / cm.sum(axis=1)[:, np.newaxis])
    cm = confusion_matrix(validation_binary[:,0], validation_binary[:,2])
    print("CM Conditional")
    print(cm.astype('float') / cm.sum(axis=1)[:, np.newaxis])

    # calibration: observed win rate within each 0.1-wide prediction bin
    bins = np.vstack([np.arange(0, 1, 0.1), np.arange(0.1, 1.1, 0.1)]).T
    binned = {'U': [], 'C': []}
    for r in bins:
        binned['U'].append(validation[(validation[:,1] >= r[0]) & (validation[:,1] < r[1]), 0].mean())
        binned['C'].append(validation[(validation[:,2] >= r[0]) & (validation[:,2] < r[1]), 0].mean())
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(bins.sum(axis=1)/2, binned['U'], lw=2, label='U')
    ax.plot(bins.sum(axis=1)/2, binned['C'], lw=2, label='C')
    ax.plot(bins.sum(axis=1)/2, bins.sum(axis=1)/2, lw=2, linestyle='--')  # perfect calibration
    ax.legend(loc="lower right")
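The two ROC curves are easier to compare numerically via their AUCs. This is a small optional addition using the same sklearn metrics module; it is not part of the original validation function:

from sklearn.metrics import roc_auc_score

# scalar summaries of the ROC curves plotted above (higher is better)
print('AUC U: %.5f' % roc_auc_score(validation[:,0], validation[:,1]))
print('AUC C: %.5f' % roc_auc_score(validation[:,0], validation[:,2]))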
In [33]:
print_validation_results(validation) # normal score diff
In [15]:
print_validation_results(validation) # sigmoid binary
In [28]:
from scipy.stats import beta
print("rand guessing")
# baseline: replace both predictions with Beta(10,10) noise centered at 0.5
# (note: this overwrites `validation` in place)
rand_guess = beta.rvs(a=10, b=10, size=(validation.shape[0], 2))
validation[:,[1,2]] = rand_guess
print_validation_results(validation)
In [ ]:
sample_valid_set.reset_index(drop=True).to_csv('sample_valid_set.csv',index=False)