In [2]:
import requests
import pandas as pd
import numpy as np
import pymc3 as pm
import seaborn as sns
import datetime as dt
import matplotlib.pyplot as plt
from scipy.stats import norm
from spcl_case import *
plt.style.use('fivethirtyeight')
%matplotlib inline
In [4]:
# Which saved model to load; every per-model artifact lives under this directory.
TEAM_SET = 'all'
model_dir = 'saved_model/' + TEAM_SET

# Index arrays saved with the model: position in these arrays == index in the trace.
teams = np.load(model_dir + '/teams.npy')
maps = np.load(model_dir + '/maps.npy')
# NOTE: the team filter is read from the fixed 'eslpl' model directory,
# not from the directory selected by TEAM_SET.
filt = np.load('saved_model/eslpl/filter_teams.npy')

# Team metadata, re-indexed by ID and ordered like `teams`, then cleaned by fix_teams.
teams_raw = pd.read_csv('hltv_csv/teams_w_ranking.csv')
h_teams = fix_teams(teams_raw.set_index('ID').loc[teams])
in_filter = h_teams.Name.isin(filt)
h_teams_filt = h_teams[in_filter]

# Rebuild the model structure so the stored text trace can be attached to it.
rating_model = prep_pymc_model(len(teams), len(maps))
trace = pm.backends.text.load(model_dir + '/trace', model=rating_model)
In [4]:
# Load all match results, indexed by the 'Match ID' column, with parsed dates.
h_matches = pd.read_csv('hltv_csv/matchResults.csv').set_index('Match ID')
h_matches['Date'] = pd.to_datetime(h_matches.Date)

# Keep matches from 2017-01-01 onwards in which both teams are in the filtered team list.
valid_from = dt.datetime(2017, 1, 1)
known_team_ids = h_teams_filt.index
h_matches = h_matches[(h_matches.Date >= valid_from) &
                      (h_matches['Team 1 ID'].isin(known_team_ids)) &
                      (h_matches['Team 2 ID'].isin(known_team_ids))]

# Draw the validation sample.  The size is capped at the number of available
# matches because sampling without replacement raises ValueError when asked
# for more rows than exist; with >= 1000 rows the draw is unchanged.
n_valid = min(1000, len(h_matches))
sample_valid_set = (
    h_matches
    .sample(n_valid, replace=False, random_state=200)
    # Winner is True when Team 1 scored at least as much as Team 2.
    # NOTE(review): `>=` labels an equal score as a Team 1 win - confirm that
    # draws cannot occur in this data, or drop them before sampling.
    .assign(Winner=lambda m: m['Team 1 Score'] >= m['Team 2 Score'])
)
In [6]:
# Reload the validation sample from disk and keep only the columns used below.
valid_columns = ['Date', 'Team 1 ID', 'Team 2 ID', 'Map', 'Winner']
sample_valid_set = pd.read_csv('sample_valid_set.csv').loc[:, valid_columns]
In [9]:
# Inspect the posterior samples stored under 'rating' in the loaded trace
# (as the last expression, the whole array is the cell's output).
trace['rating']
Out[9]:
In [10]:
def sig(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    The naive formula overflows in ``exp(-x)`` for large negative ``x`` and
    emits a RuntimeWarning; ``scipy.special.expit`` computes the same function
    stably.

    Parameters
    ----------
    x : float or array-like
        Input value(s).

    Returns
    -------
    float or ndarray
        Sigmoid of ``x``, element-wise, in the closed interval [0, 1].
    """
    # Local import keeps this cell self-contained (scipy is already a dependency).
    from scipy.special import expit
    return expit(x)
from tqdm.auto import tqdm


def _mean_win_prob(rating_1, rating_2, alpha):
    """Posterior-mean probability that side 1 beats side 2.

    Each argument holds one value per posterior sample.  The per-sample win
    probability is ``0.5 * tanh(alpha * (rating_1 - rating_2)) + 0.5``; the
    mean over samples is returned.
    """
    return np.mean(0.5 * np.tanh(alpha * (rating_1 - rating_2)) + 0.5)


def _first_positions(values):
    """Map each value to the index of its first occurrence in ``values``."""
    positions = {}
    for pos, value in enumerate(values):
        positions.setdefault(value, pos)
    return positions


# Pull the posterior samples out of the trace once, outside the loop.
t_rating = trace['rating']
t_ratingmap = trace['rating | map']
t_alpha = trace['alpha']

# O(1) lookups instead of scanning `teams` / `maps` with np.where for every match.
team_pos = _first_positions(teams.tolist())
map_pos = _first_positions(maps.tolist())

valid_set = []
for _, match in tqdm(sample_valid_set.iterrows(), total=len(sample_valid_set)):
    t1_ind = team_pos[match['Team 1 ID']]
    t2_ind = team_pos[match['Team 2 ID']]
    m_ind = map_pos[match['Map']]

    # Unconditional prediction: overall team ratings only.
    wr_uncond = _mean_win_prob(t_rating[:, t1_ind], t_rating[:, t2_ind], t_alpha)
    # Conditional prediction: ratings specific to the map that was played.
    wr_cond = _mean_win_prob(t_ratingmap[:, m_ind, t1_ind],
                             t_ratingmap[:, m_ind, t2_ind], t_alpha)

    valid_set.append([int(match['Winner']), wr_uncond, wr_cond])

# Columns: observed outcome, unconditional prediction, conditional prediction.
# The reshape keeps the array 2-D even when the validation set is empty.
validation = np.array(valid_set, dtype=float).reshape(-1, 3)
In [19]:
from sklearn.metrics import roc_curve, confusion_matrix, log_loss
def _binned_win_rate(y_true, prob, edges):
    """Mean observed outcome of the predictions falling in each probability bin.

    Bins are half-open ``[lo, hi)`` except the last one, which is closed so a
    prediction exactly equal to ``edges[-1]`` is still counted.  Bins that
    receive no predictions yield NaN (drawn as a gap by matplotlib).
    """
    n_bins = len(edges) - 1
    rates = np.full(n_bins, np.nan)
    for b in range(n_bins):
        lo, hi = edges[b], edges[b + 1]
        below_hi = (prob <= hi) if b == n_bins - 1 else (prob < hi)
        in_bin = (prob >= lo) & below_hi
        if in_bin.any():
            rates[b] = y_true[in_bin].mean()
    return rates


def print_validation_results(validation, n_bins=10):
    """Print and plot validation diagnostics for both prediction columns.

    Prints the log loss and a row-normalised confusion matrix (threshold 0.5)
    for each prediction column, and draws two figures: ROC curves and a
    calibration plot of observed win rate against predicted probability.

    Parameters
    ----------
    validation : array-like, shape (n_matches, 3)
        Column 0 is the observed outcome (0 or 1), column 1 the unconditional
        ("U") predicted probability, column 2 the conditional ("C") one.
    n_bins : int, optional
        Number of equal-width probability bins in the calibration plot.

    Returns
    -------
    None
    """
    validation = np.asarray(validation, dtype=float)
    y_true = validation[:, 0].astype(int)
    predictions = {'U': validation[:, 1], 'C': validation[:, 2]}
    # Passing the label set explicitly keeps the metrics well-defined (and the
    # confusion matrix 2x2) even if the sample contains only one outcome.
    classes = [0, 1]

    for key in ('U', 'C'):
        print('Log Loss %s: %.5f' % (key, log_loss(y_true, predictions[key], labels=classes)))

    # ROC curves for both predictors, with the chance diagonal for reference.
    fig, ax = plt.subplots(figsize=(8, 6))
    fpr, tpr, _ = roc_curve(y_true, predictions['U'])
    ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve - U')
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    fpr, tpr, _ = roc_curve(y_true, predictions['C'])
    ax.plot(fpr, tpr, color='orangered', lw=2, label='ROC curve - C')
    ax.set_xlabel('False positive rate')
    ax.set_ylabel('True positive rate')
    ax.set_title('ROC')
    ax.legend(loc="lower right")

    # Confusion matrices at a 0.5 threshold, each row normalised to sum to 1.
    for key, heading in (('U', "CM Unconditional"), ('C', "CM Conditional")):
        y_pred = (predictions[key] > 0.5).astype(int)
        cm = confusion_matrix(y_true, y_pred, labels=classes).astype('float')
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        print(heading)
        # Rows with no samples become NaN instead of triggering a 0/0 warning.
        print(np.divide(cm, row_totals, out=np.full_like(cm, np.nan), where=row_totals > 0))

    # Calibration: observed win rate per predicted-probability bin.
    # linspace gives exact, gap-free edges (float-step arange does not).
    edges = np.linspace(0.0, 1.0, n_bins + 1)
    centers = (edges[:-1] + edges[1:]) / 2
    fig, ax = plt.subplots(figsize=(8, 6))
    for key in ('U', 'C'):
        ax.plot(centers, _binned_win_rate(y_true, predictions[key], edges), lw=2, label=key)
    ax.plot(centers, centers, lw=2, linestyle='--')
    ax.set_xlabel('Predicted win probability')
    ax.set_ylabel('Observed win rate')
    ax.set_title('Calibration')
    ax.legend(loc="lower right")
In [7]:
Out[7]:
In [20]:
# Report the diagnostics for the predictions collected in `validation`.
print_validation_results(validation)
In [32]:
from scipy.stats import beta

print("rand guessing")
# Baseline for comparison: replace both prediction columns with independent
# Beta(10, 10) draws.  The draws are seeded so the baseline is reproducible.
rand_guess = beta.rvs(a=10, b=10, size=(validation.shape[0], 2), random_state=200)
# Work on a copy: overwriting `validation` in place would destroy the model's
# predictions for any later (or repeated) use of the scoring cell.
validation_rand = validation.copy()
validation_rand[:, [1, 2]] = rand_guess
print_validation_results(validation_rand)
In [ ]:
# Persist the validation sample with a fresh 0..n-1 index, without writing the index column.
valid_set_out = sample_valid_set.reset_index(drop=True)
valid_set_out.to_csv('sample_valid_set.csv', index=False)
In [ ]:
In [ ]: