In [2]:
import datetime as dt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns
from sklearn.metrics import accuracy_score
plt.style.use('fivethirtyeight')
%matplotlib inline
In [3]:
# Match results keyed by 'Match ID'. The 'Map' column is renamed to 'Map Played'
# so it stays distinct from the 'Map' column of the pick/ban table after the join.
h_matches = (
    pd.read_csv('hltv_csv/matchResults.csv')
    .set_index('Match ID')
    .loc[:, ['Date', 'Team 1 ID', 'Team 2 ID', 'Map']]
    .rename(columns={'Map': 'Map Played'})
)

# Pick/ban records enriched with the date, team IDs and played map of their match.
# NOTE(review): if matchResults.csv holds several rows per 'Match ID', this left
# join repeats every pick/ban row once per match row — confirm that is intended.
h_bp = (
    pd.read_csv('hltv_csv/picksAndBans.csv')
    .set_index('Match ID')
    .join(h_matches, how='left')
)

# Dates are read as text; convert both tables so they can be compared to datetimes.
h_bp['Date'] = pd.to_datetime(h_bp['Date'])
h_matches['Date'] = pd.to_datetime(h_matches['Date'])

# Team lookup table keyed by team ID.
h_teams = pd.read_csv('hltv_csv/teams.csv').set_index('ID')
In [4]:
# Time-based split. The boundaries are named once so the four frames below can
# never disagree about where training ends and validation begins.
TRAIN_START = dt.datetime(2017, 1, 1)  # inclusive
TRAIN_END = dt.datetime(2017, 6, 1)    # exclusive; validation starts here

# Rows dated before TRAIN_START end up in neither the training nor the validation frames.
train = h_bp[(h_bp.Date >= TRAIN_START) & (h_bp.Date < TRAIN_END)]
train_matches = h_matches[(h_matches.Date >= TRAIN_START) & (h_matches.Date < TRAIN_END)]
valid = h_bp[h_bp.Date >= TRAIN_END]
valid_matches = h_matches[h_matches.Date >= TRAIN_END]
In [5]:
# Saved arrays from an earlier model run: `team_ids` are used as labels into the
# index of `h_teams`, and `filt` holds the team names to keep.
team_ids = np.load('saved_model/eslpl/teams.npy')
filt = np.load('saved_model/eslpl/filter_teams.npy')

# Restrict the team table to the saved IDs, then to the teams whose name is in `filt`.
ht_filt = h_teams.loc[team_ids].loc[lambda teams: teams['Name'].isin(filt)]
In [59]:
def model_mp(train, t1, t2):
    """Estimate a map distribution for a matchup from the two teams' map picks.

    For each team, the number of 'picked' rows per map is turned into a
    frequency; the two teams' frequencies are averaged and the result is
    renormalised.

    Parameters
    ----------
    train : pandas.DataFrame
        Pick/ban records with the columns 'Team', ' Pick Type' (the column
        name starts with a space), 'Map' and 'Date'.
    t1, t2
        Values matched against ``train['Team']``.

    Returns
    -------
    pandas.Series
        Indexed by map (every map that appears for either team under any pick
        type), normalised to sum to 1.

    Raises
    ------
    KeyError
        If none of the selected rows has ' Pick Type' equal to 'picked'.
    """
    matchup = train[train['Team'].isin([t1, t2])]
    # Rows: maps. Columns: (pick type, team). Missing combinations count as 0.
    counts = (
        matchup.groupby(['Team', ' Pick Type', 'Map'])['Date']
        .count()
        .unstack([' Pick Type', 'Team'])
        .fillna(0)
    )
    picked = counts['picked']
    per_team_freq = picked / picked.sum(axis=0)
    # mean() skips NaN, so a team with no picks does not poison the average.
    averaged = per_team_freq.mean(axis=1)
    return averaged / averaged.sum(axis=0)
def model_mb(train, t1, t2):
    """Estimate a map distribution for a matchup from the two teams' map removals.

    For each team, the number of 'removed' rows per map is turned into a
    frequency; the two teams' frequencies are averaged and the result is
    renormalised.

    Parameters
    ----------
    train : pandas.DataFrame
        Pick/ban records with the columns 'Team', ' Pick Type' (the column
        name starts with a space), 'Map' and 'Date'.
    t1, t2
        Values matched against ``train['Team']``.

    Returns
    -------
    pandas.Series
        Indexed by map (every map that appears for either team under any pick
        type), normalised to sum to 1.

    Raises
    ------
    KeyError
        If none of the selected rows has ' Pick Type' equal to 'removed'.
    """
    matchup = train[train['Team'].isin([t1, t2])]
    # Rows: maps. Columns: (pick type, team). Missing combinations count as 0.
    counts = (
        matchup.groupby(['Team', ' Pick Type', 'Map'])['Date']
        .count()
        .unstack([' Pick Type', 'Team'])
        .fillna(0)
    )
    removed = counts['removed']
    per_team_freq = removed / removed.sum(axis=0)
    # mean() skips NaN, so a team with no removals does not poison the average.
    averaged = per_team_freq.mean(axis=1)
    return averaged / averaged.sum(axis=0)
def model_mix(train, t1, t2):
    """Blend pick frequencies with inverse removal frequencies for a matchup.

    Per-team map frequencies are computed for every value of ' Pick Type' and
    averaged across the two teams. The 'removed' column is then replaced by
    normalised inverse-frequency weights (rarely removed maps score high), and
    the row-wise mean over all pick-type columns is returned.

    Parameters
    ----------
    train : pandas.DataFrame
        Pick/ban records with the columns 'Team', ' Pick Type' (the column
        name starts with a space), 'Map' and 'Date'.
    t1, t2
        Values matched against ``train['Team']``.

    Returns
    -------
    pandas.Series
        Indexed by map; the mean of the per-pick-type scores for that map.

    Raises
    ------
    KeyError
        If none of the selected rows has ' Pick Type' equal to 'removed'.
    """
    matchup = train[train['Team'].isin([t1, t2])]
    # Rows: maps. Columns: (pick type, team). Missing combinations count as 0.
    counts = (
        matchup.groupby(['Team', ' Pick Type', 'Map'])['Date']
        .count()
        .unstack([' Pick Type', 'Team'])
        .fillna(0)
    )
    freqs = counts / counts.sum(axis=0)

    # Average the teams within each pick type. Grouping the transposed frame
    # replaces DataFrame.mean(level=0, axis=1), which pandas 2.0 removed.
    tab = freqs.T.groupby(level=0).mean().T

    removed = tab['removed']
    never_removed = removed == 0
    if never_removed.any():
        # 1/0 would give inf and inf/inf NaN; use the limiting case instead:
        # the whole inverse weight is shared by the maps that were never removed.
        weights = never_removed.astype(float) / never_removed.sum()
    else:
        inverse = 1.0 / removed
        weights = inverse / inverse.sum(axis=0)
    tab['removed'] = weights
    return tab.mean(axis=1)
def model_played(train, t1, t2):
    """Estimate a map distribution for a matchup from the maps each team played.

    Every row in which a team appears, on either side, counts once towards
    that team's tally for the row's 'Map Played'. Each team's tallies are
    turned into frequencies and the two teams' frequencies are averaged.

    Parameters
    ----------
    train : pandas.DataFrame
        Match rows with the columns 'Team 1 ID', 'Team 2 ID', 'Map Played'
        and 'Date'.
    t1, t2
        Team IDs matched against 'Team 1 ID' and 'Team 2 ID'.

    Returns
    -------
    pandas.Series
        Indexed by 'Map Played'; the mean over the teams of their per-map
        play frequency. A team with no rows in ``train`` contributes nothing.
    """
    teams = [t1, t2]
    # Stack both sides into one long frame with a common 'Team' column. This
    # avoids aligning two Series whose MultiIndex level names differ.
    sides = []
    for side_col in ('Team 1 ID', 'Team 2 ID'):
        side = train.loc[train[side_col].isin(teams), [side_col, 'Map Played', 'Date']]
        sides.append(side.rename(columns={side_col: 'Team'}))
    appearances = pd.concat(sides)

    # Rows: maps. Columns: teams. Maps a team never played count as 0.
    counts = (
        appearances.groupby(['Team', 'Map Played'])['Date']
        .count()
        .unstack(level=0)
        .fillna(0)
    )
    return (counts / counts.sum()).mean(axis=1)
In [62]:
# Compare, for every pair of filtered teams, the predicted map distribution with
# the distribution of maps the pair played in the validation period.

# A pairing is only scored when it has at least this many validation rows.
MIN_MEETINGS = 8

# Resolve each team name to its ID once instead of inside the O(n^2) loop.
# Raises IndexError if a name in `filt` is missing from `ht_filt`.
team_id_by_name = {
    name: ht_filt[ht_filt.Name == name].index[0] for name in filt
}

err = []  # one summed squared error per scored pairing
for i in range(len(filt)):
    for j in range(i + 1, len(filt)):
        t1 = team_id_by_name[filt[i]]
        t2 = team_id_by_name[filt[j]]

        # Validation rows between the two teams, regardless of which side each is on.
        meetings = valid_matches[
            ((valid_matches['Team 1 ID'] == t1) & (valid_matches['Team 2 ID'] == t2))
            | ((valid_matches['Team 2 ID'] == t1) & (valid_matches['Team 1 ID'] == t2))
        ]
        if len(meetings) < MIN_MEETINGS:
            continue

        predicted = model_played(train_matches, t1, t2)
        map_counts = meetings.groupby('Map Played')['Date'].count()
        actual = map_counts / map_counts.sum()

        # Maps present on only one side get 0 on the other.
        comparison = (
            pd.DataFrame([predicted, actual], index=['pred', 'y'])
            .fillna(0)
            .T.sort_values('pred', ascending=False)
        )
        print('%s vs %s' % (filt[i], filt[j]))
        print(comparison)
        err.append(((comparison['pred'] - comparison['y']) ** 2).sum())
In [65]:
# Average of the values currently collected in `err`.
print(np.asarray(err).mean())
In [118]:
# Top-1 evaluation: for every validation row between two filtered teams, predict
# the single most likely map and record it next to the map actually played.
# The prediction here comes from model_played on `train_matches`; model_mp is the
# alternative and takes team names (ht_filt.loc[team_id].Name) with `train`.
in_scope = valid_matches[
    valid_matches['Team 1 ID'].isin(ht_filt.index)
    & valid_matches['Team 2 ID'].isin(ht_filt.index)
]

# The prediction depends only on the team pair, so compute it once per pair.
top_map_by_pair = {}
err = []  # rows of [predicted map, played map]
for _, r in in_scope.iterrows():
    pair = (r['Team 1 ID'], r['Team 2 ID'])
    if pair not in top_map_by_pair:
        # idxmax ignores NaN and breaks ties deterministically (first maximum).
        # It raises ValueError if the pair has no training rows at all.
        top_map_by_pair[pair] = model_played(train_matches, *pair).idxmax()
    err.append([top_map_by_pair[pair], r['Map Played']])

# reshape keeps the array two-dimensional even when no rows matched,
# so column slicing such as err[:, 0] still works.
err = np.array(err).reshape(-1, 2)
In [119]:
# Share of validation rows whose played map equals the predicted map.
# Column 1 of `err` holds the played map (y_true), column 0 the prediction (y_pred).
accuracy_score(err[:, 1], err[:, 0])
Out[119]:
In [117]:
# NOTE(review): repeat of the accuracy computation on `err`. Its execution count
# (117) precedes the cell that builds `err` (118), so this presumably scored an
# earlier run, possibly with a different model — confirm, label it, or drop it.
accuracy_score(err[:,1], err[:,0])
Out[117]:
In [111]:
# NOTE(review): repeat of the accuracy computation on `err`. Its execution count
# (111) precedes the cell that builds `err` (118), so this presumably scored an
# earlier run, possibly with a different model — confirm, label it, or drop it.
accuracy_score(err[:,1], err[:,0])
Out[111]:
In [115]:
# NOTE(review): repeat of the accuracy computation on `err`. Its execution count
# (115) precedes the cell that builds `err` (118), so this presumably scored an
# earlier run, possibly with a different model — confirm, label it, or drop it.
accuracy_score(err[:,1], err[:,0])
Out[115]:
In [ ]: