In [1]:
%matplotlib inline
In [2]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from dota.api import DetailsResponse
In [3]:
# The pro-match HDF5 store lives under the current user's home directory:
# expand '~' first, then wrap the resulting string in a Path.
store_location = os.path.expanduser('~/sandbox/dota/data/pro/pro.h5')
store = Path(store_location)
In [4]:
# Load the 'drs' table from the HDF5 store into a DataFrame.
# pd.read_hdf opens the file read-only and closes it once the table has been
# read. It replaces pd.get_store, which current pandas releases no longer
# provide, and it is also available on the older releases this notebook
# was written against.
df = pd.read_hdf(str(store), 'drs')
In [5]:
# Net worth of a player = gold currently held + gold already spent.
df['net_worth'] = df['gold'] + df['gold_spent']
# Order rows by match, then team, then ascending net worth, so each team's
# players appear poorest-first. DataFrame.sort has been removed from pandas;
# sort_values is its replacement and sorts by the same keys.
df = df.sort_values(['match_id', 'team', 'net_worth'])
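We can estimate the Gini coefficient $G$ of a team by $G = \frac{\sum_{i=1}^n (2i - n - 1)\, x_i}{n^2 \mu}$, where $x_i$ is the net worth of the $i$-th poorest of the team's $n$ players (so the $x_i$ are in ascending order) and $\mu$ is the team's mean net worth.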
In [8]:
def gini(team):
    """Estimate the Gini coefficient of net worth within one group of rows.

    Uses ``G = sum_i (2*i - n - 1) * x_i / (n**2 * mu)`` where
    ``x_1 <= ... <= x_n`` are the net worths in ascending order and ``mu``
    is their mean.

    Parameters
    ----------
    team : pandas.DataFrame
        Rows to measure; must expose a numeric ``net_worth`` column.
        Row order does not matter: the values are sorted here.

    Returns
    -------
    float
        The estimated coefficient, or ``nan`` when ``team`` has no rows or
        its mean net worth is zero (the ratio is undefined in both cases).
    """
    # The formula is only valid for ascending x_i, so sort here rather than
    # relying on the caller having pre-sorted the frame.
    worth = np.sort(team.net_worth.values)
    n = worth.shape[0]
    if n == 0:
        return np.nan
    mu = worth.mean()
    if mu == 0:
        return np.nan
    # 2*i - n - 1 for i = 1..n is the arithmetic sequence 1-n, 3-n, ..., n-1.
    weights = np.arange(1 - n, n, 2)
    return weights.dot(worth) / (n ** 2 * mu)
In [9]:
# Spot-check the estimator on the first five rows of the sorted frame
# (presumably one complete five-player team -- confirm against the data).
first_five = df.iloc[0:5]
gini(first_five)
Out[9]:
In [10]:
# One Gini estimate per (match, team) pair.
team_groups = df.groupby(['match_id', 'team'])
dota_gini = team_groups.apply(gini)
In [11]:
import pandas.io.wb as wb
In [12]:
# Download the two World Bank indicators for every listed country, keep only
# rows where both are present, and give the columns short names.
countries = wb.get_countries()
wb_indicators = ['SI.POV.GINI', 'NY.GDP.MKTP.KD']
world = wb.download(country=countries, indicator=wb_indicators)
world = world.dropna()
world = world.rename(columns={'SI.POV.GINI': 'gini', 'NY.GDP.MKTP.KD': 'gdp'})
# Divide by 100 so the values are comparable with the fractions gini() returns
# (presumably the downloaded index is on a 0-100 scale -- confirm).
world['gini'] = world['gini'] / 100
In [13]:
# Overlay the distribution of country-level Gini values with the distribution
# of per-team DOTA Gini estimates. Both curves are drawn on the axes created
# here (passed explicitly instead of relying on the implicit "current axes"),
# and the figure is labelled so it can be read on its own.
fig, ax = plt.subplots()
sns.kdeplot(world.gini, shade=True, label='World', ax=ax)
sns.kdeplot(dota_gini, shade=True, label='DOTA', ax=ax)
ax.set_xlabel('Gini coefficient')
ax.set_ylabel('Density')
ax.set_title('Inequality: countries vs. DOTA teams')
ax.legend();
Out[13]:
In [14]:
# Joint distribution of the country-level Gini and GDP columns, with a
# regression fit (kind="reg").
g = sns.jointplot(x="gini", y="gdp", data=world,
                  kind="reg", size=7)
In [15]:
# Collapse the player-level win flag to one value per (match, team): a team
# is marked as a winner only if every one of its rows is flagged as a win.
by_team = df.groupby(['match_id', 'team'])
win = by_team['win'].apply(lambda flags: flags.all())
# Side-by-side frame of each team's Gini estimate and its outcome.
by_win = pd.concat([dota_gini, win], keys=['gini', 'win'], axis=1)
In [16]:
# greed is good
# One shaded density curve of team Gini per outcome (hue='win') on a wide
# single-panel grid.
gr = sns.FacetGrid(by_win, hue='win', aspect=2.5)
gr.map(sns.kdeplot, 'gini', shade=True)
plt.legend()
Out[16]:
In [17]:
# Logistic fit of the win flag on team Gini. y_jitter only spreads the plotted
# 0/1 outcomes vertically so overlapping points stay visible.
sns.lmplot(x="gini", y="win", data=by_win, logistic=True, y_jitter=.05)
Out[17]:
In [18]:
# Same logistic fit, but with the Gini values grouped into 10 bins for display
# and the fitted curve truncated to the range of the data.
sns.lmplot(x="gini", y="win", data=by_win,
           logistic=True, x_bins=10, truncate=True);
In [19]:
team_worth = df.groupby(['match_id', 'team'])['net_worth']
# Each player's share of their team's total net worth (a fraction of the team
# total, despite the column name). transform('sum') broadcasts the team total
# back onto the original row index, so the division lines up row-for-row; a
# groupby.apply result can carry the group keys in its index on newer pandas
# and then no longer aligns with df on assignment.
df['percentile'] = df['net_worth'] / team_worth.transform('sum')
# Rank within the team, 1 = lowest net worth. Tied players receive the average
# of their ranks (e.g. 1.5); round() maps those to whole numbers.
df['nw_rank'] = team_worth.rank().round()  # ties
In [20]:
# Player-level scatter of kills against net worth; translucent black markers
# keep dense regions readable.
scatter_kw = {'color': 'k', 'alpha': .4}
df.plot(x='kills', y='net_worth', kind='scatter', **scatter_kw)
Out[20]:
In [21]:
# Player-level scatter of within-team net-worth rank against net worth,
# drawn with translucent black markers.
scatter_kw = {'color': 'k', 'alpha': .4}
df.plot(x='nw_rank', y='net_worth', kind='scatter', **scatter_kw)
Out[21]:
In [22]:
# Net worth summarised per within-team rank; join=False leaves the per-rank
# estimates unconnected.
sns.factorplot(x="nw_rank", y="net_worth", data=df,
               join=False, palette="PuBu_d")
Out[22]:
In [23]:
# Hexbin density of net worth against each player's share of team net worth.
ax = df.plot(x='percentile', y='net_worth', kind='hexbin', cmap=plt.cm.PuBu_r)
# Crop the view to shares in [0, 0.5] and net worth in [0, 40000].
ax.set_xlim(left=0, right=.5)
ax.set_ylim(bottom=0, top=40000)
Out[23]:
In [24]:
# Team "GDP": the combined net worth of all players on the team.
team_groups = df.groupby(['match_id', 'team'])
by_win['gdp'] = team_groups['net_worth'].sum()
# Joint distribution of team GDP and team Gini with a regression fit.
g = sns.jointplot(x="gdp", y="gini", data=by_win, kind="reg", size=7)
In [25]:
# Linear fit of team Gini on team GDP, fitted and coloured separately for
# winning and losing teams.
sns.lmplot(x="gdp", y="gini", hue="win", data=by_win)
Out[25]:
In [26]:
# more measures of inequality:
# within-team standard deviation of each raw stat. After the merge, the
# by_win columns named 'kills', 'deaths', ... hold these per-team spreads,
# not totals.
spread_cols = ['kills', 'deaths', 'assists', 'last_hits', 'gold']
stds = df.groupby(['match_id', 'team'])[spread_cols].std()
by_win = pd.merge(by_win, stds, left_index=True, right_index=True)
In [27]:
import statsmodels.api as sm
In [28]:
# Integer (0/1) copy of the win flag, used as the response of the model.
by_win['win_int'] = by_win['win'].astype(int)
# Logistic regression of the outcome on team Gini plus the per-team spreads.
predictors = ['gini', 'kills', 'deaths', 'assists', 'last_hits', 'gold']
mod = sm.Logit.from_formula('win_int ~ ' + ' + '.join(predictors), data=by_win)
res = mod.fit()
res.summary()
Out[28]:
In [29]:
# Logistic fit of the win flag on by_win's 'last_hits' column (the per-team
# standard deviation merged in earlier); y_jitter only affects how the 0/1
# points are drawn.
sns.lmplot(x="last_hits", y="win", data=by_win, logistic=True, y_jitter=.025)
Out[29]:
In [30]:
# Shaded density of by_win's 'last_hits' column, one curve per outcome.
g = sns.FacetGrid(by_win, hue="win", aspect=3)
g.map(sns.kdeplot, "last_hits", shade=True)
plt.legend()
Out[30]:
In [52]:
# What if the losing team are all just poor?
# Per-team sum, mean and standard deviation of each raw player stat.
cols = ['kills', 'deaths', 'assists', 'last_hits', 'gold']
g = df.groupby(['match_id', 'team'])
agged = g[cols].agg(['sum', 'mean', 'std'])
# Aggregating with several functions yields two-level (stat, function) column
# labels; flatten them to 'kills_sum', 'kills_mean', ... The labels are read
# from `agged` itself -- the previous `x.columns` referred to a name that is
# never defined in this notebook and only worked from leftover kernel state.
agged.columns = ['_'.join(pair) for pair in agged.columns.tolist()]
agged.head()
Out[52]:
In [59]:
# Attach the flattened per-team aggregates; both frames are indexed by
# (match_id, team), so the merge is index-on-index.
by_win = pd.merge(by_win, agged, left_index=True, right_index=True)
In [62]:
# Logistic regression of the outcome on team Gini, team GDP, and the per-team
# total and spread of kills, assists, deaths and last hits.
stat_terms = [stat + '_' + agg
              for stat in ('kills', 'assists', 'deaths', 'last_hits')
              for agg in ('sum', 'std')]
formula = 'win_int ~ ' + ' + '.join(['gini', 'gdp'] + stat_terms)
mod = sm.Logit.from_formula(formula, data=by_win)
res = mod.fit()
res.summary()
Out[62]:
In [63]:
# Logistic fit of the win flag on the team's mean kills per player; y_jitter
# only affects how the 0/1 points are drawn.
sns.lmplot(x="kills_mean", y="win", data=by_win, logistic=True, y_jitter=.025)
Out[63]:
In [ ]: