In [1]:
%matplotlib inline

In [2]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from dota.api import DetailsResponse

In [3]:
# Path to the HDF5 file `pro.h5`; the leading "~" is expanded to the
# current user's home directory before the Path object is built.
store = Path(
    os.path.expanduser('~/sandbox/dota/data/pro/pro.h5')
)

In [4]:
# Load the 'drs' table from the HDF5 store into `df`.
# `pd.read_hdf` opens the file read-only, selects the key and closes the file
# again; it replaces `pd.get_store`, which is no longer available in current
# pandas releases, while still working on the older ones.
df = pd.read_hdf(str(store), key='drs')

In [5]:
# Net worth of a player = gold currently held + gold already spent.
df['net_worth'] = df['gold'] + df['gold_spent']

# Order rows by match, then team, then ascending net worth within the team.
# NOTE(review): `DataFrame.sort` only exists in old pandas releases; the
# equivalent call in newer releases is `sort_values(by=[...])`.
df = df.sort(columns=['match_id', 'team', 'net_worth'])

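We can estimate the Gini coefficient $G$ of a team by $G = \frac{\sum_{i=1}^n (2i - n - 1)\, x_i}{n^2 \mu}$, where $x_1 \le x_2 \le \dots \le x_n$ are the players' net worths sorted in ascending order, $n$ is the number of players and $\mu$ is their mean net worth.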


In [8]:
def gini(team):
    """Estimate the Gini coefficient of net worth within one group of players.

    Uses ``G = sum_i (2*i - n - 1) * x_i / (n**2 * mu)`` where
    ``x_1 <= ... <= x_n`` are the net worths in ascending order and ``mu``
    is their mean.

    Parameters
    ----------
    team : DataFrame
        Must expose a numeric ``net_worth`` column. Rows may be in any
        order; the values are sorted internally.

    Returns
    -------
    float
        The estimated coefficient, or NaN when ``team`` has no rows or its
        mean net worth is zero (the ratio is undefined in both cases).
    """
    # The estimator is only valid for ascending values, so sort here rather
    # than relying on the caller having pre-sorted the frame.
    worth = np.sort(team.net_worth.values)
    n = worth.shape[0]
    if n == 0:
        return np.nan
    mu = worth.mean()
    if mu == 0:
        return np.nan
    # Weights 2*i - n - 1 for i = 1..n, i.e. 1-n, 3-n, ..., n-1.
    weights = np.arange(1 - n, n, 2)
    return weights.dot(worth) / (n ** 2 * mu)

In [9]:
# Spot-check the estimator on the first five rows of the sorted frame
# (presumably the five players of one team -- TODO confirm).
gini(df.head(5))


Out[9]:
0.16709051412020276

In [10]:
# One Gini coefficient per (match, team): `gini` is applied to each group's rows.
dota_gini = (
    df.groupby(['match_id', 'team'])
      .apply(gini)
)

In [11]:
import pandas.io.wb as wb

In [12]:
# Fetch country metadata, then download the Gini index (SI.POV.GINI) and
# GDP (NY.GDP.MKTP.KD) indicators, dropping rows with missing values and
# giving the indicator columns short names.
# NOTE(review): the recorded output of this cell reports "Invalid ISO-2 codes"
# followed by what look like the column labels of `countries`; this suggests
# the frame itself is being read as the list of codes -- TODO confirm and
# pass the intended country codes explicitly.
countries = wb.get_countries()
world = (
    wb.download(country=countries,
                indicator=['SI.POV.GINI', 'NY.GDP.MKTP.KD'])
      .dropna()
      .rename(columns={'SI.POV.GINI': 'gini', 'NY.GDP.MKTP.KD': 'gdp'})
)
# Rescale the Gini column by 1/100 so it is comparable with `dota_gini`.
world['gini'] = world['gini'] / 100


Invalid ISO-2 codes: adminregion capitalCity incomeLevel iso2c iso3c latitude lendingType longitude name region

In [13]:
# Overlay the density of country-level Gini values and of per-team DOTA Gini
# values on a single, explicitly referenced Axes.
fig, ax = plt.subplots()
sns.kdeplot(world['gini'], shade=True, label='World', ax=ax)
sns.kdeplot(dota_gini, shade=True, label='DOTA', ax=ax)


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x10fb281d0>

In [14]:
# Joint distribution of Gini vs. GDP across the World Bank rows, with a
# regression fit drawn on the joint axes.
g = sns.jointplot(
    x="gini",
    y="gdp",
    data=world,
    kind="reg",
    size=7,
)



In [15]:
# Collapse the per-player `win` flag to one boolean per (match, team):
# True only if every row of the group has win == True.
win = df.groupby(['match_id', 'team'])['win'].apply(lambda flags: flags.all())

# Side-by-side frame of team Gini and team outcome, indexed by (match_id, team).
by_win = pd.concat(
    [dota_gini, win],
    axis=1,
    keys=['gini', 'win'],
)

In [16]:
# greed is good

# Density of team Gini, one curve per value of `win`, on a wide facet.
gr = sns.FacetGrid(by_win, hue='win', aspect=2.5)
gr.map(sns.kdeplot, 'gini', shade=True)
plt.legend()


Out[16]:
<matplotlib.legend.Legend at 0x10ca7ce90>

In [17]:
# Logistic regression of `win` on team Gini; y_jitter spreads the 0/1 points
# vertically so they do not overplot.
# The data variables are passed by keyword: newer seaborn releases reject
# positional x/y/data, while the keyword form works on old and new versions.
sns.lmplot(x="gini", y="win", data=by_win, logistic=True, y_jitter=.05)


Out[17]:
<seaborn.axisgrid.FacetGrid at 0x10fe5d250>

In [18]:
# Same logistic fit, but the scatter is summarised in 10 bins along x and the
# fitted curve is limited to the range of the data (truncate=True).
# Keyword arguments are used for x/y/data because newer seaborn releases
# reject the positional form; keywords work on old and new versions.
sns.lmplot(x="gini", y="win", data=by_win, logistic=True, x_bins=10, truncate=True);



In [19]:
# Each player's share of their team's total net worth (the column is named
# `percentile`, but it holds a fraction of the team total).
# `transform('sum')` broadcasts the team total back onto the original row
# index, so the division aligns with `df` on every pandas version; assigning
# the result of `groupby(...).apply(...)` back to `df` depends on how apply
# labels its output index, which has changed between releases.
df['percentile'] = df['net_worth'] / df.groupby(['match_id', 'team'])['net_worth'].transform('sum')

# Rank of each player's net worth within the team (1 = poorest). Tied players
# get a fractional average rank, which is then rounded to a whole number.
df['nw_rank'] = df.groupby(['match_id', 'team'])['net_worth'].rank()
df['nw_rank'] = df.nw_rank.round()  # ties

In [20]:
# Player-level scatter of kills against net worth (semi-transparent black markers).
df.plot(
    x='kills',
    y='net_worth',
    kind='scatter',
    color='k',
    alpha=0.4,
)


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x10cc61590>

In [21]:
# Player-level scatter of within-team net-worth rank against net worth.
df.plot(
    x='nw_rank',
    y='net_worth',
    kind='scatter',
    color='k',
    alpha=0.4,
)


Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x10c796f50>

In [22]:
# Net worth summarised per within-team rank, drawn as unconnected points.
# NOTE(review): `factorplot` only exists in older seaborn releases -- confirm
# the replacement function before upgrading seaborn.
sns.factorplot(
    x="nw_rank",
    y="net_worth",
    data=df,
    palette="PuBu_d",
    join=False,
)


Out[22]:
<seaborn.axisgrid.FacetGrid at 0x10d0ec610>

In [23]:
# Hexbin density of a player's share of team net worth against net worth.
ax = df.plot(
    x='percentile',
    y='net_worth',
    kind='hexbin',
    cmap=plt.cm.PuBu_r,
)
# Restrict the view to shares in [0, 0.5] and net worth in [0, 40000].
ax.set_xlim(0, 0.5)
ax.set_ylim(0, 40000)


Out[23]:
(0, 40000)

In [24]:
# Team "GDP": total net worth of all players in each (match, team) group,
# aligned to `by_win` on its (match_id, team) index.
by_win['gdp'] = df.groupby(['match_id', 'team'])['net_worth'].sum()

# Joint distribution of team GDP vs. team Gini with a regression fit.
g = sns.jointplot(
    x="gdp",
    y="gini",
    data=by_win,
    kind="reg",
    size=7,
)



In [25]:
# Linear fit of team Gini on team GDP, with a separate fit per value of `win`.
# x/y are passed by keyword: newer seaborn releases reject positional data
# variables, while the keyword form works on old and new versions.
sns.lmplot(x="gdp", y="gini", data=by_win, hue="win")


Out[25]:
<seaborn.axisgrid.FacetGrid at 0x10c858d10>

In [26]:
# more measures of inequality:

# Within-team standard deviation of each per-player statistic. The resulting
# columns keep the original stat names (`kills`, `deaths`, ...), so in
# `by_win` those names refer to standard deviations, not totals.
stds = (
    df.groupby(['match_id', 'team'])
      [['kills', 'deaths', 'assists', 'last_hits', 'gold']]
      .std()
)
# Index-on-index merge; both frames are keyed by (match_id, team).
by_win = pd.merge(by_win, stds, left_index=True, right_index=True)

In [27]:
import statsmodels.api as sm

In [28]:
# Integer (0/1) copy of the boolean outcome for use as the Logit response.
by_win['win_int'] = by_win['win'].astype(int)

# Logistic regression of the outcome on team Gini plus the dispersion columns.
# Note: `kills`, `deaths`, `assists`, `last_hits` and `gold` in `by_win` are
# the within-team standard deviations merged in from `stds`.
mod = sm.Logit.from_formula(
    'win_int ~ gini + kills + deaths + assists + last_hits + gold',
    data=by_win,
)
res = mod.fit()
res.summary()


Optimization terminated successfully.
         Current function value: 0.339185
         Iterations 7
Out[28]:
Logit Regression Results
Dep. Variable: win_int No. Observations: 17032
Model: Logit Df Residuals: 17025
Method: MLE Df Model: 6
Date: Fri, 11 Apr 2014 Pseudo R-squ.: 0.5107
Time: 09:05:12 Log-Likelihood: -5777.0
converged: True LL-Null: -11806.
LLR p-value: 0.000
coef std err z P>|z| [95.0% Conf. Int.]
Intercept -1.2898 0.107 -12.000 0.000 -1.500 -1.079
gini -9.4976 0.632 -15.024 0.000 -10.737 -8.259
kills 0.6126 0.021 28.885 0.000 0.571 0.654
deaths -1.0672 0.035 -30.704 0.000 -1.135 -0.999
assists 0.3768 0.023 16.604 0.000 0.332 0.421
last_hits -0.0106 0.001 -13.275 0.000 -0.012 -0.009
gold 0.0032 6.13e-05 51.546 0.000 0.003 0.003

In [29]:
# Logistic fit of `win` on the `last_hits` column of `by_win` (the within-team
# standard deviation of last hits), with slight vertical jitter on the points.
# x/y/data are passed by keyword: newer seaborn releases reject the
# positional form, while keywords work on old and new versions.
sns.lmplot(x="last_hits", y="win", data=by_win, logistic=True, y_jitter=.025)


Out[29]:
<seaborn.axisgrid.FacetGrid at 0x10cc735d0>

In [30]:
# Density of the `last_hits` column of `by_win` (within-team standard
# deviation of last hits), one curve per value of `win`.
g = sns.FacetGrid(by_win, hue="win", aspect=3)
g.map(sns.kdeplot, "last_hits", shade=True)
plt.legend()


Out[30]:
<matplotlib.legend.Legend at 0x10f57f190>

In [52]:
# What if the losing team are all just poor?

# Per-(match, team) sum, mean and standard deviation of each player statistic.
# `cols` is a flat list of column names (a nested list is not a valid column
# selection on a groupby).
cols = ['kills', 'deaths', 'assists', 'last_hits', 'gold']
g = df.groupby(['match_id', 'team'])
agged = g[cols].agg(['sum', 'mean', 'std'])

# Flatten the two-level (stat, aggregate) column index into names such as
# 'kills_sum'. The names are read from `agged` itself; the previous version
# read them from `x`, which is not defined anywhere in the notebook and only
# worked through leftover kernel state.
agged.columns = ['_'.join(pair) for pair in agged.columns.tolist()]
agged.head()


Out[52]:
kills_sum kills_mean kills_std deaths_sum deaths_mean deaths_std assists_sum assists_mean assists_std last_hits_sum last_hits_mean last_hits_std gold_sum gold_mean gold_std
match_id team
10963 Dire 9 1.8 1.303840 22 4.4 1.140175 20 4.0 1.224745 433 86.6 54.975449 3913 782.6 574.060363
Radiant 21 4.2 1.095445 9 1.8 1.643168 53 10.6 2.792848 621 124.2 61.961278 16520 3304.0 335.206653
10967 Dire 9 1.8 1.303840 30 6.0 1.000000 11 2.2 1.303840 436 87.2 50.395436 1561 312.2 181.988186
Radiant 30 6.0 9.082951 9 1.8 1.303840 74 14.8 6.379655 509 101.8 83.646279 11215 2243.0 1800.871039
10976 Dire 26 5.2 3.492850 19 3.8 2.387467 67 13.4 3.049590 760 152.0 104.252098 18564 3712.8 1493.578823

5 rows × 15 columns


In [59]:
# Attach the flattened sum/mean/std columns to `by_win`; both frames are
# keyed by (match_id, team), so the merge is index-on-index.
by_win = pd.merge(by_win, agged, left_index=True, right_index=True)

In [62]:
# Logit of the outcome on Gini, team GDP, and the team total and within-team
# standard deviation of kills, assists, deaths and last hits.
formula = 'win_int ~ ' + ' + '.join([
    'gini', 'gdp',
    'kills_sum', 'kills_std',
    'assists_sum', 'assists_std',
    'deaths_sum', 'deaths_std',
    'last_hits_sum', 'last_hits_std',
])

mod = sm.Logit.from_formula(formula, data=by_win)
res = mod.fit()
res.summary()


Optimization terminated successfully.
         Current function value: 0.146222
         Iterations 8
Out[62]:
Logit Regression Results
Dep. Variable: win_int No. Observations: 17032
Model: Logit Df Residuals: 17021
Method: MLE Df Model: 10
Date: Fri, 11 Apr 2014 Pseudo R-squ.: 0.7890
Time: 09:20:58 Log-Likelihood: -2490.4
converged: True LL-Null: -11806.
LLR p-value: 0.000
coef std err z P>|z| [95.0% Conf. Int.]
Intercept -1.9841 0.207 -9.604 0.000 -2.389 -1.579
gini 1.8566 1.099 1.690 0.091 -0.297 4.010
gdp 0.0001 5.02e-06 24.000 0.000 0.000 0.000
kills_sum 0.1219 0.012 10.579 0.000 0.099 0.144
kills_std 0.1718 0.037 4.664 0.000 0.100 0.244
assists_sum 0.0125 0.004 2.831 0.005 0.004 0.021
assists_std -0.0159 0.038 -0.415 0.678 -0.091 0.059
deaths_sum -0.2513 0.006 -44.479 0.000 -0.262 -0.240
deaths_std 0.2713 0.058 4.651 0.000 0.157 0.386
last_hits_sum -0.0073 0.000 -17.414 0.000 -0.008 -0.006
last_hits_std 0.0008 0.002 0.413 0.680 -0.003 0.005

In [63]:
# Logistic fit of `win` on the team's mean kills per player, with slight
# vertical jitter on the 0/1 points.
# x/y/data are passed by keyword: newer seaborn releases reject the
# positional form, while keywords work on old and new versions.
sns.lmplot(x="kills_mean", y="win", data=by_win, logistic=True, y_jitter=.025)


Out[63]:
<seaborn.axisgrid.FacetGrid at 0x11e2048d0>

In [ ]: