In [1]:

%matplotlib inline




In [2]:

import os
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from dota.api import DetailsResponse




In [3]:

# Location of the HDF5 store holding the scraped pro-match details responses.
store = Path('~/sandbox/dota/data/pro/pro.h5').expanduser()




In [4]:

# Load the details-responses table ('drs') from the HDF5 store.
# pd.get_store was deprecated and removed from pandas; pd.HDFStore is the
# supported context-manager API and closes the file on exit.
with pd.HDFStore(str(store), mode='r') as s:
    df = s.select('drs')




In [5]:

# Net worth = gold on hand plus everything already spent on items.
df['net_worth'] = df.gold + df.gold_spent
# Sort ascending within (match_id, team) so per-team net worth arrives
# ordered for the Gini estimate below.
# DataFrame.sort was removed from pandas; sort_values is the replacement.
df = df.sort_values(['match_id', 'team', 'net_worth'])



We can estimate the Gini coefficient $G$ by $G = \frac{\sum_{i=1}^n (2i - n - 1)\,x_i}{n^2 \mu}$, where the $x_i$ are sorted in ascending order and $\mu$ is their mean.



In [8]:

def gini(team):
    """Estimate the Gini coefficient of a team's net worth.

    Uses G = sum_i (2i - n - 1) * x_i / (n^2 * mu), which requires the
    x_i in ascending order; the values are sorted here so the function no
    longer depends on the caller having pre-sorted the frame.

    Parameters
    ----------
    team : DataFrame
        Must have a numeric ``net_worth`` column (one row per player).

    Returns
    -------
    float
        Gini coefficient in [0, 1]; 0 means perfectly equal net worth.
        NOTE: divides by the mean, so an all-zero team is undefined.
    """
    nw = np.sort(team.net_worth.values)      # formula needs ascending order
    n = nw.shape[0]
    mu = nw.mean()
    weights = np.arange(2 - n - 1, n, 2)     # 2i - n - 1 for i = 1..n
    return weights.dot(nw) / (n ** 2 * mu)




In [9]:

# Sanity check: Gini of the first five rows (one team's worth of players).
gini(df.iloc[:5])




Out[9]:

0.16709051412020276




In [10]:

# Gini coefficient of net worth for every (match, team) pair.
dota_gini = df.groupby(['match_id', 'team']).apply(gini)




In [11]:

# NOTE(review): pandas.io.wb was removed from pandas; the World Bank
# reader now lives in pandas_datareader (from pandas_datareader import wb).
import pandas.io.wb as wb




In [12]:

countries = wb.get_countries()
# NOTE(review): the original cell referenced `world` before ever defining
# it (NameError). Reconstructed the likely download step from the indicator
# codes renamed below -- confirm the year range against the original run.
world = wb.download(indicator=['SI.POV.GINI', 'NY.GDP.MKTP.KD'],
                    country=countries.iso2c.tolist(), start=2012, end=2012)
world = world.rename(columns={'SI.POV.GINI': 'gini', 'NY.GDP.MKTP.KD': 'gdp'})
# The World Bank reports the Gini index on a 0-100 scale; rescale to [0, 1].
world['gini'] = world['gini'] / 100




Invalid ISO-2 codes: adminregion capitalCity incomeLevel iso2c iso3c latitude lendingType longitude name region




In [13]:

# NOTE(review): this cell creates a figure but nothing is ever drawn on
# `ax`; the plotting call that used it appears to have been lost -- confirm.
fig, ax = plt.subplots()




Out[13]:

<matplotlib.axes._subplots.AxesSubplot at 0x10fb281d0>




In [14]:

# Country-level Gini vs. GDP with a regression fit.
# seaborn renamed jointplot's `size` parameter to `height` and now requires
# x/y to be passed as keywords.
g = sns.jointplot(x="gini", y="gdp", data=world, kind="reg",
                  height=7)







In [15]:

# A team won iff every one of its player rows has win == True, so
# GroupBy.all() replaces the apply(lambda x: x.all()) round-trip.
win = df.groupby(['match_id', 'team'])['win'].all()
# One row per (match, team): that team's Gini and whether it won.
by_win = pd.concat([dota_gini, win], axis=1, keys=['gini', 'win'])




In [16]:

# greed is good

# Distribution of team Gini, winners vs. losers.
# NOTE(review): the original cell built the FacetGrid but never mapped a
# plot onto it, leaving the axes (and legend) empty -- the kdeplot mapping
# below is a reconstruction; confirm against the originally rendered figure.
gr = sns.FacetGrid(data=by_win, hue='win', aspect=2.5)
gr.map(sns.kdeplot, 'gini')
plt.legend()




Out[16]:

<matplotlib.legend.Legend at 0x10ca7ce90>




In [17]:

# Logistic fit: probability of winning as a function of team Gini.
# Modern seaborn requires x/y/data to be passed as keyword arguments.
sns.lmplot(x="gini", y="win", data=by_win, logistic=True, y_jitter=.05)




Out[17]:

<seaborn.axisgrid.FacetGrid at 0x10fe5d250>




In [18]:

# Same logistic fit, with x binned into 10 buckets for readability.
# Modern seaborn requires x/y/data to be passed as keyword arguments.
sns.lmplot(x="gini", y="win", data=by_win, logistic=True, x_bins=10, truncate=True);







In [19]:

# Each player's share of their team's total net worth.
# transform('sum') broadcasts the group total back to the original rows,
# avoiding the slower per-group apply(lambda x: x / x.sum()).
# NOTE(review): 'percentile' is really a share/fraction; the name is kept
# because later cells reference it.
df['percentile'] = df.net_worth / df.groupby(['match_id', 'team'])['net_worth'].transform('sum')
# Rank players within each team by net worth (1 = poorest, 5 = richest).
df['nw_rank'] = df.groupby(['match_id', 'team'])['net_worth'].rank()
df['nw_rank'] = df.nw_rank.round()  # ties




In [20]:

# Kills vs. net worth across all player rows.
df.plot(kind='scatter', x='kills', y='net_worth', color='k', alpha=.4)




Out[20]:

<matplotlib.axes._subplots.AxesSubplot at 0x10cc61590>




In [21]:

# Within-team net-worth rank vs. net worth.
df.plot(kind='scatter', x='nw_rank', y='net_worth', color='k', alpha=.4)




Out[21]:

<matplotlib.axes._subplots.AxesSubplot at 0x10c796f50>




In [22]:

# Point estimates of net worth at each within-team rank.
# NOTE(review): factorplot was renamed catplot in seaborn 0.9 -- update the
# call if running under a modern seaborn.
sns.factorplot("nw_rank", "net_worth", data=df, palette="PuBu_d", join=False)




Out[22]:

<seaborn.axisgrid.FacetGrid at 0x10d0ec610>




In [23]:

# Density of (net-worth share, net worth); limits zoom in on the bulk of
# the data (shares above 0.5 and net worth above 40k are rare outliers).
ax = df.plot(kind='hexbin', x='percentile', y='net_worth', cmap=plt.cm.PuBu_r)
ax.set_xlim(0, .5)
ax.set_ylim(0, 40000)




Out[23]:

(0, 40000)




In [24]:

# Team "GDP": the summed net worth of its five players.
by_win['gdp'] = df.groupby(['match_id', 'team'])['net_worth'].sum()
# seaborn renamed jointplot's `size` parameter to `height` and now requires
# x/y to be passed as keywords.
g = sns.jointplot(x="gdp", y="gini", data=by_win, kind="reg",
                  height=7)







In [25]:

# GDP vs. Gini, split by winners and losers.
# Modern seaborn requires x/y to be passed as keyword arguments.
sns.lmplot(x="gdp", y="gini", data=by_win, hue="win")




Out[25]:

<seaborn.axisgrid.FacetGrid at 0x10c858d10>




In [26]:

# more measures of inequality:

# Within-team standard deviations of the core per-player stats, merged
# onto the per-team table on its (match_id, team) index.
stds = df.groupby(['match_id', 'team'])[['kills', 'deaths', 'assists', 'last_hits', 'gold']].std()
by_win = by_win.merge(stds, left_index=True, right_index=True)




In [27]:

import statsmodels.api as sm




In [28]:

# Logistic regression: does inequality (gini) predict winning after
# controlling for the team-level performance spreads?
by_win['win_int'] = by_win.win.astype(int)  # Logit needs a numeric response
mod = sm.Logit.from_formula('win_int ~ gini + kills + deaths + assists + last_hits + gold', by_win)
res = mod.fit()
res.summary()




Optimization terminated successfully.
Current function value: 0.339185
Iterations 7

Out[28]:

Logit Regression Results

Dep. Variable:      win_int       No. Observations:     17032

Model:               Logit        Df Residuals:         17025

Method:               MLE         Df Model:                 6

Date:          Fri, 11 Apr 2014   Pseudo R-squ.:       0.5107

Time:              09:05:12       Log-Likelihood:      -5777.0

converged:           True         LL-Null:             -11806.

LLR p-value:          0.000

coef     std err      z      P>|z| [95.0% Conf. Int.]

Intercept    -1.2898     0.107   -12.000  0.000    -1.500    -1.079

gini         -9.4976     0.632   -15.024  0.000   -10.737    -8.259

kills         0.6126     0.021    28.885  0.000     0.571     0.654

deaths       -1.0672     0.035   -30.704  0.000    -1.135    -0.999

assists       0.3768     0.023    16.604  0.000     0.332     0.421

last_hits    -0.0106     0.001   -13.275  0.000    -0.012    -0.009

gold          0.0032  6.13e-05    51.546  0.000     0.003     0.003




In [29]:

# Logistic fit: win probability vs. the within-team last-hit spread.
# Modern seaborn requires x/y/data to be passed as keyword arguments.
sns.lmplot(x="last_hits", y="win", data=by_win, logistic=True, y_jitter=.025)




Out[29]:

<seaborn.axisgrid.FacetGrid at 0x10cc735d0>




In [30]:

# NOTE(review): this FacetGrid is never mapped with a plot, so the figure
# (and its legend) render empty -- the .map(...) call appears to have been
# lost from this cell; confirm against the originally rendered output.
g = sns.FacetGrid(data=by_win, hue="win", aspect=3)
plt.legend()




Out[30]:

<matplotlib.legend.Legend at 0x10f57f190>




In [52]:

# What if the losing team are all just poor?
# Per-team totals, means, and spreads of the core stats.
# (The original `cols` was a nested list, which is not a valid groupby
# column selector.)
cols = ['kills', 'deaths', 'assists', 'last_hits', 'gold']
g = df.groupby(['match_id', 'team'])
agged = g[cols].agg(['sum', 'mean', 'std'])
# Flatten the (stat, agg) MultiIndex columns into 'stat_agg' names.
# (The original comprehension iterated over an undefined `x`.)
agged.columns = ['_'.join(pair) for pair in agged.columns.tolist()]
# NOTE(review): Out[52] showed the head of this frame, so the display line
# was presumably lost from the cell -- restored here.
agged.head()




Out[52]:

kills_sum
kills_mean
kills_std
deaths_sum
deaths_mean
deaths_std
assists_sum
assists_mean
assists_std
last_hits_sum
last_hits_mean
last_hits_std
gold_sum
gold_mean
gold_std

match_id
team

10963
Dire
9
1.8
1.303840
22
4.4
1.140175
20
4.0
1.224745
433
86.6
54.975449
3913
782.6
574.060363

21
4.2
1.095445
9
1.8
1.643168
53
10.6
2.792848
621
124.2
61.961278
16520
3304.0
335.206653

10967
Dire
9
1.8
1.303840
30
6.0
1.000000
11
2.2
1.303840
436
87.2
50.395436
1561
312.2
181.988186

30
6.0
9.082951
9
1.8
1.303840
74
14.8
6.379655
509
101.8
83.646279
11215
2243.0
1800.871039

10976
Dire
26
5.2
3.492850
19
3.8
2.387467
67
13.4
3.049590
760
152.0
104.252098
18564
3712.8
1493.578823

5 rows × 15 columns




In [59]:

# Attach the per-team aggregates to the win/gini table on its index.
by_win = by_win.merge(agged, left_index=True, right_index=True)




In [62]:

# Full model: inequality plus team totals and spreads.
# NOTE(review): gold_sum/gold_std are omitted -- presumably because gdp
# (total net worth) already tracks team gold; confirm that was intentional.
formula = ('win_int ~ gini + gdp + kills_sum + kills_std '
'+ assists_sum + assists_std + deaths_sum + deaths_std '
'+ last_hits_sum + last_hits_std')

mod = sm.Logit.from_formula(formula, data=by_win)
res = mod.fit()
res.summary()




Optimization terminated successfully.
Current function value: 0.146222
Iterations 8

Out[62]:

Logit Regression Results

Dep. Variable:      win_int       No. Observations:     17032

Model:               Logit        Df Residuals:         17021

Method:               MLE         Df Model:                10

Date:          Fri, 11 Apr 2014   Pseudo R-squ.:       0.7890

Time:              09:20:58       Log-Likelihood:      -2490.4

converged:           True         LL-Null:             -11806.

LLR p-value:          0.000

coef     std err      z      P>|z| [95.0% Conf. Int.]

Intercept        -1.9841     0.207    -9.604  0.000    -2.389    -1.579

gini              1.8566     1.099     1.690  0.091    -0.297     4.010

gdp               0.0001  5.02e-06    24.000  0.000     0.000     0.000

kills_sum         0.1219     0.012    10.579  0.000     0.099     0.144

kills_std         0.1718     0.037     4.664  0.000     0.100     0.244

assists_sum       0.0125     0.004     2.831  0.005     0.004     0.021

assists_std      -0.0159     0.038    -0.415  0.678    -0.091     0.059

deaths_sum       -0.2513     0.006   -44.479  0.000    -0.262    -0.240

deaths_std        0.2713     0.058     4.651  0.000     0.157     0.386

last_hits_sum    -0.0073     0.000   -17.414  0.000    -0.008    -0.006

last_hits_std     0.0008     0.002     0.413  0.680    -0.003     0.005




In [63]:

# Logistic fit: win probability vs. the team's mean kills.
# Modern seaborn requires x/y/data to be passed as keyword arguments.
sns.lmplot(x="kills_mean", y="win", data=by_win, logistic=True, y_jitter=.025)




Out[63]:

<seaborn.axisgrid.FacetGrid at 0x11e2048d0>




In [ ]: