notebook.community

Edit and run



In [1]:

    
%matplotlib inline



In [2]:

    
import os
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from dota.api import DetailsResponse



In [3]:

    
store = Path(os.path.expanduser('~/sandbox/dota/data/pro/pro.h5'))



In [4]:

    
with pd.get_store(str(store)) as s:
    df = s.select('drs')



In [5]:

    
df['net_worth'] = df.gold + df.gold_spent
df = df.sort(['match_id', 'team', 'net_worth'])

We can estimate the Gini coefficient $G$ by $G = \frac{\sum_{i=1}^n (2i - n - 1)x_i^T}{n^2 \mu}$.



In [8]:

    
def gini(team):
    n = team.shape[0]
    mu = team.net_worth.mean()
    num = np.arange(2 - n - 1, n, 2).dot(team.net_worth.values)
    denom = n ** 2 * mu
    return num / denom



In [9]:

    
gini(df.iloc[:5])









    Out[9]:





0.16709051412020276



In [10]:

    
dota_gini = df.groupby(['match_id', 'team']).apply(gini)



In [11]:

    
import pandas.io.wb as wb



In [12]:

    
countries = wb.get_countries()
world = wb.download(country=countries, indicator=['SI.POV.GINI', 'NY.GDP.MKTP.KD']).dropna()
world = world.rename(columns={'SI.POV.GINI': 'gini', 'NY.GDP.MKTP.KD': 'gdp'})
world['gini'] = world['gini'] / 100









    



Invalid ISO-2 codes: adminregion capitalCity incomeLevel iso2c iso3c latitude lendingType longitude name region



In [13]:

    
fig, ax = plt.subplots()
sns.kdeplot(world.gini, shade=True, label='World')
sns.kdeplot(dota_gini, shade=True, label='DOTA')









    Out[13]:





<matplotlib.axes._subplots.AxesSubplot at 0x10fb281d0>



In [14]:

    
g = sns.jointplot("gini", "gdp", data=world, kind="reg",
                  size=7)



In [15]:

    
win = df.groupby(['match_id', 'team'])['win'].apply(lambda x: x.all())
by_win = pd.concat([dota_gini, win], axis=1, keys=['gini', 'win'])



In [16]:

    
# greed is good

gr = sns.FacetGrid(data=by_win, hue='win', aspect=2.5)
gr.map(sns.kdeplot, 'gini', shade=True)
plt.legend()









    Out[16]:





<matplotlib.legend.Legend at 0x10ca7ce90>



In [17]:

    
sns.lmplot("gini", "win", by_win, logistic=True, y_jitter=.05)









    Out[17]:





<seaborn.axisgrid.FacetGrid at 0x10fe5d250>



In [18]:

    
sns.lmplot("gini", "win", by_win, logistic=True, x_bins=10, truncate=True);



In [19]:

    
df['percentile'] = df.groupby(['match_id', 'team'])['net_worth'].apply(lambda x: x / x.sum())
df['nw_rank'] = df.groupby(['match_id', 'team'])['net_worth'].rank()
df['nw_rank'] = df.nw_rank.round()  # ties



In [20]:

    
df.plot(kind='scatter', x='kills', y='net_worth', color='k', alpha=.4)









    Out[20]:





<matplotlib.axes._subplots.AxesSubplot at 0x10cc61590>



In [21]:

    
df.plot(kind='scatter', x='nw_rank', y='net_worth', color='k', alpha=.4)









    Out[21]:





<matplotlib.axes._subplots.AxesSubplot at 0x10c796f50>



In [22]:

    
sns.factorplot("nw_rank", "net_worth", data=df, palette="PuBu_d", join=False)









    Out[22]:





<seaborn.axisgrid.FacetGrid at 0x10d0ec610>



In [23]:

    
ax = df.plot(kind='hexbin', x='percentile', y='net_worth', cmap=plt.cm.PuBu_r)
ax.set_xlim(0, .5)
ax.set_ylim(0, 40000)









    Out[23]:





(0, 40000)



In [24]:

    
by_win['gdp'] = df.groupby(['match_id', 'team'])['net_worth'].sum()
g = sns.jointplot("gdp", "gini", data=by_win, kind="reg",
                  size=7)



In [25]:

    
sns.lmplot("gdp", "gini", data=by_win, hue="win")









    Out[25]:





<seaborn.axisgrid.FacetGrid at 0x10c858d10>



In [26]:

    
# more measures of inequality:

stds = df.groupby(['match_id', 'team'])[['kills', 'deaths', 'assists', 'last_hits', 'gold']].std()
by_win = by_win.merge(stds, left_index=True, right_index=True)



In [27]:

    
import statsmodels.api as sm



In [28]:

    
by_win['win_int'] = by_win.win.astype(int)
mod = sm.Logit.from_formula('win_int ~ gini + kills + deaths + assists + last_hits + gold', by_win)
res = mod.fit()
res.summary()









    



Optimization terminated successfully.
         Current function value: 0.339185
         Iterations 7






    Out[28]:





Logit Regression Results

  Dep. Variable:       win_int        No. Observations:     17032 


  Model:                Logit         Df Residuals:         17025 


  Method:                MLE          Df Model:                 6 


  Date:           Fri, 11 Apr 2014    Pseudo R-squ.:       0.5107 


  Time:               09:05:12        Log-Likelihood:      -5777.0


  converged:            True          LL-Null:             -11806.


                                    LLR p-value:          0.000 




               coef      std err       z       P>|z|  [95.0% Conf. Int.] 


  Intercept     -1.2898      0.107    -12.000   0.000     -1.500    -1.079


  gini          -9.4976      0.632    -15.024   0.000    -10.737    -8.259


  kills          0.6126      0.021     28.885   0.000      0.571     0.654


  deaths        -1.0672      0.035    -30.704   0.000     -1.135    -0.999


  assists        0.3768      0.023     16.604   0.000      0.332     0.421


  last_hits     -0.0106      0.001    -13.275   0.000     -0.012    -0.009


  gold           0.0032   6.13e-05     51.546   0.000      0.003     0.003



In [29]:

    
sns.lmplot("last_hits", "win", by_win, logistic=True, y_jitter=.025)









    Out[29]:





<seaborn.axisgrid.FacetGrid at 0x10cc735d0>



In [30]:

    
g = sns.FacetGrid(data=by_win, hue="win", aspect=3)
g.map(sns.kdeplot, "last_hits", shade=True)
plt.legend()









    Out[30]:





<matplotlib.legend.Legend at 0x10f57f190>



In [52]:

    
# What if the losing team are all just poor?
cols = [['kills', 'deaths', 'assists', 'last_hits', 'gold']]
g = df.groupby(['match_id', 'team'])
agged = g[cols].agg(['sum', 'mean', 'std'])
agged.columns = ['_'.join(y) for y in x.columns.tolist()]
agged.head()









    Out[52]:






  
    
      
      
      kills_sum
      kills_mean
      kills_std
      deaths_sum
      deaths_mean
      deaths_std
      assists_sum
      assists_mean
      assists_std
      last_hits_sum
      last_hits_mean
      last_hits_std
      gold_sum
      gold_mean
      gold_std
    
    
      match_id
      team
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      10963
      Dire
        9
       1.8
       1.303840
       22
       4.4
       1.140175
       20
        4.0
       1.224745
       433
        86.6
        54.975449
        3913
        782.6
        574.060363
    
    
      Radiant
       21
       4.2
       1.095445
        9
       1.8
       1.643168
       53
       10.6
       2.792848
       621
       124.2
        61.961278
       16520
       3304.0
        335.206653
    
    
      10967
      Dire
        9
       1.8
       1.303840
       30
       6.0
       1.000000
       11
        2.2
       1.303840
       436
        87.2
        50.395436
        1561
        312.2
        181.988186
    
    
      Radiant
       30
       6.0
       9.082951
        9
       1.8
       1.303840
       74
       14.8
       6.379655
       509
       101.8
        83.646279
       11215
       2243.0
       1800.871039
    
    
      10976
      Dire
       26
       5.2
       3.492850
       19
       3.8
       2.387467
       67
       13.4
       3.049590
       760
       152.0
       104.252098
       18564
       3712.8
       1493.578823
    
  

5 rows × 15 columns



In [59]:

    
by_win = by_win.merge(agged, left_index=True, right_index=True)



In [62]:

    
formula = ('win_int ~ gini + gdp + kills_sum + kills_std '
           '+ assists_sum + assists_std + deaths_sum + deaths_std '
           '+ last_hits_sum + last_hits_std')

mod = sm.Logit.from_formula(formula, data=by_win)
res = mod.fit()
res.summary()









    



Optimization terminated successfully.
         Current function value: 0.146222
         Iterations 8






    Out[62]:





Logit Regression Results

  Dep. Variable:       win_int        No. Observations:     17032 


  Model:                Logit         Df Residuals:         17021 


  Method:                MLE          Df Model:                10 


  Date:           Fri, 11 Apr 2014    Pseudo R-squ.:       0.7890 


  Time:               09:20:58        Log-Likelihood:      -2490.4


  converged:            True          LL-Null:             -11806.


                                    LLR p-value:          0.000 




                   coef      std err       z       P>|z|  [95.0% Conf. Int.] 


  Intercept         -1.9841      0.207     -9.604   0.000     -2.389    -1.579


  gini               1.8566      1.099      1.690   0.091     -0.297     4.010


  gdp                0.0001   5.02e-06     24.000   0.000      0.000     0.000


  kills_sum          0.1219      0.012     10.579   0.000      0.099     0.144


  kills_std          0.1718      0.037      4.664   0.000      0.100     0.244


  assists_sum        0.0125      0.004      2.831   0.005      0.004     0.021


  assists_std       -0.0159      0.038     -0.415   0.678     -0.091     0.059


  deaths_sum        -0.2513      0.006    -44.479   0.000     -0.262    -0.240


  deaths_std         0.2713      0.058      4.651   0.000      0.157     0.386


  last_hits_sum     -0.0073      0.000    -17.414   0.000     -0.008    -0.006


  last_hits_std      0.0008      0.002      0.413   0.680     -0.003     0.005



In [63]:

    
sns.lmplot("kills_mean", "win", by_win, logistic=True, y_jitter=.025)









    Out[63]:





<seaborn.axisgrid.FacetGrid at 0x11e2048d0>



In [ ]:

Dep. Variable:	win_int	No. Observations:	17032
Model:	Logit	Df Residuals:	17025
Method:	MLE	Df Model:	6
Date:	Fri, 11 Apr 2014	Pseudo R-squ.:	0.5107
Time:	09:05:12	Log-Likelihood:	-5777.0
converged:	True	LL-Null:	-11806.
		LLR p-value:	0.000

	coef	std err	z	P>\|z\|	[95.0% Conf. Int.]
Intercept	-1.2898	0.107	-12.000	0.000	-1.500 -1.079
gini	-9.4976	0.632	-15.024	0.000	-10.737 -8.259
kills	0.6126	0.021	28.885	0.000	0.571 0.654
deaths	-1.0672	0.035	-30.704	0.000	-1.135 -0.999
assists	0.3768	0.023	16.604	0.000	0.332 0.421
last_hits	-0.0106	0.001	-13.275	0.000	-0.012 -0.009
gold	0.0032	6.13e-05	51.546	0.000	0.003 0.003

		kills_sum	kills_mean	kills_std	deaths_sum	deaths_mean	deaths_std	assists_sum	assists_mean	assists_std	last_hits_sum	last_hits_mean	last_hits_std	gold_sum	gold_mean	gold_std
match_id	team
10963	Dire	9	1.8	1.303840	22	4.4	1.140175	20	4.0	1.224745	433	86.6	54.975449	3913	782.6	574.060363
10963	Radiant	21	4.2	1.095445	9	1.8	1.643168	53	10.6	2.792848	621	124.2	61.961278	16520	3304.0	335.206653
10967	Dire	9	1.8	1.303840	30	6.0	1.000000	11	2.2	1.303840	436	87.2	50.395436	1561	312.2	181.988186
10967	Radiant	30	6.0	9.082951	9	1.8	1.303840	74	14.8	6.379655	509	101.8	83.646279	11215	2243.0	1800.871039
10976	Dire	26	5.2	3.492850	19	3.8	2.387467	67	13.4	3.049590	760	152.0	104.252098	18564	3712.8	1493.578823