In [1]:
    
%matplotlib inline
    
In [2]:
    
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from dota.api import DetailsResponse
    
In [3]:
    
# Path to the HDF5 store (under the user's home directory) read by the next cell.
store = Path(
    os.path.expanduser('~/sandbox/dota/data/pro/pro.h5')
)
    
In [4]:
    
# Load the 'drs' table from the HDF5 store.
# pd.read_hdf opens the store, selects the key and closes the file again;
# it replaces pd.get_store, which no longer exists in current pandas.
df = pd.read_hdf(str(store), 'drs')
    
In [5]:
    
# Net worth = gold currently held plus gold already spent.
df['net_worth'] = df['gold'] + df['gold_spent']
# Order rows so that, within each (match, team), net worth is ascending.
df = df.sort(['match_id', 'team', 'net_worth'])
    
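We can estimate the Gini coefficient $G$ of a team by $G = \frac{\sum_{i=1}^n (2i - n - 1)\, x_{(i)}}{n^2 \mu}$, where $x_{(1)} \le \dots \le x_{(n)}$ are the players' net worths sorted in ascending order, $n$ is the number of players, and $\mu$ is their mean net worth.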
In [8]:
    
def gini(team):
    """Estimate the Gini coefficient of ``net_worth`` within one group.

    Implements ``G = sum_{i=1..n} (2i - n - 1) * x_(i) / (n**2 * mu)``,
    where ``x_(1) <= ... <= x_(n)`` are the values in ascending order and
    ``mu`` is their mean.

    Parameters
    ----------
    team : object with a ``net_worth`` attribute (e.g. a DataFrame slice)
        The rows of one group; they may be in any order.

    Returns
    -------
    float
        The estimated coefficient, or NaN when the group is empty or its
        mean net worth is zero (the ratio is undefined in both cases).
    """
    # The rank weights below are only valid for ascending values, so sort
    # here instead of relying on the caller to have pre-sorted the frame.
    worth = np.sort(np.asarray(team.net_worth, dtype=float))
    n = worth.shape[0]
    if n == 0:
        return np.nan
    mu = worth.mean()
    if mu == 0:
        return np.nan
    # 2i - n - 1 for i = 1..n  ->  1-n, 3-n, ..., n-1
    weights = np.arange(1 - n, n, 2)
    return weights.dot(worth) / (n ** 2 * mu)
    
In [9]:
    
# Spot-check gini() on the first five rows (presumably one team -- confirm).
gini(df.head(5))
    
    Out[9]:
In [10]:
    
# One Gini coefficient per (match, team).
dota_gini = (
    df.groupby(['match_id', 'team'])
    .apply(gini)
)
    
In [11]:
    
import pandas.io.wb as wb
    
In [12]:
    
# Download two World Bank indicators, drop incomplete rows and give the
# columns short names.
countries = wb.get_countries()
world = (
    wb.download(country=countries,
                indicator=['SI.POV.GINI', 'NY.GDP.MKTP.KD'])
    .dropna()
    .rename(columns={'SI.POV.GINI': 'gini', 'NY.GDP.MKTP.KD': 'gdp'})
)
# Rescale by 100 (presumably the indicator is reported on a 0-100 scale -- confirm).
world['gini'] = world['gini'] / 100
    
    
In [13]:
    
# Overlay the two Gini distributions on one set of axes.
fig, ax = plt.subplots()
sns.kdeplot(world['gini'], shade=True, label='World', ax=ax)
sns.kdeplot(dota_gini, shade=True, label='DOTA', ax=ax)
    
    Out[13]:
    
In [14]:
    
# Joint distribution of the world Gini and GDP columns, with a regression fit.
g = sns.jointplot(x="gini", y="gdp", data=world, kind="reg", size=7)
    
    
In [15]:
    
# A (match, team) counts as a win only if every row in the group has win set.
win = (
    df.groupby(['match_id', 'team'])['win']
    .apply(lambda flags: flags.all())
)
# Side-by-side frame: one row per (match, team), columns 'gini' and 'win'.
by_win = pd.concat([dota_gini, win], keys=['gini', 'win'], axis=1)
    
In [16]:
    
# greed is good
# Gini density, one curve per value of `win`.
gr = sns.FacetGrid(by_win, hue='win', aspect=2.5)
gr.map(sns.kdeplot, 'gini', shade=True)
plt.legend()
    
    Out[16]:
    
In [17]:
    
# Logistic fit of win on Gini; y is jittered so the 0/1 points don't overplot.
sns.lmplot(x="gini", y="win", data=by_win, logistic=True, y_jitter=.05)
    
    Out[17]:
    
In [18]:
    
# Same logistic fit, with x grouped into 10 bins.
sns.lmplot(x="gini", y="win", data=by_win, logistic=True,
           x_bins=10, truncate=True);
    
    
In [19]:
    
# Per-(match, team) view of net worth, shared by both derived columns.
team_net_worth = df.groupby(['match_id', 'team'])['net_worth']
# Each row's share of its team's total net worth. `transform` returns a
# result aligned to df's own index, so the assignment cannot misalign the
# way a same-shape `apply` can when group keys are added to the index.
df['percentile'] = team_net_worth.transform(lambda x: x / x.sum())
# Rank within the team; ties get a fractional average rank, so round them
# to whole numbers.
df['nw_rank'] = team_net_worth.rank().round()
    
In [20]:
    
# Net worth against kills, one point per row of df.
df.plot(x='kills', y='net_worth', kind='scatter', alpha=.4, color='k')
    
    Out[20]:
    
In [21]:
    
# Net worth against within-team net-worth rank.
df.plot(x='nw_rank', y='net_worth', kind='scatter', alpha=.4, color='k')
    
    Out[21]:
    
In [22]:
    
# Net worth summarised at each rank, drawn as unconnected points.
sns.factorplot(x="nw_rank", y="net_worth", data=df,
               palette="PuBu_d", join=False)
    
    Out[22]:
    
In [23]:
    
# Hexbin of team share against net worth, cropped to the chosen axis limits.
ax = df.plot(x='percentile', y='net_worth', kind='hexbin',
             cmap=plt.cm.PuBu_r)
ax.set_xlim(0, 0.5)
ax.set_ylim(0, 40000)
    
    Out[23]:
    
In [24]:
    
# Team "GDP": total net worth of the (match, team) group.
by_win['gdp'] = (
    df.groupby(['match_id', 'team'])['net_worth']
    .sum()
)
g = sns.jointplot(x="gdp", y="gini", data=by_win, kind="reg", size=7)
    
    
In [25]:
    
# Gini against team GDP, with a separate fit per value of `win`.
sns.lmplot(x="gdp", y="gini", data=by_win, hue="win")
    
    Out[25]:
    
In [26]:
    
# more measures of inequality:
# within-team standard deviation of each stat, joined onto by_win by index.
stds = df.groupby(['match_id', 'team'])[
    ['kills', 'deaths', 'assists', 'last_hits', 'gold']
].std()
by_win = by_win.merge(stds, right_index=True, left_index=True)
    
In [27]:
    
import statsmodels.api as sm
    
In [28]:
    
# Integer copy of the win column, used as the response in the formula.
by_win['win_int'] = by_win['win'].astype(int)
# Logistic regression of win on Gini and the within-team spread of each stat.
mod = sm.Logit.from_formula(
    'win_int ~ gini + kills + deaths + assists + last_hits + gold',
    by_win,
)
res = mod.fit()
res.summary()
    
    
    Out[28]:
In [29]:
    
# Logistic fit of win on the within-team spread of last hits.
sns.lmplot(x="last_hits", y="win", data=by_win, logistic=True, y_jitter=.025)
    
    Out[29]:
    
In [30]:
    
# Density of the last_hits column, one curve per value of `win`.
g = sns.FacetGrid(by_win, hue="win", aspect=3)
g.map(sns.kdeplot, "last_hits", shade=True)
plt.legend()
    
    Out[30]:
    
In [52]:
    
# What if the losing team are all just poor?
# Per-(match, team) sum, mean and std of each stat.
cols = ['kills', 'deaths', 'assists', 'last_hits', 'gold']  # flat list of column labels
g = df.groupby(['match_id', 'team'])
agged = g[cols].agg(['sum', 'mean', 'std'])
# Flatten the (stat, aggregate) column pairs into names such as 'kills_sum'.
# This must read agged's own columns; the previous `x` was never defined.
agged.columns = ['_'.join(y) for y in agged.columns.tolist()]
agged.head()
    
    Out[52]:
In [59]:
    
# Attach the flattened per-team aggregates to by_win, matching on the index.
by_win = by_win.merge(agged, right_index=True, left_index=True)
    
In [62]:
    
# Right-hand-side terms of the logistic model, in formula order.
predictors = [
    'gini', 'gdp',
    'kills_sum', 'kills_std',
    'assists_sum', 'assists_std',
    'deaths_sum', 'deaths_std',
    'last_hits_sum', 'last_hits_std',
]
formula = 'win_int ~ ' + ' + '.join(predictors)
mod = sm.Logit.from_formula(formula, data=by_win)
res = mod.fit()
res.summary()
    
    
    Out[62]:
In [63]:
    
# Logistic fit of win on the team's mean kills.
sns.lmplot(x="kills_mean", y="win", data=by_win, logistic=True, y_jitter=.025)
    
    Out[63]:
    
In [ ]: