In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import footballdata as foo
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Notebook-wide seaborn defaults. Note that the FacetGrid plotting cells below
# call sns.set_style("ticks"), which replaces the "darkgrid" style set here.
sns.set_context("notebook")
sns.set_style("darkgrid")
In [2]:
# Create the FiveThirtyEight reader used by the following cells and print its
# class docstring as a quick reference.
five38 = foo.FiveThirtyEight()
print(five38.__doc__)
In [3]:
# Load the leagues table from the FiveThirtyEight reader; the bare name on the
# last line makes the notebook display it.
leagues = five38.read_leagues()
leagues
Out[3]:
In [4]:
# Load the games table and preview five randomly chosen rows.
# sample() is called without random_state, so the preview differs between runs.
games = five38.read_games()
games.sample(5)
Out[4]:
In [5]:
# Load the forecasts table (sliced per league by the cells below) and preview
# five randomly chosen rows. No random_state is set, so the preview varies.
forecasts = five38.read_forecasts()
forecasts.sample(5)
Out[5]:
In [6]:
# Premier League slice of the forecasts, extended with two derived columns:
#   top3_chance - position_1 + position_2 + position_3
#   points      - three per current win plus one per current tie
# Rows are ordered newest forecast first and, within a forecast date, by
# descending points.
epl_top3_contenders = (
    forecasts
    .loc['ENG-Premier League']
    .reset_index()
    .assign(top3_chance=lambda df: df['position_1'] + df['position_2'] + df['position_3'])
    .assign(points=lambda df: df['current_wins'] * 3 + df['current_ties'])
    .sort_values(by=['last_updated', 'points'], ascending=False)
)
epl_top3_contenders.head()
Out[6]:
Halfway through the season, at least five teams had a shot at making the top 3 of the Premier League:
In [7]:
# One colour per team present in the Premier League frame.
pal = sns.color_palette(n_colors=epl_top3_contenders['team'].nunique())
sns.set_style("ticks")

# Single-axes FacetGrid with one line per team (hue). `height` is the current
# name of the facet-size argument; the older `size` spelling was renamed in
# seaborn 0.9 and is rejected by recent releases.
g = sns.FacetGrid(epl_top3_contenders, hue='team', height=7, aspect=1.5, palette=pal)
g.map(plt.plot, 'last_updated', 'top3_chance').add_legend()
g.ax.set(xlabel='Date',
         ylabel='Chance',
         title='Premier League season 2016-2017: Forecast to make top 3')
sns.despine(offset=5, trim=True);
...while as usual, La Liga showed a very different picture:
In [8]:
# La Liga slice of the forecasts, extended with two derived columns:
#   top3_chance - position_1 + position_2 + position_3
#   points      - three per current win plus one per current tie
# Rows are ordered newest forecast first, then by descending points.
laliga_top3_contenders = (
    forecasts
    .loc['ESP-La Liga']
    .reset_index()
    .assign(top3_chance=lambda x: x.position_1 + x.position_2 + x.position_3)
    .assign(points=lambda x: x.current_wins * 3 + x.current_ties)
    .sort_values(['last_updated', 'points'], ascending=False)
)

# One colour per team present in the La Liga frame.
pal = sns.color_palette(n_colors=laliga_top3_contenders['team'].nunique())
sns.set_style("ticks")

# Single-axes FacetGrid with one line per team (hue). `height` is the current
# name of the facet-size argument; the older `size` spelling was renamed in
# seaborn 0.9 and is rejected by recent releases.
g = sns.FacetGrid(laliga_top3_contenders, hue='team', height=7, aspect=1.5, palette=pal)
g.map(plt.plot, 'last_updated', 'top3_chance').add_legend()
g.ax.set(xlabel='Date',
         ylabel='Chance',
         title='La Liga season 2016-2017: Forecast to make top 3')
sns.despine(offset=5, trim=True);
When we look at actually winning La Liga, only Real and Barça ever had a chance. This has been the case for many years now in La Liga, so no surprise there.
In [9]:
# La Liga slice of the forecasts with a derived `points` column (three per
# current win plus one per current tie), ordered newest forecast first and
# then by descending points.
laliga_win_contenders = (
    forecasts
    .loc['ESP-La Liga']
    .reset_index()
    .assign(points=lambda x: x.current_wins * 3 + x.current_ties)
    .sort_values(['last_updated', 'points'], ascending=False)
)

# Size the palette from the frame that is actually plotted in this cell, so
# the cell no longer depends on a variable created by a different cell.
pal = sns.color_palette(n_colors=laliga_win_contenders['team'].nunique())
sns.set_style("ticks")

# One line per team of the `position_1` column over time. `height` is the
# current name of the facet-size argument (renamed from `size` in seaborn 0.9).
g = sns.FacetGrid(laliga_win_contenders, hue='team', height=7, aspect=1.5, palette=pal)
g.map(plt.plot, 'last_updated', 'position_1').add_legend()
g.ax.set(xlabel='Date',
         ylabel='Chance',
         title='La Liga season 2016-2017: Forecast to win')
sns.despine(offset=5, trim=True);
...But the EPL winner for this season was decided as early as December:
In [10]:
# Premier League slice of the forecasts with a derived `points` column (three
# per current win plus one per current tie), ordered newest forecast first and
# then by descending points.
epl_win_contenders = (
    forecasts
    .loc['ENG-Premier League']
    .reset_index()
    .assign(points=lambda x: x.current_wins * 3 + x.current_ties)
    .sort_values(['last_updated', 'points'], ascending=False)
)

# Size the palette from this cell's own Premier League frame. The previous
# version counted the teams of the La Liga frame, which only produced the
# right number of colours when both leagues had the same number of teams.
pal = sns.color_palette(n_colors=epl_win_contenders['team'].nunique())
sns.set_style("ticks")

# One line per team of the `position_1` column over time. `height` is the
# current name of the facet-size argument (renamed from `size` in seaborn 0.9).
g = sns.FacetGrid(epl_win_contenders, hue='team', height=7, aspect=1.5, palette=pal)
g.map(plt.plot, 'last_updated', 'position_1').add_legend()
g.ax.set(xlabel='Date',
         ylabel='Chance',
         title='Premier League season 2016-2017: Forecast to win')
sns.despine(offset=5, trim=True);
In [11]:
# Print the MatchHistory class docstring as a quick reference for this reader.
print(foo.MatchHistory.__doc__)
In [12]:
# Called on the class itself, so no reader instance is needed. Presumably the
# returned identifiers are the values accepted as the first MatchHistory
# argument (e.g. 'NED-Eredivisie' below) — confirm against the class docstring.
foo.MatchHistory.available_leagues()
Out[12]:
We all know sports teams have an advantage when playing at home. Here's a look at home team advantage for 3 years of the Eredivisie, inspired by this great example by Tom Augspurger.
Travel distances in the Netherlands are small, so it would be interesting to compare this to competitions where away teams often have to travel further.
In [13]:
# range(2014, 2017) yields 2014, 2015 and 2016 (the stop value is excluded),
# matching the "3 years" mentioned in the text above. How MatchHistory maps
# each number onto a season is not visible here — check its docstring.
eredivisie = foo.MatchHistory('NED-Eredivisie', range(2014, 2017)).read_games()
eredivisie.sample(5)
Out[13]:
In [14]:
def home_away_results(games: pd.DataFrame):
    """Summarise every team's record separately for home and away fixtures.

    Parameters
    ----------
    games : pd.DataFrame
        One row per match. After ``reset_index()`` the frame must expose the
        columns ``Date``, ``FTR``, ``home_team`` and ``away_team``. ``FTR`` is
        compared against 'H' (home side won), 'A' (away side won) and
        'D' (draw).

    Returns
    -------
    pd.DataFrame
        Indexed by ``(team, is_home)`` where ``is_home`` is 'Home' or 'Away'.
        Columns are ``n_win``, ``win_pct``, ``n_lose``, ``lose_pct``,
        ``n_draw`` and ``draw_pct``. The ``*_pct`` columns are means of 0/1
        flags, i.e. fractions between 0 and 1 rather than percentages.
    """
    # Reshape to one row per (match, participating team).
    long_form = games.reset_index().melt(
        id_vars=['Date', 'FTR'],
        value_vars=['home_team', 'away_team'],
        var_name='is_home',
        value_name='team',
    )
    long_form['is_home'] = long_form['is_home'].map(
        {'home_team': 'Home', 'away_team': 'Away'})

    played_home = long_form['is_home'] == 'Home'
    played_away = long_form['is_home'] == 'Away'
    home_side_won = long_form['FTR'] == 'H'
    away_side_won = long_form['FTR'] == 'A'

    # 0/1 outcome flags, always seen from the perspective of the row's team.
    long_form['win'] = (
        (played_home & home_side_won) | (played_away & away_side_won)
    ).astype('int64')
    long_form['lose'] = (
        (played_home & away_side_won) | (played_away & home_side_won)
    ).astype('int64')
    long_form['draw'] = (long_form['FTR'] == 'D').astype('int64')

    # For each outcome: count (sum of flags) and rate (mean of flags).
    per_team_venue = long_form.groupby(['team', 'is_home'])
    summaries = [
        per_team_venue[outcome]
        .agg(['sum', 'mean'])
        .rename(columns={'sum': 'n_' + outcome, 'mean': outcome + '_pct'})
        for outcome in ('win', 'lose', 'draw')
    ]
    return pd.concat(summaries, axis=1)
In [15]:
# Aggregate the Eredivisie games per (team, Home/Away) and preview the first
# six rows of the result.
results = home_away_results(eredivisie)
results.head(6)
Out[15]:
The overall picture shows most teams have a clear advantage at home:
In [16]:
# One black line per team joining its away and home win rate.
# - `height` is the current name of the facet-size argument (renamed from
#   `size` in seaborn 0.9).
# - `order` is passed explicitly because FacetGrid.map calls pointplot once
#   per hue subset, and seaborn warns that categorical positions may otherwise
#   be inconsistent. ['Away', 'Home'] matches the row order of `results`.
# - The former `scatter_kws` argument was removed: it is a regplot option, not
#   a pointplot one.
g = sns.FacetGrid(results.reset_index(), hue='team', palette=['k'], height=6, aspect=.5)
g.map(sns.pointplot, 'is_home', 'win_pct', order=['Away', 'Home'])
g.set_axis_labels('', 'win %');
But there are a few exceptions, namely AZ and NAC Breda.
Now NAC win only about 17% of their games overall, so not much to say there. But AZ is a top 5 contender who in the past 3 years have won fewer games at home than away. Meanwhile, have a look at NEC Nijmegen's home-away spread: their supporters seem to be doing a much better job.
In [17]:
# Small multiples: one panel per team, five panels per row, each joining the
# team's away and home win rate.
g = sns.FacetGrid(results.reset_index(),
                  col='team', hue='team', col_wrap=5)
# `order` is explicit because FacetGrid.map calls pointplot once per facet and
# seaborn warns that categorical positions may otherwise differ between
# panels. ['Away', 'Home'] matches the row order of `results`.
g.map(sns.pointplot, 'is_home', 'win_pct', order=['Away', 'Home'])
g.set_axis_labels('', 'win %');
In [18]:
# Select the rows for one team from the first index level (team), leaving the
# Home/Away breakdown for NEC Nijmegen.
results.loc['NEC Nijmegen']
Out[18]:
In [19]:
# Print the ClubElo class docstring as a quick reference for this reader.
print(foo.ClubElo.__doc__)
In [30]:
# Create the ClubElo reader and call read_by_date() with no arguments.
# NOTE(review): presumably that returns the most recent ratings — confirm in
# the ClubElo docstring. The next cell relies on the row order of this frame.
elo = foo.ClubElo()
current_elo = elo.read_by_date()
current_elo.head()
Out[30]:
In [31]:
num_teams = 5    # number of clubs to plot
smoothing = 100  # rolling-window length, in rows of each club's rating history

# First `num_teams` clubs in the row order of current_elo.
# NOTE(review): assumes current_elo is sorted best club first — confirm.
top_teams = current_elo.reset_index()['team'][:num_teams]

# One smoothed Elo series per club, aligned side by side on their shared index.
elo_top_development = pd.concat(
    [elo.read_team_history(team)['Elo'].rolling(smoothing).mean()
     for team in top_teams],
    axis=1)
elo_top_development.columns = top_teams

# Forward-fill the gaps left by aligning the series. ffill() returns a new
# frame; the previous fillna(method='ffill') call discarded its result, so the
# fill never took effect.
elo_top_development = elo_top_development.ffill()

fig = plt.figure(figsize=(16, 10))
ax1 = fig.add_subplot(111, ylabel='ELO rolling avg.', xlabel='Date')
elo_top_development.plot(ax=ax1)
sns.despine();
In [ ]: