The Brazil-Germany semifinal will go down in history as one of the most stunning upsets of the tournament, but I wanted to (literally) see exactly how lopsided the result was when compared to earlier matches.
In [1]:
import urllib2
import json
import numpy as np
import pandas as pd
from collections import OrderedDict
In [2]:
# Data source is (kindly) provided by http://worldcup.sfg.io/
# Fetch the match list and decode the JSON payload. The HTTP response is
# closed explicitly once parsed instead of being left open for the rest of
# the kernel session.
matches_json = urllib2.urlopen('http://worldcup.sfg.io/matches/')
try:
    matches = json.load(matches_json)
finally:
    matches_json.close()
In [3]:
# Build one row per match: the side with more goals is the "winner".
# Rows are collected in a plain list and turned into a DataFrame once,
# rather than growing the frame one row at a time.
rows = []
for match in matches:
    # Skip fixtures whose status is still 'future'.
    if match['status'] == 'future':
        continue
    home, away = match['home_team'], match['away_team']
    # Compare the same integer values that get stored below, so the
    # comparison stays numeric even if the feed delivers goals as strings
    # (assumption: the int() casts suggest that may happen -- confirm).
    home_goals, away_goals = int(home['goals']), int(away['goals'])
    if away_goals > home_goals:
        winner, win_goals = away, away_goals
        loser, loss_goals = home, home_goals
    else:
        # Ties are absorbed into this branch (home side listed as "winner");
        # the terminology doesn't really matter for a draw.
        winner, win_goals = home, home_goals
        loser, loss_goals = away, away_goals
    rows.append([winner['country'], win_goals,
                 loser['country'], loss_goals])

# Passing `columns` keeps the column order fixed and still yields the four
# expected columns when no match has been played yet.
df = pd.DataFrame(rows,
                  columns=['winner', 'win_goals', 'loser', 'loss_goals'])
Our source is now a four-column dataframe, indexed by match order.
In [4]:
# Preview the first few rows of the match table as the cell's output.
df.head()
Out[4]:
In [5]:
# Record which Bokeh build this notebook was run with (presumably prints
# the versions of Bokeh and related packages -- confirm against Bokeh docs).
import bokeh
bokeh.print_versions()
In [6]:
import bokeh.plotting as bkp
import bokeh.objects as bko
# Direct Bokeh output to the notebook (presumably so plots render inline).
bkp.output_notebook()
# Helper applied to the finished plot just before it is shown; it lives in
# a separate snippet rather than in Bokeh itself.
from seabornify import seabornify
# see https://github.com/kdodia/snippets/blob/master/seabornify.py
In [7]:
num_matches = len(df)

# Highlight colours for the two semifinalists; every other team falls back
# to a per-role default, and drawn matches are greyed out on both sides.
TEAM_COLORS = {'Brazil': '#FEFE00', 'Germany': '#D52B1E'}
TIE_COLOR = 'gray'


def _bar_colors(teams, default_color):
    """Return one fill colour per match for a column of team names.

    teams         -- iterable of country names, aligned row-for-row with df
    default_color -- colour used for teams without an entry in TEAM_COLORS

    Rows are walked positionally with zip, so the result does not depend
    on df carrying a 0..n-1 integer index.
    """
    return [TIE_COLOR if win_goals == loss_goals
            else TEAM_COLORS.get(team, default_color)
            for team, win_goals, loss_goals
            in zip(teams, df.win_goals, df.loss_goals)]


win_color = _bar_colors(df.winner, 'slateblue')
loss_color = _bar_colors(df.loser, 'coral')
For plotting purposes, we'll give teams that scored zero goals a 0.1 buffer so they can still be represented on the plot.
In [8]:
# Every 0 in the table is swapped for a small stand-in height so that
# zero-goal sides still get a visible bar.
zero_goal_height = 0.1
df = df.replace(to_replace=0, value=zero_goal_height)
Set up our ColumnDataSource objects so that each index in the columns is implicitly connected. This will be leveraged for the hover tooltip below. (See hover tutorial)
In [9]:
# Each match occupies the unit-wide slot [index, index + 1] on the x axis.
bar_left = df.index
bar_right = df.index + 1

# Winner bars: from the zero baseline up to the goals scored.
win_columns = {
    'left': bar_left,
    'right': bar_right,
    'top': df.win_goals,
    'bottom': [0] * num_matches,
    'color': win_color,
    'country': df.winner,
}
win_source = bko.ColumnDataSource(data=win_columns)

# Loser bars: mirrored below the baseline, down to minus the goals scored.
loss_columns = {
    'left': bar_left,
    'right': bar_right,
    'top': [0] * num_matches,
    'bottom': -df.loss_goals,
    'color': loss_color,
    'country': df.loser,
}
loss_source = bko.ColumnDataSource(data=loss_columns)
In [10]:
# Initialize new figure
bkp.figure(title="World Cup Goals per Match",
x_axis_label="Match number",
y_axis_label="Goals scored",
plot_height=500,
plot_width=800,
tools="hover")
# Plot all following glyphs on the same figure
bkp.hold()
# Winners
bkp.quad(left='left', right='right', top='top', bottom='bottom',
color='color', source=win_source, line_color=None)
# "Losers"
bkp.quad(left='left', right='right', top='top', bottom='bottom',
color='color', source=loss_source, line_color=None)
# Direction indication
bkp.text([-1.5],[0.2],text="Winning team", angle=np.pi/2)
bkp.text([-1.5],[-2.4],text="Losing team", angle=np.pi/2)
# Simple visual separator
bkp.line([0, num_matches-1],
[0,0],
color='black');
Here we'll attach the country name to the hover tooltip for each glyph.
In [11]:
# Pick the first HoverTool attached to the current plot and make its
# tooltip show the `country` column of whichever bar is under the cursor.
hover_tools = [tool for tool in bkp.curplot().tools
               if isinstance(tool, bko.HoverTool)]
hover = hover_tools[0]
hover.tooltips = OrderedDict([("Country", "@country")])
Aaaand show()! Germany in red and Brazil in yellow; hover for country information.
In [12]:
# Restyle the current plot with the seabornify helper, then render it.
current_plot = bkp.curplot()
seabornify(current_plot)
bkp.show()
This game was exceptional.
In [13]:
from IPython.display import Image

# Embed the remote GIF by URL; the Image object is the cell's last
# expression, so the notebook displays it as the output.
reaction_gif_url = "http://i.imgur.com/10gvO.gif"
Image(url=reaction_gif_url)
Out[13]: