Brazil-Germany Semifinal

The Brazil-Germany semifinal will go down in history as one of the most stunning upsets of the tournament, but I wanted to (literally) see exactly how lopsided the result is when compared to earlier matches.


import urllib2
import json

import numpy as np
import pandas as pd
from collections import OrderedDict

# Fetch the list of World Cup matches as JSON.
# NOTE(review): the data-source attribution and the URL appear to have been
# stripped from this file; the empty URL below must be restored before this
# cell can fetch anything.
matches_json = urllib2.urlopen('')
try:
    matches = json.load(matches_json)
finally:
    # Release the connection even if the payload is not valid JSON.
    matches_json.close()

# One row per completed match, appended in the order the feed lists them.
df = pd.DataFrame(columns=['winner','win_goals','loser','loss_goals'])

for match in matches:
    # Only matches that have already happened have a result to record.
    if match['status'] == 'future':
        continue

    home, away = match['home_team'], match['away_team']
    # Compare as integers (the same cast used for the stored row) so the
    # ordering is numeric regardless of how the feed encodes goal counts.
    if int(away['goals']) > int(home['goals']):
        winner, loser = away, home
    else:
        # ties are absorbed into this block; terminology doesn't really matter
        winner, loser = home, away
    df.loc[len(df)] = [winner['country'], int(winner['goals']),
                        loser['country'], int(loser['goals'])]

Our source is now a four-column dataframe, indexed by match order.

   winner       win_goals  loser      loss_goals
0  Brazil       3          Croatia    1
1  Mexico       1          Cameroon   0
2  Netherlands  5          Spain      1
3  Chile        3          Australia  1
4  Colombia     3          Greece     0


import bokeh

import bokeh.plotting as bkp
import bokeh.objects as bko

from seabornify import seabornify
# see

num_matches = len(df)

# Brazil and Germany get their own highlight colors wherever they appear;
# every other team falls back to the default color for its side of the axis.
featured = {'Brazil': '#FEFE00', 'Germany': '#D52B1E'}
win_color = [featured.get(team, 'slateblue') for team in df.winner]
loss_color = [featured.get(team, 'coral') for team in df.loser]

# Ties should have a neutral color representation
tied = [won == lost for won, lost in zip(df.win_goals, df.loss_goals)]
win_color = ['gray' if draw else shade
             for draw, shade in zip(tied, win_color)]
loss_color = ['gray' if draw else shade
              for draw, shade in zip(tied, loss_color)]

For plotting purposes, we'll give "zero goal" teams a 0.1 buffer so they can be represented on the plot.

# Swap every 0 in the frame for 0.1 so a scoreless side still gets a
# visible sliver of a bar instead of a zero-height quad.
df = df.replace(to_replace=0, value=0.1)

Set up our ColumnDataSource objects so that each index in the columns is implicitly connected. This will be leveraged for the hover tooltip below. (See hover tutorial)

# Each match occupies the x-interval [index, index + 1]. Winners are drawn
# upward from the zero baseline, losers downward from it. Both sources use
# the same column names so the same glyph call and tooltip field work on both.
win_columns = {
    'left': df.index,
    'right': df.index + 1,
    'top': df.win_goals,
    'bottom': [0] * num_matches,
    'color': win_color,
    'country': df.winner,
}
win_source = bko.ColumnDataSource(data=win_columns)

loss_columns = {
    'left': df.index,
    'right': df.index + 1,
    'top': [0] * num_matches,
    'bottom': -df.loss_goals,
    'color': loss_color,
    'country': df.loser,
}
loss_source = bko.ColumnDataSource(data=loss_columns)

# Initialize new figure
# NOTE(review): the tail of this call was missing from the file. tools="hover"
# is restored because the tooltip cell further down looks up a HoverTool on
# the current plot; any other lost arguments (e.g. sizing) are not recovered.
bkp.figure(title="World Cup Goals per Match",
           x_axis_label="Match number",
           y_axis_label="Goals scored",
           tools="hover")

# Plot all following glyphs on the same figure
# NOTE(review): the statement under this comment was missing; hold() is the
# presumed original call -- confirm against the notebook.
bkp.hold()

# Winners first, then "losers": both sources expose the same column names,
# so one identical quad call per source draws each half of the chart.
for side_source in (win_source, loss_source):
    bkp.quad(left='left', right='right', top='top', bottom='bottom',
             color='color', source=side_source, line_color=None)

# Direction indication: rotated labels left of the first match, one above
# and one below the baseline.
for label_y, label in ((0.2, "Winning team"), (-2.4, "Losing team")):
    bkp.text([-1.5], [label_y], text=label, angle=np.pi/2)

# Simple visual separator
# NOTE(review): the y coordinates of this call were missing from the file.
# [0, 0] is used because y=0 is the shared baseline of the winner and loser
# quads; any styling arguments the original passed are not recovered.
bkp.line([0, num_matches-1],
         [0, 0])

Here we'll attach the country name to the hover tooltip for each glyph.

# Grab the first HoverTool attached to the current plot; this raises
# IndexError if the figure was created without a hover tool.
hover = [t for t in bkp.curplot().tools if isinstance(t, bko.HoverTool)][0]

# "@country" refers to the "country" column of the glyph's data source.
hover.tooltips = OrderedDict([
    ("Country", "@country"),
])

Aaaand show()! Germany in red and Brazil in yellow; hover for country information.

This game was exceptional.

from IPython.display import Image
