In [1]:
import os

import numpy as np
import pandas as pd
from bokeh.embed import file_html
from bokeh.models import ColumnDataSource, Plot, Circle, Range1d, LinearAxis, TapTool, HoverTool, Text
from bokeh.models.actions import Callback
from bokeh.models.widgets import Slider
from bokeh.palettes import Spectral6
from bokeh.plotting import vplot
from bokeh.resources import INLINE
from IPython.display import IFrame, display, HTML
In [2]:
# Links via http://www.gapminder.org/data/
population_url = "http://spreadsheets.google.com/pub?key=phAwcNAVuyj0XOoBL_n5tAQ&output=xls"
fertility_url = "http://spreadsheets.google.com/pub?key=phAwcNAVuyj0TAlJeCEzcGQ&output=xls"
life_expectancy_url = "http://spreadsheets.google.com/pub?key=tiAiXcrneZrUnnJ9dBU-PAw&output=xls"


def get_data(url, first_year=1964, last_year=2013):
    """Download one spreadsheet and keep only the requested range of years.

    Parameters
    ----------
    url : str
        Location of an Excel sheet that ``pd.read_excel`` can open; its first
        column is used as the index.
    first_year, last_year : int
        Inclusive bounds applied to the sheet's column labels.

    Returns
    -------
    pandas.DataFrame
        The sheet in its original orientation, restricted to the columns whose
        label lies in ``[first_year, last_year]``.
    """
    df = pd.read_excel(url, index_col=0)
    # The double unstack swaps rows and columns, so the column labels can be
    # filtered through the index; the second double unstack swaps them back.
    df = df.unstack().unstack()
    df = df[(df.index >= first_year) & (df.index <= last_year)]
    df = df.unstack().unstack()
    return df


def load_cached(cache_path, url):
    """Return the frame stored at ``cache_path``, downloading it first if absent.

    The download only happens when the cache file does not exist, so a normal
    re-run reads the local HDF file and never touches the network.
    """
    if not os.path.exists(cache_path):
        get_data(url).to_hdf(cache_path, key='df')
    return pd.read_hdf(cache_path, key='df')


fertility_df = load_cached('fertility_df.hdf', fertility_url)
life_expectancy_df = load_cached('life_expectancy_df.hdf', life_expectancy_url)
population_df = load_cached('population_df.hdf', population_url)
In [3]:
# have common countries across all data
# Every frame is restricted to the labels present in all three of them, so no
# frame keeps a country that another one lacks.
common_countries = (
    fertility_df.index
    .intersection(life_expectancy_df.index)
    .intersection(population_df.index)
)
fertility_df = fertility_df[fertility_df.index.isin(common_countries)]
life_expectancy_df = life_expectancy_df[life_expectancy_df.index.isin(common_countries)]
population_df = population_df[population_df.index.isin(common_countries)]

# get a size value based on population, but don't let it get too small
population_df_size = np.sqrt(population_df / np.pi) / 200
min_size = 3
# where() blanks every value below the floor (a comparison with NaN is False,
# so missing values are blanked too); fillna() then sets all of them to the floor.
population_df_size = population_df_size.where(population_df_size >= min_size).fillna(min_size)
Get the regions and color them
In [6]:
# Spreadsheet whose `Group` column assigns each row to a region group.
regions_url = "https://docs.google.com/spreadsheets/d/1OxmGUNWeADbPJkQxVPupSOK5MbAECdqThnvyPrwG5Os/pub?gid=1&output=xls"
regions_df = pd.read_excel(regions_url, index_col=0)

# Discard every row whose label has no counterpart in the life-expectancy frame.
unmatched_labels = regions_df.index.difference(life_expectancy_df.index)
regions_df = regions_df.drop(unmatched_labels)

# Store the group as a categorical and keep the list of its distinct categories.
group_as_category = regions_df.Group.astype('category')
regions_df.Group = group_as_category
cats = list(group_as_category.cat.categories)
def get_color(r):
    """Return the palette color assigned to a row's region group.

    Parameters
    ----------
    r : object with a ``Group`` attribute
        Typically one row of ``regions_df`` as passed by ``DataFrame.apply``
        with ``axis=1``.

    Returns
    -------
    The ``Spectral6`` entry found at the position of ``r.Group`` in ``cats``.

    Raises
    ------
    ValueError
        If ``r.Group`` is not listed in ``cats``.
    IndexError
        If the group's position is beyond the end of the palette.
    """
    return Spectral6[cats.index(r.Group)]
# Compute one color per row (axis=1 hands each row to get_color) and store the
# result as a new column.
row_colors = regions_df.apply(get_color, axis=1)
regions_df['region_color'] = row_colors
In [7]:
# Set up the data.
#
# One ColumnDataSource is built per year and stored in `sources` under the
# key '_<year>'. The whole dictionary is later passed to the callback as its
# args, so every source is available to the JS code under that name.
#
# dictionary_of_sources maps each year to the name of its source, e.g.:
# {
# 1964: '_1964',
# 1965: '_1965',
# ....
# }
# Its string form with the quotes stripped is js_source_array:
# '{1964: _1964, 1965: _1965, ....}'
#
# When this text is embedded in the callback code and evaluated at runtime,
# _1964, _1965, ... resolve to the actual source objects passed in as args.
sources = {}
years = list(fertility_df.columns)
region_color = regions_df['region_color']
column_names = ['fertility', 'life', 'population', 'region_color']
for year in years:
    year_columns = [
        fertility_df[year],
        life_expectancy_df[year],
        population_df_size[year],
        region_color,
    ]
    # `keys` supplies the column labels, so the Series taken from the input
    # frames never have to be renamed in place.
    new_df = pd.concat(year_columns, axis=1, keys=column_names)
    sources['_%s' % year] = ColumnDataSource(new_df)

dictionary_of_sources = {year: '_%s' % year for year in years}
js_source_array = str(dictionary_of_sources).replace("'", "")
# Set up the plot
xdr = Range1d(1, 8)
ydr = Range1d(20, 85)
plot = Plot(
    x_range=xdr,
    y_range=ydr,
    title="",
    plot_width=800,
    plot_height=400,
    outline_line_color=None,
    toolbar_location=None,
)

# The x axis belongs below the plot and the y axis on its left. The labels
# name the columns the circle glyph plots on each axis.
xaxis = LinearAxis(axis_label="Fertility")
yaxis = LinearAxis(axis_label="Life expectancy")
plot.add_layout(xaxis, 'below')
plot.add_layout(yaxis, 'left')

# "@index" is filled from the `index` column of the hovered point's data source.
tooltips = "@index"
plot.add_tools(HoverTool(tooltips=tooltips))
# Add the circle
# The glyph gets its own source, initialised with a copy of the first year's
# columns. The slider callback overwrites the data of `renderer_source`; if it
# were the very same object as the first year's entry in `sources`, that
# year's data would be lost after the first slider move.
first_year_source = sources['_%s' % years[0]]
renderer_source = ColumnDataSource(data=dict(first_year_source.data))
highlighted = Circle(
    x='fertility', y='life', size='population',
    fill_color='region_color', fill_alpha=0.5,
    line_color='#7c7e71', line_width=0.5, line_alpha=0.5)
plot.add_glyph(renderer_source, highlighted)
# Add the slider
# JS run on every slider change: look up the source for the selected year and
# copy its data into the source the glyph is rendered from.
code = """
var key = slider.get('value'),
sources = %s,
new_source_data = sources[key].get('data');
renderer_source.set('data', new_source_data);
renderer_source.trigger('change');
""" % js_source_array

# A copy of `sources` is passed so the extra args added below do not end up
# in the per-year dictionary itself.
callback = Callback(args=dict(sources), code=code)

# The slider starts on the first year, which is the year initially rendered;
# the initial value has to lie within [start, end].
slider = Slider(start=years[0], end=years[-1], value=years[0], step=1, title="Year", callback=callback)

# The callback and the slider reference each other, so these args can only be
# attached after both objects exist.
callback.args["slider"] = slider
callback.args["renderer_source"] = renderer_source

layout = vplot(plot, slider)
html = file_html(layout, INLINE, "gapminder")
In [8]:
# Wrap the generated standalone page so the notebook renders it as HTML.
rendered_page = HTML(html)
display(rendered_page)
In [ ]: