In [1]:
from IPython.display import display, HTML
import pandas as pd
import numpy as np
from jinja2 import Template
from bokeh.models import (
ColumnDataSource, Plot, Circle, Range1d,
LinearAxis, TapTool, HoverTool, Text,
SingleIntervalTicker,
)
from bokeh.models.actions import Callback
from bokeh.models.widgets import Slider
from bokeh.palettes import Spectral6
from bokeh.plotting import vplot, hplot
from bokeh.resources import Resources
from bokeh.embed import file_html
from bokeh.browserlib import view
In [2]:
fertility_df = pd.read_csv('../slider_example/data/fertility.csv', index_col='Country')
life_expectancy_df = pd.read_csv('../slider_example/data/life_expectancy.csv', index_col='Country')
population_df = pd.read_csv('../slider_example/data/population.csv', index_col='Country')
regions_df = pd.read_csv('../slider_example/data/regions.csv', index_col='Country')
Make the column names ints not strings for handling
In [3]:
columns = list(fertility_df.columns)
years = list(range(int(columns[0]), int(columns[-1])))
rename_dict = dict(zip(columns, years))
fertility_df = fertility_df.rename(columns=rename_dict)
life_expectancy_df = life_expectancy_df.rename(columns=rename_dict)
population_df = population_df.rename(columns=rename_dict)
regions_df = regions_df.rename(columns=rename_dict)
Turn population into bubble sizes. Use min_size and factor to tweak.
In [4]:
scale_factor = 200
population_df_size = np.sqrt(population_df/np.pi)/scale_factor
min_size = 3
population_df_size = population_df_size.where(population_df_size >= min_size).fillna(min_size)
Use pandas categories and categorize & color the regions
In [5]:
regions_df.Group = regions_df.Group.astype('category')
regions = list(regions_df.Group.cat.categories)
def get_color(r):
index = regions.index(r.Group)
return Spectral6[regions.index(r.Group)]
regions_df['region_color'] = regions_df.apply(get_color, axis=1)
The plot animates with the slider showing the data over time from 1964 to 2013. We can think of each year as a seperate static plot, and when the slider moves, we use the Callback to change the data source that is driving the plot.
We could use bokeh-server to drive this change, but as the data is not too big we can also pass all the datasets to the javascript at once and switch between them on the client side.
This means that we need to build one data source for each year that we have data for and are going to switch between using the slider. We build them and add them to a dictionary sources
that holds them under a key that is the name of the year preficed with a _
.
In [6]:
sources = {}
region_color = regions_df['region_color']
region_color.name = 'region_color'
for year in years:
fertility = fertility_df[year]
fertility.name = 'fertility'
life = life_expectancy_df[year]
life.name = 'life'
population = population_df_size[year]
population.name = 'population'
new_df = pd.concat([fertility, life, population, region_color], axis=1)
sources['_' + str(year)] = ColumnDataSource(new_df)
sources looks like this
{'_1964': <bokeh.models.sources.ColumnDataSource at 0x7f7e7d165cc0>,
'_1965': <bokeh.models.sources.ColumnDataSource at 0x7f7e7d165b00>,
'_1966': <bokeh.models.sources.ColumnDataSource at 0x7f7e7d1656a0>,
'_1967': <bokeh.models.sources.ColumnDataSource at 0x7f7e7d165ef0>,
'_1968': <bokeh.models.sources.ColumnDataSource at 0x7f7e7e9dac18>,
'_1969': <bokeh.models.sources.ColumnDataSource at 0x7f7e7e9da9b0>,
'_1970': <bokeh.models.sources.ColumnDataSource at 0x7f7e7e9da668>,
'_1971': <bokeh.models.sources.ColumnDataSource at 0x7f7e7e9da0f0>...
We will pass this dictionary to the Callback. In doing so, we will find that in our javascript we have an object called, for example 1964 that refers to our ColumnDataSource. Note that we needed the prefixing as JS objects cannot begin with a number.
Finally we construct a string that we can insert into our javascript code to define an object.
The string looks like this: {1962: _1962, 1963: _1963, ....}
Note the keys of this object are integers and the values are the references to our ColumnDataSources from above. So that now, in our JS code, we have an object that's storing all of our ColumnDataSources and we can look them up.
In [7]:
dictionary_of_sources = dict(zip([x for x in years], ['_%s' % x for x in years]))
js_source_array = str(dictionary_of_sources).replace("'", "")
In [8]:
# Set up the plot
xdr = Range1d(1, 9)
ydr = Range1d(20, 100)
plot = Plot(
x_range=xdr,
y_range=ydr,
title="",
plot_width=800,
plot_height=400,
outline_line_color=None,
toolbar_location=None,
)
AXIS_FORMATS = dict(
minor_tick_in=None,
minor_tick_out=None,
major_tick_in=None,
major_label_text_font_size="10pt",
major_label_text_font_style="normal",
axis_label_text_font_size="10pt",
axis_line_color='#AAAAAA',
major_tick_line_color='#AAAAAA',
major_label_text_color='#666666',
major_tick_line_cap="round",
axis_line_cap="round",
axis_line_width=1,
major_tick_line_width=1,
)
xaxis = LinearAxis(SingleIntervalTicker(interval=1), axis_label="Children per woman (total fertility)", **AXIS_FORMATS)
yaxis = LinearAxis(SingleIntervalTicker(interval=20), axis_label="Life expectancy at birth (years)", **AXIS_FORMATS)
plot.add_layout(xaxis, 'below')
plot.add_layout(yaxis, 'left')
In [9]:
# Add the year in background (add before circle)
text_source = ColumnDataSource({'year': ['%s' % years[0]]})
text = Text(x=2, y=35, text='year', text_font_size='150pt', text_color='#EEEEEE')
plot.add_glyph(text_source, text)
Out[9]:
We add the bubbles using the Circle glyph. We start from the first year of data and that is our source that drives the circles (the other sources will be used later).
plot.add_glyph returns the renderer, and we pass this to the HoverTool so that hover only happens for the bubbles on the page and not other glyph elements.
In [10]:
# Add the circle
renderer_source = sources['_%s' % years[0]]
circle_glyph = Circle(
x='fertility', y='life', size='population',
fill_color='region_color', fill_alpha=0.8,
line_color='#7c7e71', line_width=0.5, line_alpha=0.5)
circle_renderer = plot.add_glyph(renderer_source, circle_glyph)
# Add the hover (only against the circle and not other plot elements)
tooltips = "@index"
plot.add_tools(HoverTool(tooltips=tooltips, renderers=[circle_renderer]))
In [11]:
text_x = 7
text_y = 95
for i, region in enumerate(regions):
plot.add_glyph(Text(x=text_x, y=text_y, text=[region], text_font_size='10pt', text_color='#666666'))
plot.add_glyph(Circle(x=text_x - 0.1, y=text_y + 2, fill_color=Spectral6[i], size=10, line_color=None, fill_alpha=0.8))
text_y = text_y - 5
Last, but not least, we add the slider widget and the JS callback code which changes the data of the renderer_source (powering the bubbles / circles) and the data of the text_source (powering background text). After we've set() the data
we need to trigger() a change
. slider, renderer_source, text_source are all available because we add them as args to Callback.
It is the combination of sources = %s % (js_source_array)
in the JS and Callback(args=sources...)
that provides the ability to look-up, by year, the JS version of our python-made ColumnDataSource.
In [12]:
# Add the slider
code = """
var year = slider.get('value'),
sources = %s,
new_source_data = sources[year].get('data');
renderer_source.set('data', new_source_data);
renderer_source.trigger('change');
text_source.set('data', {'year': [String(year)]});
text_source.trigger('change');
""" % js_source_array
callback = Callback(args=sources, code=code)
slider = Slider(start=years[0], end=years[-1], value=1, step=1, title="Year", callback=callback)
callback.args["slider"] = slider
callback.args["renderer_source"] = renderer_source
callback.args["text_source"] = text_source
In [15]:
# Stick the plot and the slider together
layout = vplot(plot, hplot(slider))
with open('../slider_example/gapminder_template.html', 'r') as f:
template = Template(f.read())
# Use inline resources
resources = Resources(mode='inline')
template_variables = {
'bokeh_min_js': resources.js_raw[0]
}
html = file_html(layout, resources, "Bokeh - Gapminder Bubble Plot", template=template, template_variables=template_variables)
with open('../slider_example/gapminder.html', 'w') as f:
f.write(html)
In [17]:
display(HTML(html))