In [ ]:
import pandas as pd
from bokeh.embed import file_html
from bokeh.io import output_notebook, show
from bokeh.layouts import layout
from bokeh.models import (
ColumnDataSource, Plot, Circle, Range1d, LinearAxis, HoverTool,
Text, SingleIntervalTicker, Slider, CustomJS, Legend, LegendItem, CategoricalColorMapper)
from bokeh.palettes import Spectral6
from data import process_data
In [ ]:
from bokeh.resources import INLINE
# Direct Bokeh output to the notebook, passing the INLINE resources object
# as the source of the BokehJS resources.
output_notebook(resources=INLINE)
The plot animates with the slider showing the data over time from 1964 to 2013. We can think of each year as a separate static plot, and when the slider moves, we use the callback to change the data source that is driving the plot.
We could use bokeh-server to drive this change, but as the data is not too big we can also pass all the datasets to the javascript at once and switch between them on the client side.
This means that we need to build one data source for each year that we have data for and are going to switch between using the slider. We build them and add them to a dictionary `sources` that holds them under a key that is the name of the year prefixed with a `_`.
In [ ]:
# Load the per-year data frames, the region table, the list of years and the
# list of region names.
fertility_df, life_expectancy_df, population_df_size, regions_df, years, regions_list = process_data()

# One ColumnDataSource per year, stored under the year prefixed with "_"
# (e.g. "_1964").
sources = {}

# Series.rename(<scalar>) returns a renamed Series instead of mutating the
# column object that belongs to regions_df.
region_name = regions_df.Group.rename('region')

for year in years:
    # Combine the three measures for this year with the region column so
    # that every row of the source carries all four fields.
    new_df = pd.concat(
        [
            fertility_df[year].rename('fertility'),
            life_expectancy_df[year].rename('life'),
            population_df_size[year].rename('population'),
            region_name,
        ],
        axis=1,
    )
    sources['_' + str(year)] = ColumnDataSource(new_df)
sources looks like this
{'_1964': <bokeh.models.sources.ColumnDataSource at 0x7f7e7d165cc0>,
'_1965': <bokeh.models.sources.ColumnDataSource at 0x7f7e7d165b00>,
'_1966': <bokeh.models.sources.ColumnDataSource at 0x7f7e7d1656a0>,
'_1967': <bokeh.models.sources.ColumnDataSource at 0x7f7e7d165ef0>,
'_1968': <bokeh.models.sources.ColumnDataSource at 0x7f7e7e9dac18>,
'_1969': <bokeh.models.sources.ColumnDataSource at 0x7f7e7e9da9b0>,
'_1970': <bokeh.models.sources.ColumnDataSource at 0x7f7e7e9da668>,
'_1971': <bokeh.models.sources.ColumnDataSource at 0x7f7e7e9da0f0>...
We will pass this dictionary to the callback. In doing so, we will find that in our javascript we have an object called, for example, _1964 that refers to our ColumnDataSource. Note that we needed the prefixing as JS identifiers cannot begin with a number.
Finally we construct a string that we can insert into our javascript code to define an object.
The string looks like this: {1964: _1964, 1965: _1965, ....}
Note the keys of this object are integers and the values are the references to our ColumnDataSources from above. So that now, in our JS code, we have an object that's storing all of our ColumnDataSources and we can look them up.
In [ ]:
# Map every year to the name its ColumnDataSource is stored under in `sources`.
dictionary_of_sources = {year: '_%s' % year for year in years}

# Render the mapping as a JavaScript object literal such as
# "{1964: _1964, 1965: _1965}". The values are left unquoted so they refer to
# the JS variables of the same name. The text is assembled explicitly instead
# of stripping quotes from the dict repr, so it does not depend on how the
# year objects happen to repr themselves.
js_source_array = '{%s}' % ', '.join(
    '%s: %s' % (year, name) for year, name in dictionary_of_sources.items()
)
In [ ]:
# Set up the plot: fixed data ranges on both axes, no outline and no toolbar.
xdr = Range1d(start=1, end=9)
ydr = Range1d(start=20, end=100)

plot_options = {
    'x_range': xdr,
    'y_range': ydr,
    'plot_width': 800,
    'plot_height': 400,
    'outline_line_color': None,
    'toolbar_location': None,
    'min_border': 20,
}
plot = Plot(**plot_options)
In [ ]:
# Visual settings shared by both axes.
AXIS_FORMATS = {
    'minor_tick_in': None,
    'minor_tick_out': None,
    'major_tick_in': None,
    'major_label_text_font_size': "10pt",
    'major_label_text_font_style': "normal",
    'axis_label_text_font_size': "10pt",
    'axis_line_color': '#AAAAAA',
    'major_tick_line_color': '#AAAAAA',
    'major_label_text_color': '#666666',
    'major_tick_line_cap': "round",
    'axis_line_cap': "round",
    'axis_line_width': 1,
    'major_tick_line_width': 1,
}

# Horizontal axis: one tick per unit of fertility.
xaxis = LinearAxis(
    ticker=SingleIntervalTicker(interval=1),
    axis_label="Children per woman (total fertility)",
    **AXIS_FORMATS
)
# Vertical axis: one tick every 20 years of life expectancy.
yaxis = LinearAxis(
    ticker=SingleIntervalTicker(interval=20),
    axis_label="Life expectancy at birth (years)",
    **AXIS_FORMATS
)

# Attach the axes in the same order as before: x below, then y on the left.
for axis, side in ((xaxis, 'below'), (yaxis, 'left')):
    plot.add_layout(axis, side)
In [ ]:
# Draw the current year as large, pale text. It is added before the circles
# so that it sits behind them.
first_year_label = str(years[0])
text_source = ColumnDataSource(data={'year': [first_year_label]})
text = Text(
    x=2,
    y=35,
    text='year',
    text_font_size='150pt',
    text_color='#EEEEEE',
)
plot.add_glyph(text_source, text)
We add the bubbles using the Circle glyph. We start from the first year of data and that is our source that drives the circles (the other sources will be used later). `plot.add_glyph` returns the renderer, and we pass this to the HoverTool so that hover only happens for the bubbles on the page and not other glyph elements.
We want the circles to be colored by the region they're in, so we use a CategoricalColorMapper
to build the map and apply it to fill color in the transform field.
In [ ]:
# Make a ColorMapper that assigns one Spectral6 color to each region name.
color_mapper = CategoricalColorMapper(palette=Spectral6, factors=regions_list)

# Add the circles, driven initially by the data source of the first year.
renderer_source = sources['_%s' % years[0]]
circle_glyph = Circle(
    x='fertility', y='life', size='population',
    # Color each circle by passing its 'region' value through the color mapper.
    fill_color={'field': 'region', 'transform': color_mapper},
    fill_alpha=0.8,
    line_color='#7c7e71', line_width=0.5, line_alpha=0.5)
circle_renderer = plot.add_glyph(renderer_source, circle_glyph)

# Add the hover (only against the circle and not other plot elements).
tooltips = "@index"
plot.add_tools(HoverTool(tooltips=tooltips, renderers=[circle_renderer]))

# We want a legend for the circles, populated from the 'region' column of the
# data source. The label is given as an explicit field spec so that it is read
# as a column name and not as the literal text "region".
plot.add_layout(Legend(items=[
    LegendItem(label={'field': 'region'}, renderers=[circle_renderer]),
]))
Next we add the slider widget and the JS callback code which changes the data of the renderer_source (powering the bubbles / circles) and the data of the text_source (powering background text). In the code below this is done by assigning a new value to each source's `data` property. slider, renderer_source, text_source are all available because we add them as args to the callback.
It is the combination of `sources = %s` (filled in with `js_source_array`) in the JS and `CustomJS(args=sources, ...)` that provides the ability to look up, by year, the JS version of our Python-made ColumnDataSource.
In [ ]:
# JavaScript run when the slider moves: look up the data source for the
# selected year and copy its data into the source that drives the circles,
# then update the background year text. `%s` is replaced by the JS object
# literal that maps each year to its per-year source variable.
code = """
var year = slider.value,
sources = %s,
new_source_data = sources[year].data;
renderer_source.data = new_source_data;
text_source.data = {'year': [String(year)]};
""" % js_source_array

# The per-year sources are passed as args so that they are available by name
# ("_1964", ...) inside the JS code.
callback = CustomJS(args=sources, code=code)

# Add the slider. It starts on the first year, which is the year whose data
# and label are initially displayed; the previous initial value of 1 was
# outside the slider's [start, end] range.
slider = Slider(start=years[0], end=years[-1], value=years[0], step=1, title="Year", callback=callback)

# The remaining objects referenced by the JS code. The slider can only be
# registered after it has been created.
callback.args["renderer_source"] = renderer_source
callback.args["slider"] = slider
callback.args["text_source"] = text_source
In [ ]:
# Stick the plot and the slider together
# Stack the plot above the slider and display the result.
page = layout([[plot], [slider]], sizing_mode='scale_width')
show(page)
In [ ]:
In [ ]: