In [ ]:
from bokeh.io import show, output_notebook
from bokeh.models import CategoricalColorMapper, ColumnDataSource, FactorRange
from bokeh.plotting import figure
output_notebook()
To create a basic bar plot, typically all that is needed is to call vbar
with x
and top
values, or hbar
with y
and right
values. A width
or height
may also be supplied if something different than the default value of 1 is desired.
The example below plots vertical bars representing counts for different types of fruit on a categorical range:
x_range = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
In [ ]:
# Categories (the factors of the x-axis) and the bar height for each of them.
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]

# Passing the list of factors as x_range gives the plot a categorical x-axis.
p = figure(
    x_range=fruits,
    plot_height=250,
    title="Fruit Counts",
    toolbar_location=None,
)
p.vbar(x=fruits, top=counts, width=0.9)

# Anchor the value axis at zero and turn off the grid lines of the x dimension.
p.y_range.start = 0
p.xgrid.grid_line_color = None

show(p)
Bokeh displays the bars in the order the factors are given for the range. So, "sorting" bars in a bar plot is identical to sorting the factors for the range.
In the example below the fruit factors are sorted in increasing order according to their corresponding counts, causing the bars to be sorted.
In [ ]:
# Order the factors by their counts; the bars follow the order of the range.
# (sorted() is stable, so fruits with equal counts keep their original order.)
count_by_fruit = dict(zip(fruits, counts))
sorted_fruits = sorted(fruits, key=count_by_fruit.get)

p = figure(
    x_range=sorted_fruits,
    plot_height=250,
    title="Fruit Counts",
    toolbar_location=None,
)

# The glyph data stays in its original order: only the range was re-ordered.
p.vbar(x=fruits, top=counts, width=0.9)

p.y_range.start = 0
p.xgrid.grid_line_color = None

show(p)
To set the color of each bar, you can pass explicit color values to the color
option (which is shorthand for setting both the fill_color
and line_color
).
In the example below we add shading to the previous plot, but now all the data (including the explicit colors) is put inside a ColumnDataSource
which is passed to vbar
as the source
argument.
In [ ]:
from bokeh.palettes import Spectral6

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]

# Keep the factors, their values and one explicit color per bar together in a
# single data source; the glyph below refers to these columns by name.
source = ColumnDataSource(data={'fruits': fruits, 'counts': counts, 'color': Spectral6})

p = figure(
    x_range=fruits,
    plot_height=250,
    title="Fruit Counts",
    toolbar_location=None,
)
p.vbar(
    x='fruits',
    top='counts',
    width=0.9,
    color='color',
    legend="fruits",
    source=source,
)

p.xgrid.grid_line_color = None

# The value axis runs from 0 to 9, above the tallest bar (6), which is where
# the top-centered horizontal legend is placed.
p.y_range.start = 0
p.y_range.end = 9
p.legend.location = "top_center"
p.legend.orientation = "horizontal"

show(p)
Another way to shade bars different colors is to provide a colormapper. The factor_cmap
transform can be applied to map a categorical value into a color. Other transforms include linear_cmap
and log_cmap
which can be used to map continuous numerical values to colors.
The example below reproduces the previous example using a factor_cmap
to convert fruit types into colors.
In [ ]:
from bokeh.transform import factor_cmap

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]
source = ColumnDataSource(data={'fruits': fruits, 'counts': counts})

# Instead of storing a color column, derive the fill color from the value of
# the 'fruits' column via a categorical color mapping.
fruit_fill = factor_cmap('fruits', palette="Spectral6", factors=fruits)

p = figure(
    x_range=fruits,
    plot_height=250,
    title="Fruit Counts",
    toolbar_location=None,
)
p.vbar(
    x='fruits',
    top='counts',
    width=0.9,
    source=source,
    legend="fruits",
    line_color='white',
    fill_color=fruit_fill,
)

p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = 9
p.legend.location = "top_center"
p.legend.orientation = "horizontal"

show(p)
Often categorical data is arranged into hierarchies, for instance we might have fruit counts, per year. To represent this kind of hierarchy, our range becomes a list of tuples:
x_range = [ ("Apples", "2015"), ("Apples", "2016"), ("Apples", "2017"), ... ]
The coordinates for the bars should be these same tuple values. When we create a hierarchical range in this way, Bokeh will automatically create a visually grouped axis.
The plot below displays fruit counts per year.
In [ ]:
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ['2015', '2016', '2017']

data = {'fruits' : fruits,
        '2015'   : [2, 1, 4, 3, 2, 4],
        '2016'   : [5, 3, 3, 2, 4, 6],
        '2017'   : [3, 2, 4, 4, 5, 3]}

# this creates [ ("Apples", "2015"), ("Apples", "2016"), ("Apples", "2017"), ("Pears", "2015"), ... ]
x = [(fruit, year) for fruit in fruits for year in years]

# Interleave the per-year columns in the same fruit-major order as ``x``
# (like an hstack). Driving this from ``years`` keeps the values aligned with
# the factors if the list of years is ever changed.
counts_per_fruit = zip(*(data[year] for year in years))
counts = tuple(count for yearly in counts_per_fruit for count in yearly)

source = ColumnDataSource(data=dict(x=x, counts=counts))

# A range of (fruit, year) tuples produces a visually grouped axis.
p = figure(x_range=FactorRange(*x), plot_height=250,
           toolbar_location=None, title="Fruit Counts by Year")

p.vbar(x='x', top='counts', width=0.9, source=source)

p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.xaxis.major_label_orientation = 1

show(p)
We can combine a color mapper with hierarchical ranges, and in fact we can choose to apply a color mapping based on only "part" of a categorical factor.
In the example below, the arguments start=1, end=2
are passed to factor_cmap
. This means that for each factor value (which is a tuple), the value factor[1:2]
is what should be used for colormapping. In this specific case, that translates to shading each bar according to the "year" portion.
In [ ]:
from bokeh.transform import factor_cmap

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ['2015', '2016', '2017']

data = {'fruits' : fruits,
        '2015'   : [2, 1, 4, 3, 2, 4],
        '2016'   : [5, 3, 3, 2, 4, 6],
        '2017'   : [3, 2, 4, 4, 5, 3]}

# this creates [ ("Apples", "2015"), ("Apples", "2016"), ("Apples", "2017"), ("Pears", "2015"), ... ]
x = [(fruit, year) for fruit in fruits for year in years]

# Interleave the per-year columns in the same fruit-major order as ``x``
# (like an hstack). Driving this from ``years`` keeps the values aligned with
# the factors if the list of years is ever changed.
counts_per_fruit = zip(*(data[year] for year in years))
counts = tuple(count for yearly in counts_per_fruit for count in yearly)

source = ColumnDataSource(data=dict(x=x, counts=counts))

p = figure(x_range=FactorRange(*x), plot_height=250, toolbar_location=None, title="Fruit Counts by Year")

# start=1, end=2 selects factor[1:2], i.e. only the "year" part of each
# (fruit, year) tuple, as the value that is mapped to a color.
p.vbar(x='x', top='counts', width=0.9, source=source, line_color="white",
       fill_color=factor_cmap('x', palette=["#c9d9d3", "#718dbf", "#e84d60"], factors=years, start=1, end=2))

p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.xaxis.major_label_orientation = 1

show(p)
Sometimes we may wish to have "grouped" bars without a visually grouped axis. For instance, we may wish to indicate groups by colormapping or other means. This can be accomplished in Bokeh by providing "flat" (i.e. non-tuple) factors, and using the dodge
transform to shift the bars by an arbitrary amount.
The example below also shows fruit counts per year, grouping the bars with dodge
on the flat categorical range from the original example above.
In [ ]:
from bokeh.core.properties import value
from bokeh.transform import dodge, factor_cmap

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ['2015', '2016', '2017']

data = {'fruits' : fruits,
        '2015'   : [2, 1, 4, 3, 2, 4],
        '2016'   : [5, 3, 3, 2, 4, 6],
        '2017'   : [3, 2, 4, 4, 5, 3]}

source = ColumnDataSource(data=data)

p = figure(x_range=fruits, plot_height=250, toolbar_location=None, title="Fruit Counts by Year")

# One series of bars per year: (column name, sideways shift, bar color).
# Each series is shifted along the flat categorical range with dodge so the
# three bars of a fruit sit next to each other.
series = [
    ('2015', -0.25, "#c9d9d3"),
    ('2016', 0.0, "#718dbf"),
    ('2017', 0.25, "#e84d60"),
]
for year, offset, color in series:
    p.vbar(x=dodge('fruits', offset, range=p.x_range), top=year, width=0.2, source=source,
           color=color, legend=value(year))

p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None

p.y_range.start = 0
p.y_range.end = 10

p.legend.location = "top_left"
p.legend.orientation = "horizontal"

show(p)
We may also wish to stack bars, instead of grouping them. Bokeh provides vbar_stack
and hbar_stack
to help with this. To use these functions we pass a list of "stackers" which is a sequence of column names for columns in our data source. Each column represents one "layer" across all of our stacked bars, and each column is added to the previous columns to position the next layer.
The example below shows our fruit counts per year, this time stacked by year instead of grouped.
In [ ]:
from bokeh.core.properties import value
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ["2015", "2016", "2017"]
colors = ["#c9d9d3", "#718dbf", "#e84d60"]

data = {'fruits' : fruits,
        '2015'   : [2, 1, 4, 3, 2, 4],
        '2016'   : [5, 3, 4, 2, 4, 6],
        '2017'   : [3, 2, 4, 4, 5, 3]}

source = ColumnDataSource(data=data)

p = figure(
    x_range=fruits,
    plot_height=250,
    title="Fruit Counts by Year",
    toolbar_location=None,
)

# `years` is the list of stackers: each named column becomes one layer of
# every bar, stacked on top of the previous columns.
year_labels = [value(year) for year in years]
p.vbar_stack(years, x='fruits', width=0.9, color=colors, source=source, legend=year_labels)

p.x_range.range_padding = 0.1
p.y_range.start = 0

p.legend.location = "top_left"
p.legend.orientation = "horizontal"

# Visual clean-up: no vertical grid, minor ticks or plot outline.
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None

show(p)
In [ ]:
from bokeh.models import ColumnDataSource
from bokeh.palettes import GnBu3, OrRd3
from bokeh.plotting import figure

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ["2015", "2016", "2017"]

# Export values are positive and import values are negative (or zero), so the
# two stacks extend in opposite directions from zero.
exports = {'fruits' : fruits,
           '2015'   : [2, 1, 4, 3, 2, 4],
           '2016'   : [5, 3, 4, 2, 4, 6],
           '2017'   : [3, 2, 4, 4, 5, 3]}
imports = {'fruits' : fruits,
           '2015'   : [-1, 0, -1, -3, -2, -1],
           '2016'   : [-2, -1, -3, -1, -2, -2],
           '2017'   : [-1, -2, -1, 0, -2, -2]}

p = figure(
    y_range=fruits,
    x_range=(-16, 16),
    plot_height=250,
    title="Fruit import/export, by year",
    toolbar_location=None,
)

export_labels = ["%s exports" % year for year in years]
import_labels = ["%s imports" % year for year in years]

p.hbar_stack(years, y='fruits', height=0.9, color=GnBu3,
             source=ColumnDataSource(exports), legend=export_labels)
p.hbar_stack(years, y='fruits', height=0.9, color=OrRd3,
             source=ColumnDataSource(imports), legend=import_labels)

p.y_range.range_padding = 0.1
p.legend.location = "top_left"

p.ygrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None

show(p)
Whenever we use hierarchical categories, it is possible to use coordinates that refer to only the first portions of a factor. In this case, coordinates are centered inside the group appropriately.
The example below uses bars to show sales values for every month, grouped by quarter. Each bar has coordinates such as ("Q1", "jan")
, etc. Additionally a line displays the quarterly average trends, by using coordinates such as "Q1"
.
In [ ]:
# Two-level factors: (quarter, month).
factors = [
    ("Q1", "jan"), ("Q1", "feb"), ("Q1", "mar"),
    ("Q2", "apr"), ("Q2", "may"), ("Q2", "jun"),
    ("Q3", "jul"), ("Q3", "aug"), ("Q3", "sep"),
    ("Q4", "oct"), ("Q4", "nov"), ("Q4", "dec"),
]

p = figure(
    x_range=FactorRange(*factors),
    plot_height=250,
    toolbar_location=None,
    tools="",
)

# One bar per month, addressed with the full (quarter, month) factor.
x = [ 10, 12, 16, 9, 10, 8, 12, 13, 14, 14, 12, 16 ]
p.vbar(x=factors, top=x, width=0.9, alpha=0.5)

# Coordinates that give only the first level ("Q1", ...) are centered inside
# the corresponding group, which is where each line vertex ends up.
p.line(x=["Q1", "Q2", "Q3", "Q4"], y=[12, 9, 13, 14], color="red", line_width=2)

p.x_range.range_padding = 0.1
p.y_range.start = 0
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = 1

show(p)
In [ ]:
# Reuses `factors` (the (quarter, month) tuples) and `value` from earlier cells.
p = figure(
    x_range=FactorRange(*factors),
    plot_height=250,
    toolbar_location=None,
    tools="",
)

regions = ['east', 'west']

# One column of monthly values per region, keyed by the same factors that
# make up the range.
source = ColumnDataSource(data={
    'x': factors,
    'east': [ 5, 5, 6, 5, 5, 4, 5, 6, 7, 8, 6, 9 ],
    'west': [ 5, 7, 9, 4, 5, 4, 7, 7, 7, 6, 6, 7 ],
})

region_labels = [value(region) for region in regions]
p.vbar_stack(regions, x='x', width=0.9, alpha=0.5, color=["blue", "red"],
             source=source, legend=region_labels)

p.y_range.start = 0
p.y_range.end = 18
p.x_range.range_padding = 0.1

p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = 1

p.legend.location = "top_center"
p.legend.orientation = "horizontal"

show(p)
In [ ]:
from bokeh.sampledata.sprint import sprint

# Years are converted to strings before being used as the categorical y-range.
sprint.Year = sprint.Year.astype(str)

# A GroupBy object can initialize a data source directly; the bars below use
# its per-year Time_min / Time_max columns.
group = sprint.groupby('Year')
source = ColumnDataSource(group)

p = figure(
    y_range=group,
    x_range=(9.5,12.7),
    plot_width=400,
    plot_height=550,
    toolbar_location=None,
    title="Time Spreads for Sprint Medalists (by Year)",
)

# Each bar spans from the fastest to the slowest time of that year.
p.hbar(y="Year", left='Time_min', right='Time_max', height=0.4, source=source)

p.xaxis.axis_label = "Time (seconds)"
p.ygrid.grid_line_color = None
p.outline_line_color = None

show(p)
Although Pandas is not required to use Bokeh, using Pandas can make many things simpler. For instance, Pandas GroupBy
objects can be passed as the source
argument to a glyph (or used to initialize a ColumnDataSource
). When this is done, summary statistics for each group are automatically available in the data source.
In the example below we pass autompg.groupby(('cyl'))
as our source. Since the "autompg" DataFrame has an mpg
column, our grouped data source automatically has an mpg_mean
column we can use to drive glyphs.
In [ ]:
from bokeh.sampledata.autompg import autompg_clean as df

# The cylinder count and model year are used as categories in the cells
# below, so store both columns as strings.
for column in ("cyl", "yr"):
    df[column] = df[column].astype(str)
In [ ]:
from bokeh.palettes import Spectral5
from bokeh.transform import factor_cmap

# Group by cylinder count; the grouped data source exposes summary columns
# per group, including the `mpg_mean` column used for the bar heights.
group = df.groupby('cyl')
source = ColumnDataSource(group)

# One palette color per distinct cylinder count.
cyl_cmap = factor_cmap('cyl', palette=Spectral5, factors=sorted(df.cyl.unique()))

p = figure(plot_height=350, x_range=group, toolbar_location=None)

p.vbar(x='cyl', top='mpg_mean', width=1, line_color="white",
       fill_color=cyl_cmap, source=source)

p.xgrid.grid_line_color = None
p.y_range.start = 0
# Describes what the categorical axis shows (was a leftover placeholder label).
p.xaxis.axis_label = "# Cylinders"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None

show(p)
In [ ]:
from bokeh.models import HoverTool
from bokeh.palettes import Spectral5
from bokeh.transform import factor_cmap

# Grouping by two columns yields two-level factors, exposed by the data
# source as the `cyl_mfr` column.
group = df.groupby(by=['cyl', 'mfr'])
source = ColumnDataSource(group)

# end=1 restricts the color mapping to the first part of each factor, so bars
# are shaded by cylinder count only.
index_cmap = factor_cmap('cyl_mfr', palette=Spectral5, factors=sorted(df.cyl.unique()), end=1)

p = figure(
    plot_width=900,
    plot_height=400,
    x_range=group,
    toolbar_location=None,
    title="Mean MPG by # Cylinders and Manufacturer",
)

p.vbar(
    x='cyl_mfr',
    top='mpg_mean',
    width=1,
    line_color="white",
    fill_color=index_cmap,
    source=source,
)

p.x_range.range_padding = 0.05
p.y_range.start = 0

p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2

p.xgrid.grid_line_color = None
p.outline_line_color = None

# Show the mean MPG and the (cyl, mfr) factor of the bar under the cursor.
hover_rows = [("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")]
p.add_tools(HoverTool(tooltips=hover_rows))

show(p)
So far we have mostly plotted bars on categorical ranges, but other glyphs work as well. For instance we could plot a scatter plot of circles against a categorical range. Often times, such plots are improved by jittering the data along the categorical range. Bokeh provides a jitter
transform that can accomplish that.
The example below shows an individual GitHub commit history grouped by day of the week, and jittered to improve readability.
In [ ]:
import pandas as pd

from bokeh.transform import jitter
from bokeh.sampledata.commits import data

# Factors for the y-axis, in the order they are laid out on the range.
DAYS = ['Sun', 'Sat', 'Fri', 'Thu', 'Wed', 'Tue', 'Mon']

source = ColumnDataSource(data)

p = figure(
    plot_width=800,
    plot_height=300,
    y_range=DAYS,
    x_axis_type='datetime',
    title="Commits by Time of Day (US/Central) 2012—2016",
)

# Jitter the points along the categorical axis so that commits made on the
# same day do not all fall on a single line.
jittered_day = jitter('day', width=0.6, range=p.y_range)
p.circle(x='time', y=jittered_day, source=source, alpha=0.3)

p.xaxis[0].formatter.days = ['%Hh']
p.x_range.range_padding = 0
p.ygrid.grid_line_color = None

show(p)
Alternatively we might show the same data using bars, only giving a count per day.
In [ ]:
# Reuses `data` and `DAYS` from the jitter cell. Grouping by day makes the
# per-group summary columns available, including the `time_count` column
# that drives the bar lengths.
group = data.groupby('day')
source = ColumnDataSource(group)

p = figure(
    plot_width=800,
    plot_height=300,
    y_range=DAYS,
    x_range=(0, 1010),
    title="Commits by Day of the Week, 2012—2016",
    toolbar_location=None,
)

p.hbar(y='day', right='time_count', height=0.9, source=source)

p.ygrid.grid_line_color = None
p.outline_line_color = None

show(p)
Another kind of common categorical plot is the Categorical Heatmap, which has categorical ranges on both axes. Typically colormapped or shaded rectangles are displayed for each (x,y) categorical combination.
The example below demonstrates a categorical heatmap using unemployment data.
In [ ]:
import pandas as pd

from bokeh.io import show
from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, LinearColorMapper, PrintfTickFormatter
from bokeh.plotting import figure
from bokeh.sampledata.unemployment1948 import data
from bokeh.transform import transform

# Index the table by year (as strings) and keep only the monthly columns.
data.Year = data.Year.astype(str)
data = data.set_index('Year')
data.drop('Annual', axis=1, inplace=True)
data.columns.name = 'Month'

# Reshape to a 1D array of rates with a month and year for each row.
df = pd.DataFrame(data.stack(), columns=['rate']).reset_index()
source = ColumnDataSource(df)

# This is the colormap from the original NYTimes plot.
colors = ["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1", "#cc7878", "#933b41", "#550b1d"]
mapper = LinearColorMapper(palette=colors, low=df.rate.min(), high=df.rate.max())

# Years run along the x-axis (drawn above the plot); the month columns are
# listed in reverse for the y-range.
p = figure(
    title="US Unemployment 1948—2016",
    toolbar_location=None,
    tools="",
    x_range=list(data.index),
    y_range=list(reversed(data.columns)),
    x_axis_location="above",
    plot_width=900,
    plot_height=400,
)

p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "5pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = 1.0

# One unit-sized rectangle per (Year, Month) row, filled according to its rate.
p.rect(
    x="Year",
    y="Month",
    width=1,
    height=1,
    source=source,
    line_color=None,
    fill_color=transform('rate', mapper),
)

# Color bar that shares the mapper, asking for one tick per palette color.
color_bar = ColorBar(
    color_mapper=mapper,
    location=(0, 0),
    ticker=BasicTicker(desired_num_ticks=len(colors)),
    formatter=PrintfTickFormatter(format="%d%%"),
)
p.add_layout(color_bar, 'right')

show(p)
In addition to heatmaps that use colormapping to shade each rectangle, a similar technique can be used to create various kinds of illustrations, for instance the example below uses Bokeh to make an interactive periodic table.
In [ ]:
# Interactive periodic table: one rectangle per element on a categorical
# (group, period) grid, with text glyphs and a hover tool layered on top.
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.sampledata.periodic_table import elements
from bokeh.transform import dodge, factor_cmap
# Factors for the two categorical axes: periods as Roman numerals, groups "1".."18".
periods = ["I", "II", "III", "IV", "V", "VI", "VII"]
groups = [str(x) for x in range(1, 19)]
# Work on a copy so the sample data itself is not modified.
df = elements.copy()
df["atomic mass"] = df["atomic mass"].astype(str)
df["group"] = df["group"].astype(str)
# Replace each numeric period n by its label periods[n-1].
df["period"] = [periods[x-1] for x in df.period]
# Drop rows whose group is "-", and the elements Lr and Lu (the plot title
# below states that the LA and AC series are omitted).
df = df[df.group != "-"]
df = df[df.symbol != "Lr"]
df = df[df.symbol != "Lu"]
# Fill color for each value of the "metal" column.
cmap = {
"alkali metal" : "#a6cee3",
"alkaline earth metal" : "#1f78b4",
"metal" : "#d93b43",
"halogen" : "#999d9a",
"metalloid" : "#e08d79",
"noble gas" : "#eaeaea",
"nonmetal" : "#f1d4Af",
"transition metal" : "#599d7A",
}
source = ColumnDataSource(df)
# The y-range is reversed so that period "I" is the top row.
p = figure(title="Periodic Table (omitting LA and AC Series)", plot_width=900, plot_height=500,
tools="", toolbar_location=None,
x_range=groups, y_range=list(reversed(periods)))
# One cell per element, colored by its "metal" category via the cmap above.
p.rect("group", "period", 0.95, 0.95, source=source, fill_alpha=0.6, legend="metal",
color=factor_cmap('metal', palette=list(cmap.values()), factors=list(cmap.keys())))
# Shared settings for the text glyphs; x is shifted by -0.4 within each group
# and the vertical dodge values below separate the four text lines of a cell.
text_props = {"source": source, "text_align": "left", "text_baseline": "middle"}
x = dodge("group", -0.4, range=p.x_range)
r = p.text(x=x, y="period", text="symbol", **text_props)
r.glyph.text_font_style="bold"
r = p.text(x=x, y=dodge("period", 0.3, range=p.y_range), text="atomic number", **text_props)
r.glyph.text_font_size="8pt"
r = p.text(x=x, y=dodge("period", -0.35, range=p.y_range), text="name", **text_props)
r.glyph.text_font_size="5pt"
r = p.text(x=x, y=dodge("period", -0.2, range=p.y_range), text="atomic mass", **text_props)
r.glyph.text_font_size="5pt"
# "LA" / "AC" labels placed in group 3 of periods VI and VII.
p.text(x=["3", "3"], y=["VI", "VII"], text=["LA", "AC"], text_align="center", text_baseline="middle")
# Tooltip rows; column names that contain spaces are written as @{...}.
p.add_tools(HoverTool(tooltips = [
("Name", "@name"),
("Atomic number", "@{atomic number}"),
("Atomic mass", "@{atomic mass}"),
("Type", "@metal"),
("CPK color", "$color[hex, swatch]:CPK"),
("Electronic configuration", "@{electronic configuration}"),
]))
# Strip outline, grid and axis decorations; lay the legend out horizontally at the top.
p.outline_line_color = None
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_standoff = 0
p.legend.orientation = "horizontal"
p.legend.location ="top_center"
show(p)
We have seen above how the dodge
transform can be used to shift an entire column of categorical values. But it is possible to offset individual coordinates by putting the offset at the end of a tuple with a factor. For instance, if we have categories "foo"
and "bar"
then
("foo", 0.1), ("foo", 0.2), ("bar", -0.3)
Are all examples of individual coordinates shifted on a per-coordinate basis.
This technique can be used to create "Ridge Plots" which show lines (or filled areas) for different categories.
In [ ]:
import colorcet as cc
from numpy import linspace
# gaussian_kde is exported by the public scipy.stats namespace; the
# scipy.stats.kde submodule path is deprecated.
from scipy.stats import gaussian_kde

from bokeh.sampledata.perceptions import probly
from bokeh.models import FixedTicker, PrintfTickFormatter
In [ ]:
# Preview the perceptions sample data before plotting it
# (presumably a pandas DataFrame with one column per category — confirm).
probly.head()
In [ ]:
def ridge(category, data, scale=20):
    ''' For a given category and timeseries for that category, return categorical
    coordinates with offsets scaled by the timeseries.

    Args:
        category : the categorical factor every coordinate belongs to
        data : sequence or array of numeric values, one per coordinate
        scale : factor each value is multiplied by (default: 20)

    Returns:
        list of ``(category, scale * value)`` tuples, one per value in ``data``

    '''
    import numpy as np  # local import: the notebook itself only imports linspace

    # Coerce to an array first so the multiplication is always element-wise.
    # With a plain list and an integer scale, ``scale*data`` would repeat the
    # list rather than scale its values.
    offsets = scale * np.asarray(data)
    return list(zip([category] * len(offsets), offsets))
In [ ]:
# Categories for the y-range, in reverse key order.
cats = list(reversed(probly.keys()))

palette = [cc.rainbow[i*15] for i in range(17)]

# Sample points at which every density estimate is evaluated; shared by all ridges.
x = linspace(-20,110, 500)

source = ColumnDataSource(data=dict(x=x))

p = figure(y_range=cats, plot_width=900, x_range=(-5, 105), toolbar_location=None)

# One filled ridge per category: estimate its density, turn the density into
# per-point offsets from the category's position, store those coordinates as
# a column named after the category, and draw it as a patch.
for i, cat in enumerate(reversed(cats)):
    pdf = gaussian_kde(probly[cat])
    y = ridge(cat, pdf(x))
    source.add(y, cat)
    p.patch('x', cat, color=palette[i], alpha=0.6, line_color="black", source=source)

p.outline_line_color = None
p.background_fill_color = "#efefef"

# Percent ticks every 10 units from 0 to 100.
p.xaxis.ticker = FixedTicker(ticks=list(range(0, 101, 10)))
p.xaxis.formatter = PrintfTickFormatter(format="%d%%")

p.ygrid.grid_line_color = None
p.xgrid.grid_line_color = "#dddddd"
# Reuse the axis ticker so the vertical grid lines coincide with the ticks.
p.xgrid.ticker = p.xaxis[0].ticker

p.axis.minor_tick_line_color = None
p.axis.major_tick_line_color = None
p.axis.axis_line_color = None

p.y_range.range_padding = 0.12

show(p)
In [ ]: