Creating a waterfall chart using Bokeh

This is the notebook associated with the article at Pbpython.com


In [1]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.models.formatters import NumeralTickFormatter
import pandas as pd

In [2]:
output_notebook()


Loading BokehJS ...

In [3]:
# Create the initial dataframe
index = ['sales','returns','credit fees','rebates','late charges','shipping']
data = {'amount': [350000,-30000,-7500,-25000,95000,-7000]}
df = pd.DataFrame(data=data,index=index)

# Determine the total net value by adding the start and all additional transactions
net = df['amount'].sum()

In [4]:
df


Out[4]:
amount
sales 350000
returns -30000
credit fees -7500
rebates -25000
late charges 95000
shipping -7000

In [5]:
# Create additional columns that we will use to build the waterfall
df['running_total'] = df['amount'].cumsum()
df['y_start'] = df['running_total'] - df['amount']

# Where do we want to place the label
df['label_pos'] = df['running_total']

In [6]:
df


Out[6]:
amount running_total y_start label_pos
sales 350000 350000 0 350000
returns -30000 320000 350000 320000
credit fees -7500 312500 320000 312500
rebates -25000 287500 312500 287500
late charges 95000 382500 287500 382500
shipping -7000 375500 382500 375500

In [7]:
# We need to have a net column at the end with the totals and a full bar
df_net = pd.DataFrame.from_records([(net, net, 0, net)], 
                                   columns=['amount', 'running_total', 'y_start', 'label_pos'],
                                   index=["net"])
df = df.append(df_net)

In [8]:
df


Out[8]:
amount running_total y_start label_pos
sales 350000 350000 0 350000
returns -30000 320000 350000 320000
credit fees -7500 312500 320000 312500
rebates -25000 287500 312500 287500
late charges 95000 382500 287500 382500
shipping -7000 375500 382500 375500
net 375500 375500 0 375500

In [9]:
# We want to color the positive values gray and the negative red
df['color'] = 'grey'
df.loc[df.amount < 0, 'color'] = 'red'

# The 10000 factor is used to make the text positioned correctly.
# You will need to modify if the values are significantly different
df.loc[df.amount < 0, 'label_pos'] = df.label_pos - 10000
df["bar_label"] = df["amount"].map('{:,.0f}'.format)

In [10]:
df


Out[10]:
amount running_total y_start label_pos color bar_label
sales 350000 350000 0 350000 grey 350,000
returns -30000 320000 350000 310000 red -30,000
credit fees -7500 312500 320000 302500 red -7,500
rebates -25000 287500 312500 277500 red -25,000
late charges 95000 382500 287500 382500 grey 95,000
shipping -7000 375500 382500 365500 red -7,000
net 375500 375500 0 375500 grey 375,500

In [11]:
# Build the Bokeh figure

# Limit the tools to only these three
TOOLS = "box_zoom,reset,save"

# Build the source data off the df dataframe
source = ColumnDataSource(df)

# Create the figure and assign range values that look good for the data set
p = figure(tools=TOOLS, x_range=list(df.index), y_range=(0, net+40000), plot_width=800, title = "Sales Waterfall")
p.grid.grid_line_alpha=0.3

# Add the segments
p.segment(x0='index', y0='y_start', x1="index", y1='running_total', source=source, color="color", line_width=55)

# Format the y-axis as dollars
p.yaxis[0].formatter = NumeralTickFormatter(format="($ 0 a)")
p.xaxis.axis_label = "Transactions"

# Add the labels
labels = LabelSet(x='index', y='label_pos', text='bar_label', text_font_size="8pt", level='glyph',
         x_offset=-20, y_offset=0, source=source)
p.add_layout(labels)

In [12]:
show(p)