This is the notebook associated with the article at Pbpython.com
In [1]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.models.formatters import NumeralTickFormatter
import pandas as pd
In [2]:
output_notebook()
In [3]:
# Create the initial dataframe
index = ['sales','returns','credit fees','rebates','late charges','shipping']
data = {'amount': [350000,-30000,-7500,-25000,95000,-7000]}
df = pd.DataFrame(data=data,index=index)
# Determine the total net value by adding the start and all additional transactions
net = df['amount'].sum()
In [4]:
df
Out[4]:
In [5]:
# Create additional columns that we will use to build the waterfall
df['running_total'] = df['amount'].cumsum()
df['y_start'] = df['running_total'] - df['amount']
# Where do we want to place the label
df['label_pos'] = df['running_total']
In [6]:
df
Out[6]:
In [7]:
# We need to have a net column at the end with the totals and a full bar
df_net = pd.DataFrame.from_records([(net, net, 0, net)],
columns=['amount', 'running_total', 'y_start', 'label_pos'],
index=["net"])
df = df.append(df_net)
In [8]:
df
Out[8]:
In [9]:
# We want to color the positive values gray and the negative red
df['color'] = 'grey'
df.loc[df.amount < 0, 'color'] = 'red'
# The 10000 factor is used to make the text positioned correctly.
# You will need to modify if the values are significantly different
df.loc[df.amount < 0, 'label_pos'] = df.label_pos - 10000
df["bar_label"] = df["amount"].map('{:,.0f}'.format)
In [10]:
df
Out[10]:
In [11]:
# Build the Bokeh figure
# Limit the tools to only these three
TOOLS = "box_zoom,reset,save"
# Build the source data off the df dataframe
source = ColumnDataSource(df)
# Create the figure and assign range values that look good for the data set
p = figure(tools=TOOLS, x_range=list(df.index), y_range=(0, net+40000), plot_width=800, title = "Sales Waterfall")
p.grid.grid_line_alpha=0.3
# Add the segments
p.segment(x0='index', y0='y_start', x1="index", y1='running_total', source=source, color="color", line_width=55)
# Format the y-axis as dollars
p.yaxis[0].formatter = NumeralTickFormatter(format="($ 0 a)")
p.xaxis.axis_label = "Transactions"
# Add the labels
labels = LabelSet(x='index', y='label_pos', text='bar_label', text_font_size="8pt", level='glyph',
x_offset=-20, y_offset=0, source=source)
p.add_layout(labels)
In [12]:
show(p)