Creating a waterfall chart using Bokeh

This is the notebook associated with the article at Pbpython.com



In [1]:

    
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.models.formatters import NumeralTickFormatter
import pandas as pd



In [2]:

    
output_notebook()









    





    
        
        Loading BokehJS ...



In [3]:

    
# Create the initial dataframe
index = ['sales','returns','credit fees','rebates','late charges','shipping']
data = {'amount': [350000,-30000,-7500,-25000,95000,-7000]}
df = pd.DataFrame(data=data,index=index)

# Determine the total net value by adding the start and all additional transactions
net = df['amount'].sum()



In [4]:

    
df









    Out[4]:







  
    
      
      amount
    
  
  
    
      sales
      350000
    
    
      returns
      -30000
    
    
      credit fees
      -7500
    
    
      rebates
      -25000
    
    
      late charges
      95000
    
    
      shipping
      -7000



In [5]:

    
# Create additional columns that we will use to build the waterfall
df['running_total'] = df['amount'].cumsum()
df['y_start'] = df['running_total'] - df['amount']

# Where do we want to place the label
df['label_pos'] = df['running_total']



In [6]:

    
df









    Out[6]:







  
    
      
      amount
      running_total
      y_start
      label_pos
    
  
  
    
      sales
      350000
      350000
      0
      350000
    
    
      returns
      -30000
      320000
      350000
      320000
    
    
      credit fees
      -7500
      312500
      320000
      312500
    
    
      rebates
      -25000
      287500
      312500
      287500
    
    
      late charges
      95000
      382500
      287500
      382500
    
    
      shipping
      -7000
      375500
      382500
      375500



In [7]:

    
# We need to have a net column at the end with the totals and a full bar
df_net = pd.DataFrame.from_records([(net, net, 0, net)], 
                                   columns=['amount', 'running_total', 'y_start', 'label_pos'],
                                   index=["net"])
df = df.append(df_net)



In [8]:

    
df









    Out[8]:







  
    
      
      amount
      running_total
      y_start
      label_pos
    
  
  
    
      sales
      350000
      350000
      0
      350000
    
    
      returns
      -30000
      320000
      350000
      320000
    
    
      credit fees
      -7500
      312500
      320000
      312500
    
    
      rebates
      -25000
      287500
      312500
      287500
    
    
      late charges
      95000
      382500
      287500
      382500
    
    
      shipping
      -7000
      375500
      382500
      375500
    
    
      net
      375500
      375500
      0
      375500



In [9]:

    
# We want to color the positive values gray and the negative red
df['color'] = 'grey'
df.loc[df.amount < 0, 'color'] = 'red'

# The 10000 factor is used to make the text positioned correctly.
# You will need to modify if the values are significantly different
df.loc[df.amount < 0, 'label_pos'] = df.label_pos - 10000
df["bar_label"] = df["amount"].map('{:,.0f}'.format)



In [10]:

    
df









    Out[10]:







  
    
      
      amount
      running_total
      y_start
      label_pos
      color
      bar_label
    
  
  
    
      sales
      350000
      350000
      0
      350000
      grey
      350,000
    
    
      returns
      -30000
      320000
      350000
      310000
      red
      -30,000
    
    
      credit fees
      -7500
      312500
      320000
      302500
      red
      -7,500
    
    
      rebates
      -25000
      287500
      312500
      277500
      red
      -25,000
    
    
      late charges
      95000
      382500
      287500
      382500
      grey
      95,000
    
    
      shipping
      -7000
      375500
      382500
      365500
      red
      -7,000
    
    
      net
      375500
      375500
      0
      375500
      grey
      375,500



In [11]:

    
# Build the Bokeh figure

# Limit the tools to only these three
TOOLS = "box_zoom,reset,save"

# Build the source data off the df dataframe
source = ColumnDataSource(df)

# Create the figure and assign range values that look good for the data set
p = figure(tools=TOOLS, x_range=list(df.index), y_range=(0, net+40000), plot_width=800, title = "Sales Waterfall")
p.grid.grid_line_alpha=0.3

# Add the segments
p.segment(x0='index', y0='y_start', x1="index", y1='running_total', source=source, color="color", line_width=55)

# Format the y-axis as dollars
p.yaxis[0].formatter = NumeralTickFormatter(format="($ 0 a)")
p.xaxis.axis_label = "Transactions"

# Add the labels
labels = LabelSet(x='index', y='label_pos', text='bar_label', text_font_size="8pt", level='glyph',
         x_offset=-20, y_offset=0, source=source)
p.add_layout(labels)



In [12]:

    
show(p)

	amount
sales	350000
returns	-30000
credit fees	-7500
rebates	-25000
late charges	95000
shipping	-7000