Bokeh Demo

Portland Data Science Meetup, November 2014


In [9]:
import bokeh
import bokeh.charts as bc
import bokeh.plotting as bp
import numpy as np
import pandas as pd
import qgrid
from IPython.core.display import HTML

# Notebook setup: load BokehJS into this notebook and run qgrid's nbinstall
# step before anything is rendered.
bokeh.load_notebook()
qgrid.nbinstall()

# Same SNOTEL data as before, this time rendered with Bokeh.
# header=7 -> row 7 (0-based) of the CSV supplies the column names;
# rows containing any missing value are dropped.
df = pd.read_csv(
    'mthood_snotel.csv',
    header=7,
    parse_dates=['Date'],
).dropna()

# Index by date so the readings can be resampled to month-start ('MS') bins.
# NOTE(review): the next lines rely on resample() returning an aggregated
# DataFrame directly; newer pandas releases need an explicit aggregation --
# confirm the pandas version this notebook targets.
indexed = df.set_index(keys='Date')
resampled = indexed.resample(rule='MS')

# Tag each monthly row with its calendar month, then average all rows that
# share the same month.
resampled['Month'] = resampled.index.month
monthly_grouped = resampled.groupby(by='Month').mean()

# Last expression of the cell: interactive grid view of the monthly averages
qgrid.show_grid(monthly_grouped, remote_js=True)


BokehJS successfully loaded.

Warning: BokehJS previously loaded


In [2]:
# Monthly temperature comparison: max, min and average drawn side by side
temp_columns = [
    'Air Temperature Maximum (degF)',
    'Air Temperature Minimum (degF)',
    'Air Temperature Average (degF)',
]
temp_df = monthly_grouped[temp_columns]

# Hack: Bokeh expects a string index
temp_df.index = temp_df.index.astype(str)

# Chart options gathered in one place so they are easy to scan and tweak
bar_options = dict(
    title="Temperatures by Month",
    xlabel="Months",
    ylabel="Temps (F)",
    legend=True,
    width=800,
    height=600,
    notebook=True,
    tools="pan,wheel_zoom,box_zoom,reset,resize",
)
bar = bc.Bar(temp_df, **bar_options)
# I have no idea why the tools don't show up
bar.show()



In [3]:
# Stacking the bars only takes one extra keyword: stacked=True.
# Reuses temp_df (string-indexed monthly temperatures) from the previous cell.
stacked_options = dict(
    title="Temperatures by Month",
    xlabel="Months",
    ylabel="Temps (F)",
    stacked=True,
    legend=True,
    width=800,
    height=600,
    notebook=True,
)
bar = bc.Bar(temp_df, **stacked_options)
bar.show()



In [4]:
# Note: Bokeh will *not* take a Series here, so the column is selected with a
# list (double brackets) to keep it a one-column DataFrame.
avg_temp_frame = df[['Air Temperature Average (degF)']]

# Second positional argument to Histogram -- presumably the number of bins;
# confirm against the bokeh.charts.Histogram signature.
n_bins = 50

hist = bc.Histogram(
    avg_temp_frame,
    n_bins,
    xlabel="Average Air Temp (F)",
    ylabel="Dist",
    notebook=True,
)
hist.show()



In [5]:
# Multiple "layered" histograms: passing several columns draws one
# distribution per column on the same chart.
layered_columns = [
    'Air Temperature Average (degF)',
    'Air Temperature Maximum (degF)',
    'Air Temperature Minimum (degF)',
]
layered_frame = df[layered_columns]

# The positional 50 is presumably the bin count -- confirm against the
# bokeh.charts.Histogram signature.
hist = bc.Histogram(
    layered_frame,
    50,
    xlabel="Air Temp (F)",
    ylabel="Dist",
    legend=True,
    notebook=True,
)
hist.show()



In [6]:
# Attempt 1: the high-level charts API. The show() call is left disabled
# because it fails for this flat (non-hierarchical) frame -- see notes below.
scatter = bc.Scatter(df[['Air Temperature Average (degF)', 'Precipitation Increment (in)']])
# scatter.show()
# This fails because I do not have a hierarchical index. This should work for simple x vs. y scatterplots
# Which API is the canonical one? It feels like I'm resorting to old API here

# Attempt 2: fall back to bokeh.plotting (imported as bp with the other
# imports at the top of the notebook) for a plain x-vs-y scatter.
scatterplot = bp.scatter(df['Air Temperature Average (degF)'], df['Precipitation Increment (in)'])
bp.show()
# Why do I get new tools for this chart?

In [7]:
# Attempt 1: the high-level charts API on the month-start resampled frame.
# show() stays disabled because it fails on this frame (see note below).
times = bc.TimeSeries(resampled)
# times.show()
# This will fail, again because it expects a Hierarchical index. Why?

# Attempt 2: draw the monthly average temperature with bokeh.plotting.
# bp is imported with the other imports at the top of the notebook, so this
# cell no longer depends on the scatter cell having been run first.
# Note: this rebinds `times` from the charts object to the bp.line result.
times = bp.line(resampled.index, resampled['Air Temperature Average (degF)'])
# Not sure why the datetime axis is not working.
bp.show()

In [8]:
# Inject the notebook's custom stylesheet into the rendered page.
# HTML is imported with the other imports at the top of the notebook.

# Path used for nbviewer. If running locally, use "styles/custom.css" instead.
css_path = "custom.css"

# Read via a context manager so the file handle is closed as soon as the
# contents are loaded (the bare open(...).read() left it open).
with open(css_path, "r") as css_file:
    styles = css_file.read()

# Last expression of the cell: IPython renders the file contents as raw HTML.
# presumably custom.css carries its own <style> wrapper -- verify the file.
HTML(styles)


Out[8]: