In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [ ]:
# Load the complete deaths spreadsheet into a DataFrame.
allDeaths = pd.read_excel(io="data/all-deaths.xls")
# Report the table dimensions as (rows, columns).
print(allDeaths.shape)
# Preview the first five records as the cell's displayed output.
allDeaths.head(5)
In [ ]:
# Keep only the rows whose episode id contains the substring "tos".
# na=False treats a missing EpisodeID as "no match"; without it the mask
# contains NaN and cannot be used for boolean indexing.
isTOS = allDeaths['EpisodeID'].str.contains("tos", na=False)
allDeathsTOS = allDeaths[isTOS]
# Print (rows, columns), matching how the full table is reported when loaded
# (.size would print rows * columns instead).
print(allDeathsTOS.shape)
In [ ]:
# Total body count per cause of death; the groupby/sum result is a Series
# indexed by 'DeathBy'.
totals = allDeathsTOS.groupby(by='DeathBy')['BodyCount'].sum()
# Turn the Series into a two-column data frame ...
totalDeaths = pd.DataFrame(
    {
        'DeathBy': totals.index,
        'TotalBodyCount': totals.values,
    }
)
# ... and order it by ascending body count for plotting later.
totalDeaths = totalDeaths.sort_values(by='TotalBodyCount')
# Show the five largest totals.
totalDeaths.tail(5)
The concept for this chart is borrowed from http://thestartrekproject.net/files/Star_Trek/ch4/miscellanea-chapter-mockup%2012.pdf
In [ ]:
from bokeh.plotting import figure, output_notebook, show, ColumnDataSource
from bokeh.models import HoverTool

# Render Bokeh output inline in the notebook.
output_notebook()

# Bubble chart laid out along a logarithmic spiral r = a * exp(b * theta):
# one bubble per cause of death, smallest totals first (totalDeaths is sorted
# ascending), so the bubbles grow as the spiral unwinds.

# spiral parameters
a = 0.45
b = 0.15

# bubble size and spacing
# The radius is the log10 of the body count so large totals do not dwarf the
# rest; adding 1.0 keeps a count of zero at radius 0 instead of -inf.
spacing = 0.01
size = np.log10(1.0 + totalDeaths['TotalBodyCount'])

# convert bubble size and spacing to arclengths
# Each bubble uses up its diameter plus the gap; the running sum is the arc
# length along the spiral at which the bubble is placed.
arclength = np.cumsum(2 * size + spacing)

# solve for polar angle using the spiral arclength equation
# s = a * sqrt(1 + b^2) / b * exp(b * theta), inverted for theta
theta = np.log(b * arclength / (a * np.sqrt(1 + np.power(b, 2)))) / b

# solve for polar radius using the logarithmic spiral equation
r = a * np.exp(b * theta)

# cartesian
x = r * np.cos(theta)
y = r * np.sin(theta)

# Colour each bubble by its position: red follows x, green follows y, blue is
# fixed. The channels are clamped to 0..255 because the coordinates are
# unbounded and an out-of-range value would format as an invalid hex colour.
redChannel = np.clip(np.floor(100 + 2 * x), 0, 255)
greenChannel = np.clip(np.floor(30 + 2 * y), 0, 255)
color = [
    "#%02x%02x%02x" % (int(red), int(green), 150)
    for red, green in zip(redChannel, greenChannel)
]

# build column data source for bokeh
source = ColumnDataSource(
    data=dict(
        x=x,
        y=y,
        bodyCount=totalDeaths['TotalBodyCount'],
        size=size,
        color=color,
        desc=totalDeaths['DeathBy'].tolist(),
    )
)

# setup hover tool for contextual labels
hover = HoverTool(
    tooltips=[
        ("Body Count", "@bodyCount"),
        ("Desc", "@desc"),
    ]
)

# create the figure
# width/height replace plot_width/plot_height, which Bokeh 3.0 removed.
p = figure(width=800, height=800, tools=[hover],
           title="Death By")

# create the bubble scatter plot
# The radius is given in data units, so circle() is used directly;
# scatter(radius=...) is deprecated in newer Bokeh releases.
p.circle('x', 'y', radius='size', fill_color='color',
         source=source, fill_alpha=0.8, line_color=None)

# display the figure
show(p)
In [ ]: