Star Trek Causes of Death

Data and inspiration from www.thestartrekproject.net

Required Libraries


In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

Read in the Data


In [ ]:
# Spreadsheet of death records that the rest of the notebook works from.
deathsWorkbook = "data/all-deaths.xls"
allDeaths = pd.read_excel(deathsWorkbook)

# Sanity check: print (rows, columns), then preview the first few records.
print(allDeaths.shape)
allDeaths.head()

Filter to Just "Star Trek: The Original Series" (TOS)


In [ ]:
# Keep only the rows whose EpisodeID contains "tos" (The Original Series).
# - na=False: a missing EpisodeID counts as "no match"; without it the mask
#   contains NaN and the boolean indexing below raises a ValueError.
# - regex=False: "tos" is a plain substring, not a pattern.
isTOS = allDeaths['EpisodeID'].str.contains("tos", regex=False, na=False)
allDeathsTOS = allDeaths[isTOS]

# Report (rows, columns), matching the check done right after loading.
# (.size would print rows * columns, which is not a record count.)
print(allDeathsTOS.shape)

Group By Cause of Death and Sum the Body Count


In [ ]:
# Total body count per cause of death; the result is a Series indexed by 'DeathBy'.
totals = allDeathsTOS.groupby('DeathBy')['BodyCount'].sum()

# Promote the Series to a two-column frame ('DeathBy', 'TotalBodyCount') and
# order it from fewest to most deaths, which is the order the plot needs later.
totalDeaths = (
    totals
    .reset_index(name='TotalBodyCount')
    .sort_values('TotalBodyCount')
)

# After the ascending sort, the tail holds the deadliest causes.
totalDeaths.tail()

Build a Spiral Bubble Plot

The concept for this chart is borrowed from http://thestartrekproject.net/files/Star_Trek/ch4/miscellanea-chapter-mockup%2012.pdf


In [ ]:
from bokeh.plotting import figure, output_notebook, show, ColumnDataSource
from bokeh.models import HoverTool

output_notebook()

# Parameters of the logarithmic spiral r = a * exp(b * theta).
a = 0.45
b = 0.15

# Bubble size and spacing.
# The radius grows with log10 of the body count so that the largest causes do
# not dwarf the rest; the 1.0 offset keeps a count of zero at radius 0.
spacing = 0.01
size = np.log10(1.0 + totalDeaths['TotalBodyCount'])

# Arclength along the spiral at which each bubble is placed: every bubble
# advances the running total by its own diameter plus the spacing.
arclength = np.cumsum(2 * size + spacing)

# Solve for the polar angle by inverting the spiral arclength equation
#   s(theta) = a * sqrt(1 + b^2) / b * exp(b * theta)
theta = np.log(b * arclength / (a * np.sqrt(1 + b ** 2))) / b

# Polar radius from the logarithmic spiral equation.
r = a * np.exp(b * theta)

# Convert polar coordinates to cartesian.
x = r * np.cos(theta)
y = r * np.sin(theta)

# Position-dependent colour: red follows x, green follows y, blue is fixed.
# Each channel is clamped to 0..255 so the '%02x' formatting always yields a
# valid '#rrggbb' string, even when a coordinate is strongly negative or large.
redChannel = np.clip(np.floor(100 + 2 * x), 0, 255)
greenChannel = np.clip(np.floor(30 + 2 * y), 0, 255)
color = [
    "#%02x%02x%02x" % (int(red), int(green), 150)
    for red, green in zip(redChannel, greenChannel)
]

# Build the column data source for bokeh.
source = ColumnDataSource(
        data=dict(
            x=x,
            y=y,
            bodyCount=totalDeaths['TotalBodyCount'],
            size=size,
            color=color,
            desc=totalDeaths['DeathBy'].tolist(),
        )
    )

# Set up the hover tool for contextual labels.
hover = HoverTool(
        tooltips=[
            ("Body Count", "@bodyCount"),
            ("Desc", "@desc"),
        ]
    )

# Create the figure. width/height are used instead of plot_width/plot_height,
# which were removed in Bokeh 3.0.
p = figure(width=800, height=800, tools=[hover],
           title="Death By")

# Draw the bubbles. The bubbles are sized by radius, which is a property of the
# circle glyph, so call circle() directly rather than going through scatter().
p.circle('x', 'y', radius='size', fill_color='color',
         source=source, fill_alpha=0.8, line_color=None)

# Display the figure.
show(p)

In [ ]: