In [ ]:
# Import the Python libraries we need
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
In [ ]:
# Load the accidents CSV into a DataFrame.
# NOTE: the path is an absolute, machine-specific location; point it at your
# own copy of the data set before running.
accidents_data_file = (
    '/Users/robert.dempsey/Dropbox/Private/Art of Skill Hacking/Books/'
    'Python Business Intelligence Cookbook/Data/Stats19-Data1979-2004/Accidents7904.csv'
)
accidents = pd.read_csv(
    accidents_data_file,
    sep=',',
    header=0,              # column names come from the first row
    index_col=False,       # do not promote any column to the index
    parse_dates=True,
    # Replaces `error_bad_lines=False, warn_bad_lines=True`, which were
    # removed in pandas 2.0: malformed rows are skipped and a warning is
    # emitted for each. (`tupleize_cols=False` was dropped as well; the
    # keyword was removed in pandas 1.0 and False was its default.)
    on_bad_lines='warn',
    skip_blank_lines=True,
    low_memory=False,      # parse in one pass to avoid mixed-dtype inference
)
In [ ]:
# Histogram of the Weather_Conditions column, saved as a PNG for the dashboard.
fig = plt.figure()
ax = fig.add_subplot(111)
# Draw the histogram exactly once. A second, unstyled pass over the same
# column on the same axes would only be painted over by this one while
# doubling the binning work.
counts, bins, patches = ax.hist(accidents['Weather_Conditions'],
                                facecolor='green',
                                edgecolor='gray')
# Place an x tick on every bin edge returned by hist()
ax.set_xticks(bins)
ax.set_title('Weather Conditions Distribution')
ax.set_xlabel('Weather Condition')
ax.set_ylabel('Count of Weather Condition')
fig.savefig('dashboard/charts/weather-conditions-distribution.png')
In [ ]:
# Box plot of the Light_Conditions column, saved as a PNG for the dashboard.
# DataFrame.boxplot() without `ax` (and without `by`) draws on the current
# axes, so any figure left open earlier would end up in this image too.
# Give the plot its own figure and axes instead.
box_fig, box_ax = plt.subplots()
accidents.boxplot(column='Light_Conditions',
                  ax=box_ax,
                  return_type='dict')
box_fig.savefig('dashboard/charts/light-conditions-boxplot.png')
In [ ]:
# Box plot of Light_Conditions with one box per Weather_Conditions value,
# saved as a PNG for the dashboard.
grouped_boxplot_options = {
    'column': 'Light_Conditions',
    'by': 'Weather_Conditions',
    'return_type': 'dict',
}
accidents.boxplot(**grouped_boxplot_options)
plt.savefig('dashboard/charts/lc-by-wc-boxplot.png')
In [ ]:
# Build a DataFrame holding the total number of casualties for each date.
# The string alias 'sum' is used rather than np.sum: pandas >= 2.1 emits a
# FutureWarning when a NumPy callable is passed to agg().
casualty_count = accidents.groupby('Date').agg({'Number_of_Casualties': 'sum'})
# Convert the index to a DatetimeIndex so rows can be selected by date string.
# NOTE(review): no `format`/`dayfirst` is given. If the Date column is
# day-first (e.g. dd/mm/yyyy), ambiguous dates may be read month-first.
# Confirm against the CSV before relying on the result.
casualty_count.index = pd.to_datetime(casualty_count.index)
# Sort chronologically so the time-series plots are drawn in order
casualty_count = casualty_count.sort_index(ascending=True)
In [ ]:
# Chart the complete per-date casualty series and save it for the dashboard
all_data_figsize = (18, 4)
casualty_count.plot(kind='line', figsize=all_data_figsize)
plt.savefig('dashboard/charts/casualty-count-all.png')
In [ ]:
# Plot one year of the data.
# Rows are selected with .loc and a partial date string. Plain
# `casualty_count['2000']` is a column lookup on pandas >= 2.0 and raises
# KeyError there.
casualty_count.loc['2000'].plot(figsize=(18, 4))
plt.savefig('dashboard/charts/casualty-count-2000.png')
In [ ]:
# Yearly casualty totals for 1980 through 1989, charted and saved for the
# dashboard.
eighties_rows = casualty_count['1980-01-01':'1989-12-31']
# Group the selected rows by the calendar year of their index and sum them
the1980s = eighties_rows.groupby(eighties_rows.index.year).sum()
the1980s.plot(figsize=(18, 4))
plt.savefig('dashboard/charts/casualty-count-1980s.png')
In [ ]: