In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's 'whitegrid' style globally so every plot below shares it.
sns.set(style='whitegrid', color_codes=True)
Federal Office of Road Safety: THE HISTORY OF ROAD FATALITIES IN AUSTRALIA
https://www.monash.edu/__data/assets/pdf_file/0020/216452/muarc237.pdf
THE VICTORIAN PARLIAMENTARY ROAD SAFETY COMMITTEE – A HISTORY OF INQUIRIES AND OUTCOMES: 2005 By Belinda Clark Narelle Haworth Michael Lenné
https://infrastructure.gov.au/roads/safety/publications/1998/pdf/Stats_Aust_8.pdf
In [2]:
# Load the fatalities extract, then keep only rows whose Age is non-negative
# (negative ages are presumably a missing-value code such as -9 -- TODO confirm).
data = pd.read_csv(r'data/Fatalities_July_2016_II.csv')
filter_age = data['Age'].ge(0)
data = data.loc[filter_age]
data.head()
Out[2]:
In [19]:
def hist_plot(feature, data,
              title,
              rot_x_lbl=False,
              x_axis_scale=False,
              x_axis_interval=None,
              **kwargs):
    """Draw a seaborn count plot of one column, decorate it and show it.

    Parameters
    ----------
    feature : str
        Column name handed to ``sns.countplot`` as ``x``.
    data
        Dataset handed to ``sns.countplot`` as ``data``.
    title : str
        Title set on the axes that ``sns.countplot`` returns.
    rot_x_lbl : bool, optional
        When truthy, rotate every x tick label by 90 degrees.
    x_axis_scale : bool, optional
        When truthy, hide all x tick labels and then re-show only those
        selected by ``x_axis_interval``.
    x_axis_interval : int or None, optional
        Slice step used to pick the labels that stay visible
        (``labels[::x_axis_interval]``). ``None`` keeps every label.
    **kwargs
        Forwarded unchanged to ``sns.countplot`` (e.g. ``order``, ``ax``).

    Raises
    ------
    ValueError
        If ``x_axis_scale`` is truthy and ``x_axis_interval`` is 0.
    """
    # Reject a zero step up front: otherwise the slice below would fail only
    # after the plot was drawn and every label had already been hidden.
    if x_axis_scale and x_axis_interval == 0:
        raise ValueError('x_axis_interval must be a non-zero integer or None')

    plot = sns.countplot(x=feature, data=data, **kwargs)
    tick_labels = plot.xaxis.get_ticklabels()

    # Work on the axes returned by seaborn rather than pyplot's "current"
    # axes, so a caller-supplied ``ax=`` in kwargs is decorated correctly.
    if rot_x_lbl:
        for label in tick_labels:
            label.set_rotation(90)

    if x_axis_scale:
        for label in tick_labels:
            label.set_visible(False)
        for label in tick_labels[::x_axis_interval]:
            label.set_visible(True)

    plot.set_title(title)
    plt.show()
In [4]:
# Count of fatalities for each Gender value across the whole dataset.
hist_plot(feature='Gender',
          data=data,
          title='Fatalities by Gender since 1989')
In [5]:
# Male-only subset, plotted by age with every 5th x tick label shown.
data_male = data.loc[data['Gender'].eq('Male')]
hist_plot(feature='Age',
          data=data_male,
          title='Male fatalities by age since 1989',
          x_axis_scale=True,
          x_axis_interval=5)
In [6]:
# Female-only subset, plotted by age with every 5th x tick label shown.
data_female = data.loc[data['Gender'].eq('Female')]
hist_plot(feature='Age',
          data=data_female,
          title='Female fatalities by age since 1989',
          x_axis_scale=True,
          x_axis_interval=5)
In [21]:
# Fatalities per road-user type. The bar order is fixed explicitly via
# `order`, which hist_plot forwards to sns.countplot.
# NOTE(review): 'Other/-9' and '-9' look like the dataset's codes for
# other/unknown users -- confirm against the data dictionary.
hist_plot(
    feature='Road_User',
    data=data,
    title='Fatalities by user type since 1989',
    rot_x_lbl=True,
    order=[
        'Driver',
        'Passenger',
        'Pedestrian',
        'Motor cycle rider',
        'Bicyclist',
        'Motor cycle pillion passenger',
        'Other/-9',
        '-9',
    ],
)
In [8]:
# Male fatalities aged 18 to 25. Series.between is inclusive on both ends,
# so 25-year-olds are kept; range(18, 25) would have stopped at 24 and
# contradicted the "18-25" used in the variable name and the plot title.
z = data_male['Age'].between(18, 25)
data_male_18_to_25 = data_male[z]
In [9]:
# Yearly counts for the young-male subset; year labels rotated for legibility.
hist_plot(feature='Year',
          data=data_male_18_to_25,
          title='Male fatalities in 18-25 age group since 1989',
          rot_x_lbl=True)
Females:
In [10]:
# Female fatalities aged 18 to 25. Series.between is inclusive on both ends,
# so 25-year-olds are kept; range(18, 25) would have stopped at 24 and
# contradicted the "18-25" used in the variable name and the plot title.
z = data_female['Age'].between(18, 25)
data_female_18_to_25 = data_female[z]
In [11]:
# Yearly counts for the young-female subset; year labels rotated for legibility.
hist_plot(feature='Year',
          data=data_female_18_to_25,
          title='Female fatalities in 18-25 age group since 1989',
          rot_x_lbl=True)
In [12]:
# Yearly counts over the full (age-filtered) dataset.
hist_plot(feature='Year',
          data=data,
          title='Aggregate fatalities by year',
          rot_x_lbl=True)
Getting Victorian cyclist fatality data
In [13]:
# Two-step filter: first keep bicyclist records, then narrow them to
# the rows whose State is 'VIC'.
z = data['Road_User'].eq('Bicyclist')
data_bike = data.loc[z]
z = data_bike['State'].eq('VIC')
data_bike = data_bike.loc[z]
Plotting:
In [14]:
# Yearly bicyclist fatality counts for the Victoria-only subset.
hist_plot(feature='Year',
          data=data_bike,
          title='Bicyclist fatalities by year in Victoria',
          rot_x_lbl=True)
It is interesting to see the number drop after Victoria became the first place in the world to enforce mandatory wearing of bicycle helmets on the road in 1990, although it would be good to compare it with data from before 1989 to ensure that 1989 and 1990 are not outliers.
In [15]:
# All bicyclist records with no State filter applied.
# Note: this rebinds data_bike, which held the Victoria-only subset before.
z = data['Road_User'].eq('Bicyclist')
data_bike = data.loc[z]
In [16]:
# Yearly bicyclist fatality counts across all states.
hist_plot(feature='Year',
          data=data_bike,
          title='Cyclist fatalities by year in Australia',
          rot_x_lbl=True)