In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='whitegrid', color_codes=True)

Federal Office of Road Saftey: THE HISTORY OF ROAD FATALITIES IN AUSTRALIA

https://www.monash.edu/__data/assets/pdf_file/0020/216452/muarc237.pdf

THE VICTORIAN PARLIAMENTARY ROAD SAFETY COMMITTEE – A HISTORY OF INQUIRIES AND OUTCOMES: 2005 By Belinda Clark Narelle Haworth Michael Lenné

https://infrastructure.gov.au/roads/safety/publications/1998/pdf/Stats_Aust_8.pdf

Import data


In [2]:
data = pd.read_csv(r'data/Fatalities_July_2016_II.csv')
filter_age = data['Age']>=0
data  = data[filter_age]
data.head()


/home/jake/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2723: DtypeWarning: Columns (11) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Out[2]:
State Date Day Month Year Dayweek Time Hour Minute Crash_Type Bus_Involvement Hvy_Rigid_Truck_Involvement Articulated_Truck_Involvement Speed_Limit Road_User Gender Age
0 SA 1-Jan-16 1 January 2016 Friday 1:00 1 0 Single No No No 110 Driver Male 40
1 VIC 1-Jan-16 1 January 2016 Friday 20:30 20 30 Single No No No 80 Motor cycle rider Male 25
2 NSW 2-Jan-16 2 January 2016 Saturday 0:30 0 30 Single No No No 100 Passenger Male 18
3 WA 2-Jan-16 2 January 2016 Saturday 17:20 17 20 Multiple No No Yes 110 Driver Male 53
4 TAS 2-Jan-16 2 January 2016 Saturday 19:58 19 58 Multiple No No No 80 Motor cycle rider Male 17

Make histogram Pipeline


In [19]:
def hist_plot(feature, data,
              title,
              rot_x_lbl=False,
              x_axis_scale=False,
              x_axis_interval=None,
              **kwargs):
    plot = sns.countplot(x=feature, data=data, **kwargs)
    if rot_x_lbl == True:
        plt.xticks(rotation = 90)
    if x_axis_scale==True:
        for label in plot.xaxis.get_ticklabels():
            label.set_visible(False)
        for label in plot.xaxis.get_ticklabels()[::x_axis_interval]:
            label.set_visible(True)
    plt.title(title)
    plt.show()

Generate gender plot


In [4]:
hist_plot('Gender', data, 'Fatalities by Gender since 1989')


Gender and agegroup plots


In [5]:
data_male=data[data['Gender']=='Male']
hist_plot('Age',
          data_male,
          'Male fatalities by age since 1989',
          x_axis_scale=True,
          x_axis_interval=5)



In [6]:
data_female=data[data['Gender']=='Female']
hist_plot('Age',
          data_female,
          'Female fatalities by age since 1989',
          x_axis_scale=True,
          x_axis_interval=5)


Generate plot for fatalaties by road user


In [21]:
hist_plot('Road_User',
          data,
          'Fatalities by user type since 1989',
          rot_x_lbl=True,
         order=['Driver', 
                'Passenger',
                'Pedestrian',
                'Motor cycle rider',
                'Bicyclist',
                'Motor cycle pillion passenger',
                'Other/-9',
                '-9'])


plots for 18 - 25 y.o. by gender

males:


In [8]:
z=data_male['Age'].isin([i for i in range(18,25)])
data_male_18_to_25=data_male[z]

In [9]:
hist_plot('Year',
          data_male_18_to_25,
          'Male fatalities in 18-25 age group since 1989',
          rot_x_lbl=True)


Females:


In [10]:
z=data_female['Age'].isin([i for i in range(18,25)])
data_female_18_to_25=data_female[z]

In [11]:
hist_plot('Year',
          data_female_18_to_25,
          'Female fatalities in 18-25 age group since 1989',
          rot_x_lbl=True)



In [12]:
hist_plot('Year',
         data,
         'Aggregate fatalities by year',
         rot_x_lbl=True)


Getting Victorian cyclist fatality data


In [13]:
z=data['Road_User']=='Bicyclist'
data_bike=data[z]
z=data_bike['State']=='VIC'
data_bike=data_bike[z]

Plotting:


In [14]:
hist_plot('Year',
         data_bike,
         'Bicyclist fatalities by year in Victoria',
         rot_x_lbl=True)


Interesting to see the number drop after Victoria was the first place in the world to enforce mandatory wearing of bicycle helments on the the road in 1990, although it would be good to compare it with data from pre 1989 to ensure 1989 and 1990 are not outliers.


In [15]:
z=data['Road_User']=='Bicyclist'
data_bike=data[z]

In [16]:
hist_plot('Year',
         data_bike,
         'Cyclist fatalities by year in Australia',
         rot_x_lbl=True)