In [1]:
%matplotlib inline
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
plt.style.use('ggplot')
In [2]:
# Short column names applied to the pipe-delimited file; the file's own
# first row is skipped and replaced by these names.
cols = [
    'id', 'type', 'number', 'date',
    'location', 'country', 'lat', 'long',
    'airport_code', 'airport_name',
    'injury_severity', 'aircraft_damage', 'aircraft_cat',
    'reg_no', 'make', 'model', 'amateur_built',
    'no_engines', 'engine_type', 'FAR_desc', 'schedule',
    'purpose', 'air_carrier',
    'fatal', 'serious', 'minor', 'uninjured',
    'weather', 'broad_phase', 'report_status', 'pub_date',
    # Caution: the extra 'none' column is needed. Presumably every row ends
    # with a trailing '|' that yields one more (empty) field -- TODO confirm.
    'none',
]
aaData = pd.read_csv('aviation.csv', sep='|', skiprows=1, names=cols)
In [3]:
# Preview the first five loaded rows.
aaData.head(n=5)
Out[3]:
In [4]:
# Parse the raw date strings (MM/DD/YYYY surrounded by spaces) into datetimes.
# A single vectorised pd.to_datetime call replaces the per-row strptime loop;
# stripping first means the amount of padding around the date no longer matters.
aaData['datetime'] = pd.to_datetime(aaData['date'].str.strip(), format='%m/%d/%Y')
aaData['datetime'].head()
Out[4]:
In [5]:
# Calendar month and year of every record, taken with the vectorised .dt
# accessor instead of a Python-level loop. The cast to int keeps these as
# integer columns, as the original int(...) conversion did.
aaData['month'] = aaData['datetime'].dt.month.astype(int)
aaData['year'] = aaData['datetime'].dt.year.astype(int)
In [6]:
def decyear(date):
    """Return ``date`` as a decimal year, e.g. mid-2001 -> 2001.5.

    The fractional part is the share of the calendar year that has elapsed
    at ``date``, measured against that year's real length, so leap years are
    handled.

    Parameters
    ----------
    date : datetime.datetime or pandas.Timestamp
        The moment to convert.

    Returns
    -------
    float
        ``date.year`` plus the elapsed fraction of that year, in [0, 1).
    """
    start = datetime(year=date.year, month=1, day=1)
    end = datetime(year=date.year + 1, month=1, day=1)
    # Divide seconds rather than timedelta objects: timedelta / timedelta is
    # not supported for plain datetimes on Python 2, while total_seconds()
    # returns a float on both Python 2 and 3.
    elapsed = (date - start).total_seconds()
    year_length = (end - start).total_seconds()
    return date.year + elapsed / year_length
In [7]:
# Decimal-year position of every record, computed element-wise with decyear.
aaData['decyear'] = aaData['datetime'].map(decyear)
In [8]:
def _to_float(value):
    """Convert one raw cell to float, mapping blank text to NaN.

    Text is stripped first, so both '' and whitespace-only cells become NaN.
    Values that are already numeric (including NaN) simply pass through
    float(), which makes this cell safe to run more than once.

    Raises ValueError for non-blank text that is not a valid number.
    """
    if hasattr(value, 'strip'):  # text cell
        value = value.strip()
        if not value:
            return np.nan
    return float(value)


# Coordinate and injury-count columns, converted to floats.
# Note: this rebinds `cols`, which previously held the full list of CSV names.
cols = ['lat', 'long', 'fatal', 'serious', 'minor', 'uninjured']
# Column-wise Series.map replaces the deprecated DataFrame.applymap.
aaData[cols] = aaData[cols].apply(lambda column: column.map(_to_float))
In [9]:
# Step plot of the fatal / minor / serious counts of every record against
# decimal year. The three series share one axes; each is lifted by a further
# 200 so it gets its own band, and the y tick labels restart at 0 per band.
plt.figure(figsize=(9, 4.5))
for band, (column, label) in enumerate(
        [('fatal', 'Fatal'), ('minor', 'Minor'), ('serious', 'Serious')]):
    plt.step(aaData['decyear'], aaData[column] + 200 * band,
             lw=1.75, where='mid', alpha=0.5, label=label)
plt.xticks(rotation=45);
plt.legend(loc=(0.01, .4), fontsize=13)
plt.ylim((-10, 600))
plt.grid(axis='x')
first_year = aaData['year'].min()
last_year = aaData['year'].max()
plt.title('Accident injuries {0}-{1}'.format(first_year, last_year))
plt.text(0.2, 0.92, 'source: NTSB', size=12,
         transform=plt.gca().transAxes, ha='right')
plt.yticks(np.arange(0, 600, 100), [0, 100] * 3)
plt.xlabel('Year')
plt.ylabel('# injuries recorded')
# Leave half a year of margin on both sides of the data.
decimal_years = aaData['decyear']
plt.xlim((decimal_years.min() - 0.5, decimal_years.max() + 0.5))
Out[9]:
In [10]:
# Records per year as two side-by-side histograms with one bin per year:
# the 1975-2016 range on the left, a close-up of 1976-1986 on the right.
plt.figure(figsize=(9, 3))
for position, (first, last) in zip((121, 122), ((1975, 2016), (1976, 1986))):
    plt.subplot(position)
    year_selection = (aaData['year'] >= first) & (aaData['year'] <= last)
    # Edges run to last + 1 so that the final year gets a bin of its own.
    plt.hist(aaData.loc[year_selection, 'year'],
             bins=np.arange(first, last + 2, 1), align='mid')
    plt.xlabel('Year')
    plt.xticks(rotation=45);
    if position == 121:
        # Only the left panel carries the y-axis label.
        plt.ylabel('Accidents recorded')
In [12]:
# Report the shape of the records dated 1981 or earlier, then drop them.
# print(...) with one parenthesised argument prints the same on Python 2 and
# Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print(aaData[aaData['year'] <= 1981].shape)
aaData = aaData[aaData['year'] > 1981]

# Same banded step plot as before, now on the filtered records: each series
# is lifted by a further 200 and the y tick labels restart at 0 per band.
plt.figure(figsize=(9, 4.5))
plt.step(aaData['decyear'], aaData['fatal'], lw=1.75, where='mid', alpha=0.5, label='Fatal')
plt.step(aaData['decyear'], aaData['minor']+200, lw=1.75, where='mid', alpha=0.5, label='Minor')
plt.step(aaData['decyear'], aaData['serious']+200*2, lw=1.75, where='mid', alpha=0.5, label='Serious')
plt.xticks(rotation=45);
plt.legend(loc=(.8, .74), fontsize=13)
plt.ylim((-10, 600))
plt.grid(axis='x')
plt.title('Accident injuries {0}-{1}'.format(aaData['year'].min(), aaData['year'].max()))
plt.text(0.2, 0.92, 'source: NTSB', size=12, transform=plt.gca().transAxes, ha='right')
plt.yticks(np.arange(0, 600, 100), [0, 100, 0, 100, 0, 100]);
plt.xlabel('Year')
plt.ylabel('# injuries recorded')
# Leave half a year of margin on both sides of the data.
plt.xlim((aaData['decyear'].min()-0.5, aaData['decyear'].max()+0.5));
In [13]:
# Group the records by calendar year: one bin edge per year, with
# np.digitize turning each record's year into the index of its bin.
earliest_year = aaData['year'].min()
latest_year = aaData['year'].max()
bins = np.arange(earliest_year, latest_year + 1)
year_bin = np.digitize(aaData['year'], bins)
yearly_dig = aaData.groupby(year_bin)
In [15]:
# Preview the per-year maxima for the first five yearly groups.
yearly_maxima = yearly_dig.max()
yearly_maxima.head()
Out[15]:
In [25]:
# Stacked-area charts of the yearly maximum, mean and minimum of the fatal,
# serious and minor counts, one panel per statistic.
injury_cols = ['fatal', 'serious', 'minor']
# Aggregate only the columns that are plotted: recent pandas raises when
# mean() meets non-numeric columns instead of silently dropping them.
yearly_injuries = yearly_dig[['year'] + injury_cols]

plt.figure(figsize=(12, 12))
for panel, stat in enumerate(['max', 'mean', 'min'], start=1):
    plt.subplot(3, 1, panel)
    data = yearly_injuries.agg(stat)
    # np.vstack is the supported spelling of the deprecated np.row_stack.
    p = plt.stackplot(data['year'], np.vstack([data[col] for col in injury_cols]))
    plt.legend(p, injury_cols)
    # Name the statistic so the three panels can be told apart.
    plt.title('Yearly {0}'.format(stat))
Out[25]:
In [ ]: