In [1]:
# pandas provides the DataFrame used to load and aggregate the accident data.
import pandas as pd
# %pylab populates the interactive namespace from numpy and matplotlib; the
# `pylab` and `plt` names used in this notebook come from this magic rather
# than from an explicit import. `inline` renders figures in the cell output.
%pylab inline
from mpltools import style
# Apply the 'ggplot' style to the figures drawn in the cells below.
style.use('ggplot')
# Default figure size as (width, height); matplotlib measures this in inches.
pylab.rcParams['figure.figsize'] = 16, 12
In [2]:
import glob

# Column names applied to every CSV (the files are read as header-less).
names = ['date', 'type', 'registration', 'operator', 'fat', 'cat']

# Read each CSV once and concatenate in a single step. Growing a frame with
# DataFrame.append inside a loop copies all rows on every iteration and the
# method no longer exists in pandas 2.x; starting from an empty object-dtype
# frame can also degrade the dtypes parsed by read_csv.
# Paths are sorted so the row order is reproducible across runs.
frames = [
    pd.read_csv(path, names=names, parse_dates=[0])
    for path in sorted(glob.glob('data/airline/*.csv'))
]

# pd.concat raises on an empty list, so fall back to an empty frame that
# still carries the expected columns when no file matches the pattern.
if frames:
    data = pd.concat(frames, ignore_index=True)
else:
    data = pd.DataFrame(columns=names)
data.head()
Out[2]:
In [3]:
# Discard every row that has a missing value in any column
# (axis=0 / how='any' are dropna's defaults, spelled out here).
data = data.dropna(axis=0, how='any')
data.head()
Out[3]:
In [4]:
# Show the dtype pandas holds for each column after the rows with missing values were dropped.
data.dtypes
Out[4]:
In [5]:
# Summary statistics of `data` as computed by DataFrame.describe().
data.describe()
Out[5]:
In [6]:
# Keep only the accidents with a positive `fat` count. The explicit .copy()
# makes this an independent frame, so the 'year' and 'month' columns written
# into it by later cells do not raise SettingWithCopyWarning or depend on
# whether pandas returned a view or a copy of `data`.
fatal_accidents = data[data.fat > 0].copy()
In [7]:
# Summary statistics restricted to the rows where `fat` is greater than zero.
fatal_accidents.describe()
Out[7]:
In [8]:
# Add the calendar year of each accident. assign() builds a new frame instead
# of writing a column into a frame obtained by boolean indexing, which avoids
# SettingWithCopyWarning and keeps the cell safe to re-run.
fatal_accidents = fatal_accidents.assign(
    year=pd.DatetimeIndex(fatal_accidents['date']).year
)
In [9]:
# Total of the `fat` column for each calendar year.
fat_grouped_by_year = fatal_accidents.groupby('year')['fat']
deaths = fat_grouped_by_year.sum()
In [10]:
# Bar chart of the yearly totals; labels are set on the Axes returned by
# pandas rather than through the implicit "current axes" of pyplot.
ax = deaths.plot(kind='bar')
ax.set_xlabel('Year')
ax.set_ylabel('Deaths')
ax.set_title('No of deaths in airline accidents')
Out[10]:
In [11]:
# Rows from 1996 whose `fat` value exceeds 50.
in_1996 = fatal_accidents['year'] == 1996
over_fifty = fatal_accidents['fat'] > 50
fatal_accidents.loc[in_1996 & over_fifty]
Out[11]:
In [12]:
# Add the calendar month (1-12) of each accident. assign() returns a new
# frame, so no column is written into a frame obtained by boolean indexing
# (avoids SettingWithCopyWarning) and the cell can be re-run safely.
fatal_accidents = fatal_accidents.assign(
    month=pd.DatetimeIndex(fatal_accidents['date']).month
)

# NOTE: rebinding `deaths` replaces the per-year totals computed earlier.
deaths = fatal_accidents.groupby('month')['fat'].sum()

# Bar chart of the monthly totals, labelled through the returned Axes.
ax = deaths.plot(kind='bar')
ax.set_xlabel('Month')
ax.set_ylabel('Deaths')
ax.set_title('No of deaths in airline accidents')
Out[12]:
In [13]:
# Display the `registration` column of `data`.
data['registration']
Out[13]: