In [1]:
    
import pandas as pd
%pylab inline
from mpltools import style
# Apply the 'ggplot' plotting style through mpltools.
style.use('ggplot')
# Default figure size for every plot in this notebook (matplotlib reads this as
# width, height). `pylab` is presumably put in scope by the %pylab magic above.
pylab.rcParams['figure.figsize'] = 16, 12
    
    
In [2]:
    
import glob

# Column names applied to every CSV under data/airline/ (passed as `names=`,
# so the files are read without a header row).
names = ['date', 'type', 'registration', 'operator' ,'fat', 'cat']

# Read each file once and concatenate in a single step. Growing a DataFrame
# with DataFrame.append inside a loop copies all rows on every iteration and
# the method no longer exists in pandas >= 2.0. sorted() makes the row order
# independent of the filesystem's listing order.
frames = [
    pd.read_csv(f, names=names, parse_dates=[0])  # column 0 ('date') parsed as dates
    for f in sorted(glob.glob('data/airline/*.csv'))
]

# pd.concat raises ValueError on an empty list; keep the original behaviour of
# yielding an empty frame with the expected columns when no file matches.
data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame(columns=names)

data.head()
    
    Out[2]:
In [3]:
    
# Discard every row that has a missing value in any column, then preview
# the first rows of what is left.
data = data.dropna(axis=0, how='any')
data.head(5)
    
    Out[3]:
In [4]:
    
# Show the dtype pandas assigned to each column after loading and dropna().
data.dtypes
    
    Out[4]:
In [5]:
    
# Summary statistics for the cleaned data set.
data.describe()
    
    Out[5]:
In [6]:
    
# Keep only the rows whose `fat` value is positive. The explicit .copy() makes
# this an independent frame: later cells add columns to `fatal_accidents`, and
# doing that on a boolean-mask slice of `data` raises SettingWithCopyWarning.
fatal_accidents = data[data.fat>0].copy()
    
In [7]:
    
# Summary statistics restricted to the rows with fat > 0.
fatal_accidents.describe()
    
    Out[7]:
In [8]:
    
# Add a `year` column extracted from each row's `date`. assign() returns a new
# frame instead of writing into `fatal_accidents` in place, so this does not
# trigger SettingWithCopyWarning when `fatal_accidents` is a filtered slice
# of `data`.
fatal_accidents = fatal_accidents.assign(
    year=pd.DatetimeIndex(fatal_accidents['date']).year
)
    
In [9]:
    
# Total of the `fat` column for each distinct `year`.
deaths = fatal_accidents['fat'].groupby(fatal_accidents['year']).sum()
    
In [10]:
    
# Bar chart of the per-year totals, labelled through the Axes object that
# pandas returns from the plot call.
ax = deaths.plot(kind='bar')
ax.set_xlabel('Year')
ax.set_ylabel('Deaths')
ax.set_title('No of deaths in airline accidents')
    
    Out[10]:
    
In [11]:
    
# Rows from 1996 whose `fat` value exceeds 50.
fatal_accidents.loc[(fatal_accidents['year'] == 1996) & (fatal_accidents['fat'] > 50)]
    
    Out[11]:
In [12]:
    
# Add a `month` column extracted from each row's `date`. assign() builds a new
# frame rather than writing into a filtered slice of `data`, which avoids
# SettingWithCopyWarning.
fatal_accidents = fatal_accidents.assign(
    month=pd.DatetimeIndex(fatal_accidents['date']).month
)

# Total of the `fat` column per calendar month (summed across all years).
# NOTE: this rebinds `deaths`, replacing the per-year totals computed earlier.
deaths = fatal_accidents.groupby('month')['fat'].sum()

deaths.plot(kind='bar')
plt.xlabel('Month')
plt.ylabel('Deaths')
plt.title('No of deaths in airline accidents')
    
    Out[12]:
    
In [13]:
    
# Display the `registration` column of the cleaned data.
data['registration']
    
    Out[13]: