In [1]:
import pandas as pd
%pylab inline
from mpltools import style
style.use('ggplot')
pylab.rcParams['figure.figsize'] = 16, 12


Populating the interactive namespace from numpy and matplotlib

In [2]:
import glob

names = ['date', 'type', 'registration', 'operator' ,'fat', 'cat']

data = pd.DataFrame(columns=names)

for f in glob.glob('data/airline/*.csv'):
    tmp = pd.read_csv(f, names=names, parse_dates=[0])
    data = data.append(tmp, ignore_index=True)
    
data.head()


Out[2]:
date type registration operator fat cat
0 1995-02-01 Boeing 737-298C 9Q-CNI Air Zaire 0 A1
1 1995-03-01 DHC-6 Twin Otter 310 P2-IAA Islands Nationair 0 A1
2 1995-04-01 Fokker NaN Sudan Airways 0 H2
3 1995-05-01 Lockheed L-1329-25 JetStar II 1003 Iran AF 12 A1
4 1995-05-01 Fokker 50 LN-BBA Norwegian Air Shuttle opf. Braathens 0 A2

In [3]:
data = data.dropna()
data.head()


Out[3]:
date type registration operator fat cat
0 1995-02-01 Boeing 737-298C 9Q-CNI Air Zaire 0 A1
1 1995-03-01 DHC-6 Twin Otter 310 P2-IAA Islands Nationair 0 A1
3 1995-05-01 Lockheed L-1329-25 JetStar II 1003 Iran AF 12 A1
4 1995-05-01 Fokker 50 LN-BBA Norwegian Air Shuttle opf. Braathens 0 A2
5 1995-10-01 DHC-6 Twin Otter 300 PK-NUK Merpati Nusantara 14 A1

In [4]:
data.dtypes


Out[4]:
date            datetime64[ns]
type                    object
registration            object
operator                object
fat                    float64
cat                     object
dtype: object

In [5]:
data.describe()


Out[5]:
fat
count 2403.000000
mean 6.635040
std 24.869157
min 0.000000
25% 0.000000
50% 0.000000
75% 3.000000
max 312.000000

Accident Categories:

A = Accident
I = Incident
H = Hijacking
C = Criminal occurrence (sabotage, shoot down)
O = other occurrence (ground fire, sabotage)

1 = hull-loss
2 = repairable damage

E.g. the A1 category means an Accident resulting in a total loss of the plane.


In [6]:
fatal_accidents = data[data.fat>0]

In [7]:
fatal_accidents.describe()


Out[7]:
fat
count 908.000000
mean 17.559471
std 38.024506
min 1.000000
25% 2.000000
50% 5.000000
75% 13.000000
max 312.000000

In [8]:
fatal_accidents['year'] = pd.DatetimeIndex(fatal_accidents['date']).year

In [9]:
deaths = fatal_accidents.groupby('year')['fat'].sum()

In [10]:
deaths.plot(kind='bar')
plt.xlabel('Year')
plt.ylabel('Deaths')
plt.title('No of deaths in airline accidents')


Out[10]:
<matplotlib.text.Text at 0x109d70990>

In [11]:
fatal_accidents[(fatal_accidents.year == 1996) & (fatal_accidents.fat>50)]


Out[11]:
date type registration operator fat cat year
205 1996-08-01 Antonov 32B RA-26222 Moscow Airlines opf. African Air 237 A1 1996
216 1996-06-02 Boeing 757-225 TC-GEN Birgenair opf. Alas Nacionales 189 A1 1996
230 1996-02-29 Boeing 737-222 OB-1451 Faucett 123 A1 1996
262 1996-03-05 Antonov 24RV ST-FAG Federal Airlines 53 A1 1996
267 1996-11-05 DC-9-32 N904VJ ValuJet Airlines 110 A1 1996
307 1996-07-17 Boeing 747-131 N93119 TWA 230 A1 1996
340 1996-08-29 Tupolev 154M RA-85621 Vnukovo Airlines 141 A1 1996
351 1996-02-10 Boeing 757-23A N52AW AeroPeru 70 A1 1996
366 1996-10-31 Fokker 100 PT-MRK TAM 99 A1 1996
372 1996-07-11 Boeing 727-231 5N-BBG ADC Airlines 144 A1 1996
373 1996-12-11 Boeing 747-168B HZ-AIH Saudi Arabian 312 A1 1996
383 1996-11-23 Boeing 767-260ER ET-AIZ Ethiopian Airlines 125 H1 1996

In [12]:
fatal_accidents['month'] = pd.DatetimeIndex(fatal_accidents['date']).month
deaths = fatal_accidents.groupby('month')['fat'].sum()
deaths.plot(kind='bar')
plt.xlabel('Month')
plt.ylabel('Deaths')
plt.title('No of deaths in airline accidents')


Out[12]:
<matplotlib.text.Text at 0x10a5af910>

In [13]:
data.registration


Out[13]:
0       9Q-CNI
1       P2-IAA
3         1003
4       LN-BBA
5       PK-NUK
6     HK-3839X
7       N746FE
8       C-GPUN
9       N754FE
10    RA-87565
11      N622SP
12      PK-JHF
14      9N-ABI
15      N9461B
16    UR-48074
...
2552     AP-BGN
2553     5Y-CET
2554     9Q-CYA
2555     8R-GGY
2556    19 blue
2557     5A-LAS
2558     5A-DNG
2559     9M-MRD
2560     UR-BNK
2561     5A-LAB
2562     5A-ONF
2563     S2-AFN
2564    B-22810
2565     EC-LTV
2566     PT-TAW
Name: registration, Length: 2403, dtype: object