In [1]:
#imports
import pandas as pd
import numpy as np
import csv
In [2]:
airlines_data = '~/Projects/fdp/data/airlines.csv'
airports_data = '~/Projects/fdp/data/airports.csv'
flights_data = '~/Projects/fdp/data/flights.csv'
In [3]:
airlines_df = pd.read_csv(airlines_data)
In [4]:
airports_df = pd.read_csv(airports_data)
In [5]:
flights_df = pd.read_csv(flights_data, dtype= {'ORIGIN_AIRPORT': str, 'DESTINATION_AIRPORT' : str})
In [6]:
flights_df.columns
Out[6]:
Index(['YEAR', 'MONTH', 'DAY', 'DAY_OF_WEEK', 'AIRLINE', 'FLIGHT_NUMBER',
'TAIL_NUMBER', 'ORIGIN_AIRPORT', 'DESTINATION_AIRPORT',
'SCHEDULED_DEPARTURE', 'DEPARTURE_TIME', 'DEPARTURE_DELAY', 'TAXI_OUT',
'WHEELS_OFF', 'SCHEDULED_TIME', 'ELAPSED_TIME', 'AIR_TIME', 'DISTANCE',
'WHEELS_ON', 'TAXI_IN', 'SCHEDULED_ARRIVAL', 'ARRIVAL_TIME',
'ARRIVAL_DELAY', 'DIVERTED', 'CANCELLED', 'CANCELLATION_REASON',
'AIR_SYSTEM_DELAY', 'SECURITY_DELAY', 'AIRLINE_DELAY',
'LATE_AIRCRAFT_DELAY', 'WEATHER_DELAY'],
dtype='object')
In [7]:
flights_df
Out[7]:
YEAR
MONTH
DAY
DAY_OF_WEEK
AIRLINE
FLIGHT_NUMBER
TAIL_NUMBER
ORIGIN_AIRPORT
DESTINATION_AIRPORT
SCHEDULED_DEPARTURE
...
ARRIVAL_TIME
ARRIVAL_DELAY
DIVERTED
CANCELLED
CANCELLATION_REASON
AIR_SYSTEM_DELAY
SECURITY_DELAY
AIRLINE_DELAY
LATE_AIRCRAFT_DELAY
WEATHER_DELAY
0
2015
1
1
4
AS
98
N407AS
ANC
SEA
5
...
408.0
-22.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
1
2015
1
1
4
AA
2336
N3KUAA
LAX
PBI
10
...
741.0
-9.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
2
2015
1
1
4
US
840
N171US
SFO
CLT
20
...
811.0
5.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
3
2015
1
1
4
AA
258
N3HYAA
LAX
MIA
20
...
756.0
-9.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
4
2015
1
1
4
AS
135
N527AS
SEA
ANC
25
...
259.0
-21.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5
2015
1
1
4
DL
806
N3730B
SFO
MSP
25
...
610.0
8.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
6
2015
1
1
4
NK
612
N635NK
LAS
MSP
25
...
509.0
-17.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
7
2015
1
1
4
US
2013
N584UW
LAX
CLT
30
...
753.0
-10.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
8
2015
1
1
4
AA
1112
N3LAAA
SFO
DFW
30
...
532.0
-13.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
9
2015
1
1
4
DL
1173
N826DN
LAS
ATL
30
...
656.0
-15.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
10
2015
1
1
4
DL
2336
N958DN
DEN
ATL
30
...
453.0
-30.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
11
2015
1
1
4
AA
1674
N853AA
LAS
MIA
35
...
753.0
-10.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
12
2015
1
1
4
DL
1434
N547US
LAX
MSP
35
...
605.0
-4.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
13
2015
1
1
4
DL
2324
N3751B
SLC
ATL
40
...
553.0
-22.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
14
2015
1
1
4
DL
2440
N651DL
SEA
MSP
40
...
557.0
8.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
15
2015
1
1
4
AS
108
N309AS
ANC
SEA
45
...
455.0
-14.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
16
2015
1
1
4
DL
1560
N3743H
ANC
SEA
45
...
451.0
-24.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
17
2015
1
1
4
UA
1197
N78448
SFO
IAH
48
...
619.0
-7.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
18
2015
1
1
4
AS
122
N413AS
ANC
PDX
50
...
507.0
-18.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
19
2015
1
1
4
DL
1670
N806DN
PDX
MSP
50
...
551.0
-12.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
20
2015
1
1
4
NK
520
N525NK
LAS
MCI
55
...
543.0
6.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
21
2015
1
1
4
AA
371
N3GXAA
SEA
MIA
100
...
939.0
1.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
22
2015
1
1
4
NK
214
N632NK
LAS
DFW
103
...
529.0
-1.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
23
2015
1
1
4
AA
115
N3CTAA
LAX
MIA
105
...
839.0
-12.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
24
2015
1
1
4
DL
1450
N671DN
LAS
MSP
105
...
545.0
-23.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
25
2015
1
1
4
UA
1545
N76517
LAX
IAH
115
...
607.0
-11.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
26
2015
1
1
4
AS
130
N457AS
FAI
SEA
115
...
545.0
-3.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
27
2015
1
1
4
NK
597
N528NK
MSP
FLL
115
...
607.0
25.0
0
0
NaN
25.0
0.0
0.0
0.0
0.0
28
2015
1
1
4
US
413
N571UW
LAS
CLT
120
...
754.0
-31.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
29
2015
1
1
4
AA
2392
N3HRAA
DEN
MIA
120
...
709.0
2.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
5819049
2015
12
31
4
AA
1538
N866AA
LAX
MIA
2355
...
731.0
-17.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819050
2015
12
31
4
B6
136
N569JB
PHX
JFK
2355
...
607.0
-15.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819051
2015
12
31
4
AS
28
N477AS
SEA
MSP
2355
...
535.0
28.0
0
0
NaN
12.0
0.0
16.0
0.0
0.0
5819052
2015
12
31
4
AS
111
N586AS
LAS
ANC
2355
...
353.0
-62.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819053
2015
12
31
4
AS
121
N768AS
SEA
ANC
2355
...
224.0
-27.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819054
2015
12
31
4
AS
175
N431AS
LAX
ANC
2355
...
400.0
-45.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819055
2015
12
31
4
AS
471
N570AS
LAX
SEA
2355
...
225.0
-19.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819056
2015
12
31
4
AS
850
N536AS
KOA
SEA
2355
...
712.0
-38.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819057
2015
12
31
4
NK
371
N661NK
FLL
BQN
2358
...
346.0
28.0
0
0
NaN
15.0
0.0
13.0
0.0
0.0
5819058
2015
12
31
4
DL
2224
N332NW
PHX
DTW
2359
...
512.0
-31.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819059
2015
12
31
4
F9
710
N209FR
DEN
RSW
2359
...
518.0
-12.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819060
2015
12
31
4
F9
682
N701FR
DEN
MCO
2359
...
504.0
-21.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819061
2015
12
31
4
F9
566
N925FR
DEN
RDU
2359
...
446.0
-18.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819062
2015
12
31
4
F9
300
N949FR
DEN
TPA
2359
...
509.0
-9.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819063
2015
12
31
4
UA
516
N17245
SEA
IAH
2359
...
600.0
-6.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819064
2015
12
31
4
UA
668
N66848
LAS
IAD
2359
...
652.0
-29.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819065
2015
12
31
4
UA
671
N33209
PDX
IAH
2359
...
545.0
-19.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819066
2015
12
31
4
UA
1849
N497UA
LAS
EWR
2359
...
718.0
-24.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819067
2015
12
31
4
AA
2342
N3DLAA
DEN
MIA
2359
...
525.0
-18.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819068
2015
12
31
4
AA
219
N3LYAA
LAX
ORD
2359
...
544.0
-19.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819069
2015
12
31
4
B6
1248
N948JB
LAS
JFK
2359
...
1020.0
159.0
0
0
NaN
0.0
0.0
159.0
0.0
0.0
5819070
2015
12
31
4
B6
80
N584JB
RNO
JFK
2359
...
744.0
-21.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819071
2015
12
31
4
B6
802
N589JB
SLC
MCO
2359
...
625.0
17.0
0
0
NaN
1.0
0.0
16.0
0.0
0.0
5819072
2015
12
31
4
B6
98
N607JB
DEN
JFK
2359
...
519.0
-11.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819073
2015
12
31
4
B6
66
N655JB
ABQ
JFK
2359
...
549.0
3.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819074
2015
12
31
4
B6
688
N657JB
LAX
BOS
2359
...
753.0
-26.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819075
2015
12
31
4
B6
745
N828JB
JFK
PSE
2359
...
430.0
-16.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819076
2015
12
31
4
B6
1503
N913JB
JFK
SJU
2359
...
432.0
-8.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819077
2015
12
31
4
B6
333
N527JB
MCO
SJU
2359
...
330.0
-10.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819078
2015
12
31
4
B6
839
N534JB
JFK
BQN
2359
...
442.0
2.0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
5819079 rows × 31 columns
In [8]:
airlines_df
Out[8]:
IATA_CODE
AIRLINE
0
UA
United Air Lines Inc.
1
AA
American Airlines Inc.
2
US
US Airways Inc.
3
F9
Frontier Airlines Inc.
4
B6
JetBlue Airways
5
OO
Skywest Airlines Inc.
6
AS
Alaska Airlines Inc.
7
NK
Spirit Air Lines
8
WN
Southwest Airlines Co.
9
DL
Delta Air Lines Inc.
10
EV
Atlantic Southeast Airlines
11
HA
Hawaiian Airlines Inc.
12
MQ
American Eagle Airlines Inc.
13
VX
Virgin America
In [9]:
airports_df
Out[9]:
IATA_CODE
AIRPORT
CITY
STATE
COUNTRY
LATITUDE
LONGITUDE
0
ABE
Lehigh Valley International Airport
Allentown
PA
USA
40.65236
-75.44040
1
ABI
Abilene Regional Airport
Abilene
TX
USA
32.41132
-99.68190
2
ABQ
Albuquerque International Sunport
Albuquerque
NM
USA
35.04022
-106.60919
3
ABR
Aberdeen Regional Airport
Aberdeen
SD
USA
45.44906
-98.42183
4
ABY
Southwest Georgia Regional Airport
Albany
GA
USA
31.53552
-84.19447
5
ACK
Nantucket Memorial Airport
Nantucket
MA
USA
41.25305
-70.06018
6
ACT
Waco Regional Airport
Waco
TX
USA
31.61129
-97.23052
7
ACV
Arcata Airport
Arcata/Eureka
CA
USA
40.97812
-124.10862
8
ACY
Atlantic City International Airport
Atlantic City
NJ
USA
39.45758
-74.57717
9
ADK
Adak Airport
Adak
AK
USA
51.87796
-176.64603
10
ADQ
Kodiak Airport
Kodiak
AK
USA
57.74997
-152.49386
11
AEX
Alexandria International Airport
Alexandria
LA
USA
31.32737
-92.54856
12
AGS
Augusta Regional Airport (Bush Field)
Augusta
GA
USA
33.36996
-81.96450
13
AKN
King Salmon Airport
King Salmon
AK
USA
58.67680
-156.64922
14
ALB
Albany International Airport
Albany
NY
USA
42.74812
-73.80298
15
ALO
Waterloo Regional Airport
Waterloo
IA
USA
42.55708
-92.40034
16
AMA
Rick Husband Amarillo International Airport
Amarillo
TX
USA
35.21937
-101.70593
17
ANC
Ted Stevens Anchorage International Airport
Anchorage
AK
USA
61.17432
-149.99619
18
APN
Alpena County Regional Airport
Alpena
MI
USA
45.07807
-83.56029
19
ASE
Aspen-Pitkin County Airport
Aspen
CO
USA
39.22316
-106.86885
20
ATL
Hartsfield-Jackson Atlanta International Airport
Atlanta
GA
USA
33.64044
-84.42694
21
ATW
Appleton International Airport
Appleton
WI
USA
44.25741
-88.51948
22
AUS
Austin-Bergstrom International Airport
Austin
TX
USA
30.19453
-97.66987
23
AVL
Asheville Regional Airport
Asheville
NC
USA
35.43619
-82.54181
24
AVP
Wilkes-Barre/Scranton International Airport
Wilkes-Barre/Scranton
PA
USA
41.33815
-75.72427
25
AZO
Kalamazoo/Battle Creek International Airport
Kalamazoo
MI
USA
42.23488
-85.55206
26
BDL
Bradley International Airport
Windsor Locks
CT
USA
41.93887
-72.68323
27
BET
Bethel Airport
Bethel
AK
USA
60.77978
-161.83800
28
BFL
Meadows Field
Bakersfield
CA
USA
35.43360
-119.05677
29
BGM
Greater Binghamton Airport
Binghamton
NY
USA
42.20848
-75.97961
...
...
...
...
...
...
...
...
292
SRQ
Sarasota-Bradenton International Airport
Sarasota
FL
USA
27.39533
-82.55411
293
STC
St. Cloud Regional Airport
St Cloud
MN
USA
45.54532
-94.05834
294
STL
St. Louis International Airport at Lambert Field
St Louis
MO
USA
38.74769
-90.35999
295
STT
Cyril E. King Airport
Charlotte Amalie
VI
USA
18.33731
-64.97336
296
STX
Henry E. Rohlsen Airport
Christiansted
VI
USA
17.70189
-64.79856
297
SUN
Friedman Memorial Airport
Hailey
ID
USA
43.50484
-114.29659
298
SUX
Sioux Gateway Airport
Sioux City
IA
USA
42.40260
-96.38437
299
SWF
Stewart International Airport
Newburgh
NY
USA
41.50409
-74.10484
300
SYR
Syracuse Hancock International Airport
Syracuse
NY
USA
43.11119
-76.10631
301
TLH
Tallahassee International Airport
Tallahassee
FL
USA
30.39653
-84.35033
302
TOL
Toledo Express Airport
Toledo
OH
USA
41.58681
-83.80783
303
TPA
Tampa International Airport
Tampa
FL
USA
27.97547
-82.53325
304
TRI
Tri-Cities Regional Airport
Bristol
TN
USA
36.47521
-82.40742
305
TTN
Trenton Mercer Airport
Trenton
NJ
USA
40.27669
-74.81347
306
TUL
Tulsa International Airport
Tulsa
OK
USA
36.19837
-95.88824
307
TUS
Tucson International Airport
Tucson
AZ
USA
32.11608
-110.94103
308
TVC
Cherry Capital Airport
Traverse City
MI
USA
44.74144
-85.58224
309
TWF
Magic Valley Regional Airport (Joslin Field)
Twin Falls
ID
USA
42.48180
-114.48774
310
TXK
Texarkana Regional Airport (Webb Field)
Texarkana
AR
USA
33.45371
-93.99102
311
TYR
Tyler Pounds Regional Airport
Tyler
TX
USA
32.35414
-95.40239
312
TYS
McGhee Tyson Airport
Knoxville
TN
USA
35.81249
-83.99286
313
UST
Northeast Florida Regional Airport (St. August...
St. Augustine
FL
USA
NaN
NaN
314
VEL
Valdez Airport
Vernal
UT
USA
40.44090
-109.50992
315
VLD
Valdosta Regional Airport
Valdosta
GA
USA
30.78250
-83.27672
316
VPS
Destin-Fort Walton Beach Airport/Eglin AFB
Valparaiso
FL
USA
30.48325
-86.52540
317
WRG
Wrangell Airport
Wrangell
AK
USA
56.48433
-132.36982
318
WYS
Westerly State Airport
West Yellowstone
MT
USA
44.68840
-111.11764
319
XNA
Northwest Arkansas Regional Airport
Fayetteville/Springdale/Rogers
AR
USA
36.28187
-94.30681
320
YAK
Yakutat Airport
Yakutat
AK
USA
59.50336
-139.66023
321
YUM
Yuma International Airport
Yuma
AZ
USA
32.65658
-114.60597
322 rows × 7 columns
In [28]:
# reduce columns
ftimes_df = flights_df[['AIRLINE', 'ORIGIN_AIRPORT', 'DESTINATION_AIRPORT', 'ELAPSED_TIME']]
ftimes_df.columns
Out[28]:
Index(['AIRLINE', 'ORIGIN_AIRPORT', 'DESTINATION_AIRPORT', 'ELAPSED_TIME'], dtype='object')
In [29]:
# reduce rows to only three airlines
ftimes_df = ftimes_df[(ftimes_df['AIRLINE'] == "UA") |
(ftimes_df['AIRLINE'] == "F9") |
(ftimes_df['AIRLINE'] == "EV")]
ftimes_df
Out[29]:
AIRLINE
ORIGIN_AIRPORT
DESTINATION_AIRPORT
ELAPSED_TIME
17
UA
SFO
IAH
217.0
25
UA
LAX
IAH
175.0
37
UA
SJU
EWR
241.0
49
UA
BQN
EWR
247.0
63
UA
ORD
IAH
182.0
72
EV
MKE
IAH
189.0
76
EV
BRO
IAH
72.0
77
EV
VPS
ATL
57.0
81
EV
BNA
ATL
70.0
83
UA
PHL
IAH
234.0
84
UA
PDX
DEN
142.0
85
UA
SEA
DEN
157.0
87
EV
SBN
ORD
44.0
88
F9
PHL
MCO
146.0
92
UA
SFO
DEN
153.0
93
UA
ORD
DEN
163.0
94
UA
IAD
IAH
215.0
98
EV
CRP
IAH
52.0
99
EV
PIA
DTW
75.0
100
EV
ORD
DTW
67.0
102
UA
SMF
DEN
164.0
109
EV
ATL
IAD
103.0
110
EV
JAX
EWR
132.0
114
F9
MSN
DEN
144.0
116
UA
GEG
DEN
136.0
117
UA
SEA
IAH
266.0
118
UA
DCA
IAH
197.0
125
EV
CHS
EWR
122.0
126
F9
MKE
DEN
142.0
127
F9
MSP
DEN
128.0
...
...
...
...
...
5818981
UA
SFO
MSY
246.0
5818982
F9
LAS
MIA
259.0
5818984
UA
PDX
IAH
234.0
5818992
UA
IAH
SJU
243.0
5818993
UA
LAX
IAD
251.0
5818996
F9
LAX
ATL
295.0
5819001
UA
LAX
IAH
170.0
5819006
UA
LAS
ORD
185.0
5819007
UA
SFO
ATL
284.0
5819010
UA
PHX
EWR
242.0
5819013
F9
LAS
STL
167.0
5819015
UA
ANC
DEN
306.0
5819018
UA
HNL
LAX
313.0
5819019
F9
LAS
MCO
239.0
5819024
UA
SFO
IAH
202.0
5819028
UA
DEN
EWR
186.0
5819034
UA
HNL
SFO
295.0
5819040
UA
LAX
EWR
274.0
5819041
UA
SFO
EWR
301.0
5819043
F9
DEN
PBI
200.0
5819044
F9
DEN
ATL
147.0
5819048
UA
LAX
ORD
220.0
5819059
F9
DEN
RSW
207.0
5819060
F9
DEN
MCO
190.0
5819061
F9
DEN
RDU
170.0
5819062
F9
DEN
TPA
184.0
5819063
UA
SEA
IAH
239.0
5819064
UA
LAS
IAD
236.0
5819065
UA
PDX
IAH
233.0
5819066
UA
LAS
EWR
261.0
1178536 rows × 4 columns
In [30]:
# reduce rows to only three destination
ftimes_df = ftimes_df[(ftimes_df['DESTINATION_AIRPORT'] == "ORD") |
(ftimes_df['DESTINATION_AIRPORT'] == "IAH") |
(ftimes_df['DESTINATION_AIRPORT'] == "ATL")]
ftimes_df
Out[30]:
AIRLINE
ORIGIN_AIRPORT
DESTINATION_AIRPORT
ELAPSED_TIME
17
UA
SFO
IAH
217.0
25
UA
LAX
IAH
175.0
63
UA
ORD
IAH
182.0
72
EV
MKE
IAH
189.0
76
EV
BRO
IAH
72.0
77
EV
VPS
ATL
57.0
81
EV
BNA
ATL
70.0
83
UA
PHL
IAH
234.0
87
EV
SBN
ORD
44.0
94
UA
IAD
IAH
215.0
98
EV
CRP
IAH
52.0
117
UA
SEA
IAH
266.0
118
UA
DCA
IAH
197.0
137
UA
CLE
IAH
198.0
145
EV
MCI
IAH
138.0
148
EV
RIC
IAH
220.0
154
EV
CID
ATL
114.0
163
EV
MCI
ORD
83.0
164
EV
LAN
ORD
75.0
169
UA
LAX
IAH
178.0
173
UA
PBI
IAH
172.0
174
UA
PHX
IAH
160.0
175
UA
RSW
IAH
155.0
177
UA
OMA
ORD
79.0
182
UA
PIT
IAH
197.0
183
UA
DEN
IAH
146.0
191
UA
SFO
ORD
256.0
193
UA
PDX
IAH
242.0
194
UA
BOS
ORD
165.0
199
UA
LAX
ORD
227.0
...
...
...
...
...
5817723
UA
ORD
IAH
166.0
5817748
UA
IAH
ORD
132.0
5817755
EV
AEX
IAH
58.0
5817757
UA
IAH
ATL
107.0
5817838
EV
HRL
IAH
61.0
5817843
EV
BRO
IAH
69.0
5817865
F9
MIA
ATL
134.0
5817893
UA
EWR
IAH
229.0
5818013
F9
ORD
ATL
104.0
5818091
UA
HNL
IAH
430.0
5818133
UA
EWR
ATL
137.0
5818156
UA
EWR
ORD
148.0
5818383
UA
ANC
ORD
345.0
5818454
F9
DEN
IAH
143.0
5818597
F9
PHL
ATL
132.0
5818899
UA
SAN
ORD
205.0
5818947
F9
LAS
ORD
198.0
5818964
UA
SEA
ORD
236.0
5818973
UA
SFO
ORD
241.0
5818979
UA
LAX
ORD
225.0
5818984
UA
PDX
IAH
234.0
5818996
F9
LAX
ATL
295.0
5819001
UA
LAX
IAH
170.0
5819006
UA
LAS
ORD
185.0
5819007
UA
SFO
ATL
284.0
5819024
UA
SFO
IAH
202.0
5819044
F9
DEN
ATL
147.0
5819048
UA
LAX
ORD
220.0
5819063
UA
SEA
IAH
239.0
5819065
UA
PDX
IAH
233.0
287053 rows × 4 columns
In [31]:
# reduce rows to only one origin airports
ftimes_df = ftimes_df[(ftimes_df['ORIGIN_AIRPORT'] == "IAD")]
ftimes_df
Out[31]:
AIRLINE
ORIGIN_AIRPORT
DESTINATION_AIRPORT
ELAPSED_TIME
94
UA
IAD
IAH
215.0
1397
UA
IAD
ORD
139.0
1756
UA
IAD
IAH
218.0
1830
EV
IAD
ATL
115.0
3121
UA
IAD
ORD
123.0
5897
F9
IAD
ATL
110.0
5993
UA
IAD
ORD
121.0
6561
EV
IAD
ATL
112.0
9824
UA
IAD
ORD
119.0
10169
UA
IAD
IAH
238.0
13523
EV
IAD
ATL
111.0
13546
EV
IAD
IAH
222.0
13696
F9
IAD
ORD
132.0
14122
UA
IAD
IAH
212.0
16456
F9
IAD
ORD
125.0
16615
UA
IAD
ORD
127.0
16854
UA
IAD
IAH
220.0
16962
EV
IAD
ATL
122.0
18534
UA
IAD
ORD
124.0
19409
F9
IAD
ATL
118.0
21678
UA
IAD
ORD
109.0
24966
EV
IAD
ATL
124.0
25789
UA
IAD
ORD
128.0
26165
UA
IAD
IAH
227.0
30145
EV
IAD
ATL
127.0
30197
EV
IAD
IAH
NaN
30852
UA
IAD
IAH
258.0
33005
F9
IAD
ORD
124.0
33025
UA
IAD
ORD
124.0
33372
UA
IAD
IAH
241.0
...
...
...
...
...
5764611
UA
IAD
IAH
210.0
5767801
F9
IAD
ATL
146.0
5768865
UA
IAD
IAH
194.0
5768925
UA
IAD
ORD
187.0
5773888
UA
IAD
IAH
200.0
5776164
UA
IAD
IAH
194.0
5776958
UA
IAD
ORD
129.0
5777924
EV
IAD
ATL
106.0
5778031
UA
IAD
ORD
NaN
5780820
UA
IAD
IAH
204.0
5784005
F9
IAD
ATL
109.0
5785069
UA
IAD
IAH
196.0
5785125
UA
IAD
ORD
107.0
5790092
UA
IAD
IAH
205.0
5792386
UA
IAD
IAH
205.0
5793066
UA
IAD
ORD
NaN
5794142
EV
IAD
ATL
156.0
5794253
UA
IAD
ORD
NaN
5797056
UA
IAD
IAH
212.0
5798947
EV
IAD
ATL
103.0
5800238
F9
IAD
ATL
146.0
5801305
UA
IAD
IAH
208.0
5801363
UA
IAD
ORD
118.0
5806274
UA
IAD
IAH
218.0
5808222
UA
IAD
IAH
213.0
5809966
UA
IAD
ORD
114.0
5812504
UA
IAD
IAH
216.0
5815477
F9
IAD
ATL
111.0
5816331
UA
IAD
IAH
213.0
5816369
UA
IAD
ORD
126.0
3668 rows × 4 columns
In [ ]:
# ftimes_df = flights_df.groupby(['AIRLINE'], as_index=True)['ELAPSED_TIME'].mean().reset_index()
In [ ]:
# ftimes_df
In [32]:
ftimes_df = ftimes_df.groupby(['AIRLINE', 'DESTINATION_AIRPORT'], as_index=True)['ELAPSED_TIME'].mean().reset_index()
In [33]:
ftimes_df
Out[33]:
AIRLINE
DESTINATION_AIRPORT
ELAPSED_TIME
0
EV
ATL
117.310734
1
EV
IAH
217.524590
2
EV
ORD
128.888889
3
F9
ATL
115.952381
4
F9
ORD
120.947619
5
UA
ATL
109.315789
6
UA
IAH
192.006923
7
UA
ORD
119.410381
In [70]:
means_ev = ftimes_df.ELAPSED_TIME[(ftimes_df.AIRLINE == "EV")].tolist()
means_f9 = ftimes_df.ELAPSED_TIME[(ftimes_df.AIRLINE == "F9")].tolist()
means_ua = ftimes_df.ELAPSED_TIME[(ftimes_df.AIRLINE == "UA")].tolist()
means_f9.insert(1, 0.0)
print(means_f9)
[115.95238095238095, 0.0, 120.94761904761904]
In [57]:
# import numpy as np
# import matplotlib.pyplot as plt
# %matplotlib inline
# ft.plot.bar()
In [73]:
import numpy as np
import matplotlib.pyplot as plt
n_groups = 3
# create plot
fig, ax = plt.subplots()
index = np.arange(n_groups)
bar_width = 0.25
opacity = 0.8
rects1 = plt.bar(index, means_ev, bar_width,
alpha=opacity,
color='b',
label='AtlanticSE')
rects2 = plt.bar(index + bar_width, means_ua, bar_width,
alpha=opacity,
color='r',
label='United')
rects3 = plt.bar(index + 2*bar_width, means_f9, bar_width,
alpha=opacity,
color='g',
label='Frontier')
plt.xlabel('Airport')
plt.ylabel('Minutes')
plt.title('Comparison of Flight Times from Dulles Airport')
plt.xticks(index + bar_width, ('ATL', 'IAH', 'ORD'))
plt.legend()
plt.tight_layout()
plt.show()
In [ ]:
# from bokeh.plotting import figure, output_file, show
# import bokeh.plotting as bk
# bk.output_notebook()
# x=ftimes_df['AIRLINE']
# y=ftimes_df['ELAPSED_TIME']
# p = figure()
# p. circle(x,y,fill_color="red",size=2)
# #p.line(x,y,line_width=2)
# #output_file("scatter.html")
# bk.show(p)
In [ ]:
Content source: jrentsch/2015_flight_analysis
Similar notebooks: