In [1]:
#imports
import pandas as pd
import numpy as np
import csv

In [2]:
airlines_data = '~/Projects/fdp/data/airlines.csv'
airports_data = '~/Projects/fdp/data/airports.csv'
flights_data = '~/Projects/fdp/data/flights.csv'

In [3]:
airlines_df = pd.read_csv(airlines_data)

In [4]:
airports_df = pd.read_csv(airports_data)

In [5]:
flights_df = pd.read_csv(flights_data, dtype= {'ORIGIN_AIRPORT': str, 'DESTINATION_AIRPORT' : str})

In [6]:
flights_df.columns


Out[6]:
Index(['YEAR', 'MONTH', 'DAY', 'DAY_OF_WEEK', 'AIRLINE', 'FLIGHT_NUMBER',
       'TAIL_NUMBER', 'ORIGIN_AIRPORT', 'DESTINATION_AIRPORT',
       'SCHEDULED_DEPARTURE', 'DEPARTURE_TIME', 'DEPARTURE_DELAY', 'TAXI_OUT',
       'WHEELS_OFF', 'SCHEDULED_TIME', 'ELAPSED_TIME', 'AIR_TIME', 'DISTANCE',
       'WHEELS_ON', 'TAXI_IN', 'SCHEDULED_ARRIVAL', 'ARRIVAL_TIME',
       'ARRIVAL_DELAY', 'DIVERTED', 'CANCELLED', 'CANCELLATION_REASON',
       'AIR_SYSTEM_DELAY', 'SECURITY_DELAY', 'AIRLINE_DELAY',
       'LATE_AIRCRAFT_DELAY', 'WEATHER_DELAY'],
      dtype='object')

In [7]:
flights_df


Out[7]:
YEAR MONTH DAY DAY_OF_WEEK AIRLINE FLIGHT_NUMBER TAIL_NUMBER ORIGIN_AIRPORT DESTINATION_AIRPORT SCHEDULED_DEPARTURE ... ARRIVAL_TIME ARRIVAL_DELAY DIVERTED CANCELLED CANCELLATION_REASON AIR_SYSTEM_DELAY SECURITY_DELAY AIRLINE_DELAY LATE_AIRCRAFT_DELAY WEATHER_DELAY
0 2015 1 1 4 AS 98 N407AS ANC SEA 5 ... 408.0 -22.0 0 0 NaN NaN NaN NaN NaN NaN
1 2015 1 1 4 AA 2336 N3KUAA LAX PBI 10 ... 741.0 -9.0 0 0 NaN NaN NaN NaN NaN NaN
2 2015 1 1 4 US 840 N171US SFO CLT 20 ... 811.0 5.0 0 0 NaN NaN NaN NaN NaN NaN
3 2015 1 1 4 AA 258 N3HYAA LAX MIA 20 ... 756.0 -9.0 0 0 NaN NaN NaN NaN NaN NaN
4 2015 1 1 4 AS 135 N527AS SEA ANC 25 ... 259.0 -21.0 0 0 NaN NaN NaN NaN NaN NaN
5 2015 1 1 4 DL 806 N3730B SFO MSP 25 ... 610.0 8.0 0 0 NaN NaN NaN NaN NaN NaN
6 2015 1 1 4 NK 612 N635NK LAS MSP 25 ... 509.0 -17.0 0 0 NaN NaN NaN NaN NaN NaN
7 2015 1 1 4 US 2013 N584UW LAX CLT 30 ... 753.0 -10.0 0 0 NaN NaN NaN NaN NaN NaN
8 2015 1 1 4 AA 1112 N3LAAA SFO DFW 30 ... 532.0 -13.0 0 0 NaN NaN NaN NaN NaN NaN
9 2015 1 1 4 DL 1173 N826DN LAS ATL 30 ... 656.0 -15.0 0 0 NaN NaN NaN NaN NaN NaN
10 2015 1 1 4 DL 2336 N958DN DEN ATL 30 ... 453.0 -30.0 0 0 NaN NaN NaN NaN NaN NaN
11 2015 1 1 4 AA 1674 N853AA LAS MIA 35 ... 753.0 -10.0 0 0 NaN NaN NaN NaN NaN NaN
12 2015 1 1 4 DL 1434 N547US LAX MSP 35 ... 605.0 -4.0 0 0 NaN NaN NaN NaN NaN NaN
13 2015 1 1 4 DL 2324 N3751B SLC ATL 40 ... 553.0 -22.0 0 0 NaN NaN NaN NaN NaN NaN
14 2015 1 1 4 DL 2440 N651DL SEA MSP 40 ... 557.0 8.0 0 0 NaN NaN NaN NaN NaN NaN
15 2015 1 1 4 AS 108 N309AS ANC SEA 45 ... 455.0 -14.0 0 0 NaN NaN NaN NaN NaN NaN
16 2015 1 1 4 DL 1560 N3743H ANC SEA 45 ... 451.0 -24.0 0 0 NaN NaN NaN NaN NaN NaN
17 2015 1 1 4 UA 1197 N78448 SFO IAH 48 ... 619.0 -7.0 0 0 NaN NaN NaN NaN NaN NaN
18 2015 1 1 4 AS 122 N413AS ANC PDX 50 ... 507.0 -18.0 0 0 NaN NaN NaN NaN NaN NaN
19 2015 1 1 4 DL 1670 N806DN PDX MSP 50 ... 551.0 -12.0 0 0 NaN NaN NaN NaN NaN NaN
20 2015 1 1 4 NK 520 N525NK LAS MCI 55 ... 543.0 6.0 0 0 NaN NaN NaN NaN NaN NaN
21 2015 1 1 4 AA 371 N3GXAA SEA MIA 100 ... 939.0 1.0 0 0 NaN NaN NaN NaN NaN NaN
22 2015 1 1 4 NK 214 N632NK LAS DFW 103 ... 529.0 -1.0 0 0 NaN NaN NaN NaN NaN NaN
23 2015 1 1 4 AA 115 N3CTAA LAX MIA 105 ... 839.0 -12.0 0 0 NaN NaN NaN NaN NaN NaN
24 2015 1 1 4 DL 1450 N671DN LAS MSP 105 ... 545.0 -23.0 0 0 NaN NaN NaN NaN NaN NaN
25 2015 1 1 4 UA 1545 N76517 LAX IAH 115 ... 607.0 -11.0 0 0 NaN NaN NaN NaN NaN NaN
26 2015 1 1 4 AS 130 N457AS FAI SEA 115 ... 545.0 -3.0 0 0 NaN NaN NaN NaN NaN NaN
27 2015 1 1 4 NK 597 N528NK MSP FLL 115 ... 607.0 25.0 0 0 NaN 25.0 0.0 0.0 0.0 0.0
28 2015 1 1 4 US 413 N571UW LAS CLT 120 ... 754.0 -31.0 0 0 NaN NaN NaN NaN NaN NaN
29 2015 1 1 4 AA 2392 N3HRAA DEN MIA 120 ... 709.0 2.0 0 0 NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5819049 2015 12 31 4 AA 1538 N866AA LAX MIA 2355 ... 731.0 -17.0 0 0 NaN NaN NaN NaN NaN NaN
5819050 2015 12 31 4 B6 136 N569JB PHX JFK 2355 ... 607.0 -15.0 0 0 NaN NaN NaN NaN NaN NaN
5819051 2015 12 31 4 AS 28 N477AS SEA MSP 2355 ... 535.0 28.0 0 0 NaN 12.0 0.0 16.0 0.0 0.0
5819052 2015 12 31 4 AS 111 N586AS LAS ANC 2355 ... 353.0 -62.0 0 0 NaN NaN NaN NaN NaN NaN
5819053 2015 12 31 4 AS 121 N768AS SEA ANC 2355 ... 224.0 -27.0 0 0 NaN NaN NaN NaN NaN NaN
5819054 2015 12 31 4 AS 175 N431AS LAX ANC 2355 ... 400.0 -45.0 0 0 NaN NaN NaN NaN NaN NaN
5819055 2015 12 31 4 AS 471 N570AS LAX SEA 2355 ... 225.0 -19.0 0 0 NaN NaN NaN NaN NaN NaN
5819056 2015 12 31 4 AS 850 N536AS KOA SEA 2355 ... 712.0 -38.0 0 0 NaN NaN NaN NaN NaN NaN
5819057 2015 12 31 4 NK 371 N661NK FLL BQN 2358 ... 346.0 28.0 0 0 NaN 15.0 0.0 13.0 0.0 0.0
5819058 2015 12 31 4 DL 2224 N332NW PHX DTW 2359 ... 512.0 -31.0 0 0 NaN NaN NaN NaN NaN NaN
5819059 2015 12 31 4 F9 710 N209FR DEN RSW 2359 ... 518.0 -12.0 0 0 NaN NaN NaN NaN NaN NaN
5819060 2015 12 31 4 F9 682 N701FR DEN MCO 2359 ... 504.0 -21.0 0 0 NaN NaN NaN NaN NaN NaN
5819061 2015 12 31 4 F9 566 N925FR DEN RDU 2359 ... 446.0 -18.0 0 0 NaN NaN NaN NaN NaN NaN
5819062 2015 12 31 4 F9 300 N949FR DEN TPA 2359 ... 509.0 -9.0 0 0 NaN NaN NaN NaN NaN NaN
5819063 2015 12 31 4 UA 516 N17245 SEA IAH 2359 ... 600.0 -6.0 0 0 NaN NaN NaN NaN NaN NaN
5819064 2015 12 31 4 UA 668 N66848 LAS IAD 2359 ... 652.0 -29.0 0 0 NaN NaN NaN NaN NaN NaN
5819065 2015 12 31 4 UA 671 N33209 PDX IAH 2359 ... 545.0 -19.0 0 0 NaN NaN NaN NaN NaN NaN
5819066 2015 12 31 4 UA 1849 N497UA LAS EWR 2359 ... 718.0 -24.0 0 0 NaN NaN NaN NaN NaN NaN
5819067 2015 12 31 4 AA 2342 N3DLAA DEN MIA 2359 ... 525.0 -18.0 0 0 NaN NaN NaN NaN NaN NaN
5819068 2015 12 31 4 AA 219 N3LYAA LAX ORD 2359 ... 544.0 -19.0 0 0 NaN NaN NaN NaN NaN NaN
5819069 2015 12 31 4 B6 1248 N948JB LAS JFK 2359 ... 1020.0 159.0 0 0 NaN 0.0 0.0 159.0 0.0 0.0
5819070 2015 12 31 4 B6 80 N584JB RNO JFK 2359 ... 744.0 -21.0 0 0 NaN NaN NaN NaN NaN NaN
5819071 2015 12 31 4 B6 802 N589JB SLC MCO 2359 ... 625.0 17.0 0 0 NaN 1.0 0.0 16.0 0.0 0.0
5819072 2015 12 31 4 B6 98 N607JB DEN JFK 2359 ... 519.0 -11.0 0 0 NaN NaN NaN NaN NaN NaN
5819073 2015 12 31 4 B6 66 N655JB ABQ JFK 2359 ... 549.0 3.0 0 0 NaN NaN NaN NaN NaN NaN
5819074 2015 12 31 4 B6 688 N657JB LAX BOS 2359 ... 753.0 -26.0 0 0 NaN NaN NaN NaN NaN NaN
5819075 2015 12 31 4 B6 745 N828JB JFK PSE 2359 ... 430.0 -16.0 0 0 NaN NaN NaN NaN NaN NaN
5819076 2015 12 31 4 B6 1503 N913JB JFK SJU 2359 ... 432.0 -8.0 0 0 NaN NaN NaN NaN NaN NaN
5819077 2015 12 31 4 B6 333 N527JB MCO SJU 2359 ... 330.0 -10.0 0 0 NaN NaN NaN NaN NaN NaN
5819078 2015 12 31 4 B6 839 N534JB JFK BQN 2359 ... 442.0 2.0 0 0 NaN NaN NaN NaN NaN NaN

5819079 rows × 31 columns


In [8]:
airlines_df


Out[8]:
IATA_CODE AIRLINE
0 UA United Air Lines Inc.
1 AA American Airlines Inc.
2 US US Airways Inc.
3 F9 Frontier Airlines Inc.
4 B6 JetBlue Airways
5 OO Skywest Airlines Inc.
6 AS Alaska Airlines Inc.
7 NK Spirit Air Lines
8 WN Southwest Airlines Co.
9 DL Delta Air Lines Inc.
10 EV Atlantic Southeast Airlines
11 HA Hawaiian Airlines Inc.
12 MQ American Eagle Airlines Inc.
13 VX Virgin America

In [9]:
airports_df


Out[9]:
IATA_CODE AIRPORT CITY STATE COUNTRY LATITUDE LONGITUDE
0 ABE Lehigh Valley International Airport Allentown PA USA 40.65236 -75.44040
1 ABI Abilene Regional Airport Abilene TX USA 32.41132 -99.68190
2 ABQ Albuquerque International Sunport Albuquerque NM USA 35.04022 -106.60919
3 ABR Aberdeen Regional Airport Aberdeen SD USA 45.44906 -98.42183
4 ABY Southwest Georgia Regional Airport Albany GA USA 31.53552 -84.19447
5 ACK Nantucket Memorial Airport Nantucket MA USA 41.25305 -70.06018
6 ACT Waco Regional Airport Waco TX USA 31.61129 -97.23052
7 ACV Arcata Airport Arcata/Eureka CA USA 40.97812 -124.10862
8 ACY Atlantic City International Airport Atlantic City NJ USA 39.45758 -74.57717
9 ADK Adak Airport Adak AK USA 51.87796 -176.64603
10 ADQ Kodiak Airport Kodiak AK USA 57.74997 -152.49386
11 AEX Alexandria International Airport Alexandria LA USA 31.32737 -92.54856
12 AGS Augusta Regional Airport (Bush Field) Augusta GA USA 33.36996 -81.96450
13 AKN King Salmon Airport King Salmon AK USA 58.67680 -156.64922
14 ALB Albany International Airport Albany NY USA 42.74812 -73.80298
15 ALO Waterloo Regional Airport Waterloo IA USA 42.55708 -92.40034
16 AMA Rick Husband Amarillo International Airport Amarillo TX USA 35.21937 -101.70593
17 ANC Ted Stevens Anchorage International Airport Anchorage AK USA 61.17432 -149.99619
18 APN Alpena County Regional Airport Alpena MI USA 45.07807 -83.56029
19 ASE Aspen-Pitkin County Airport Aspen CO USA 39.22316 -106.86885
20 ATL Hartsfield-Jackson Atlanta International Airport Atlanta GA USA 33.64044 -84.42694
21 ATW Appleton International Airport Appleton WI USA 44.25741 -88.51948
22 AUS Austin-Bergstrom International Airport Austin TX USA 30.19453 -97.66987
23 AVL Asheville Regional Airport Asheville NC USA 35.43619 -82.54181
24 AVP Wilkes-Barre/Scranton International Airport Wilkes-Barre/Scranton PA USA 41.33815 -75.72427
25 AZO Kalamazoo/Battle Creek International Airport Kalamazoo MI USA 42.23488 -85.55206
26 BDL Bradley International Airport Windsor Locks CT USA 41.93887 -72.68323
27 BET Bethel Airport Bethel AK USA 60.77978 -161.83800
28 BFL Meadows Field Bakersfield CA USA 35.43360 -119.05677
29 BGM Greater Binghamton Airport Binghamton NY USA 42.20848 -75.97961
... ... ... ... ... ... ... ...
292 SRQ Sarasota-Bradenton International Airport Sarasota FL USA 27.39533 -82.55411
293 STC St. Cloud Regional Airport St Cloud MN USA 45.54532 -94.05834
294 STL St. Louis International Airport at Lambert Field St Louis MO USA 38.74769 -90.35999
295 STT Cyril E. King Airport Charlotte Amalie VI USA 18.33731 -64.97336
296 STX Henry E. Rohlsen Airport Christiansted VI USA 17.70189 -64.79856
297 SUN Friedman Memorial Airport Hailey ID USA 43.50484 -114.29659
298 SUX Sioux Gateway Airport Sioux City IA USA 42.40260 -96.38437
299 SWF Stewart International Airport Newburgh NY USA 41.50409 -74.10484
300 SYR Syracuse Hancock International Airport Syracuse NY USA 43.11119 -76.10631
301 TLH Tallahassee International Airport Tallahassee FL USA 30.39653 -84.35033
302 TOL Toledo Express Airport Toledo OH USA 41.58681 -83.80783
303 TPA Tampa International Airport Tampa FL USA 27.97547 -82.53325
304 TRI Tri-Cities Regional Airport Bristol TN USA 36.47521 -82.40742
305 TTN Trenton Mercer Airport Trenton NJ USA 40.27669 -74.81347
306 TUL Tulsa International Airport Tulsa OK USA 36.19837 -95.88824
307 TUS Tucson International Airport Tucson AZ USA 32.11608 -110.94103
308 TVC Cherry Capital Airport Traverse City MI USA 44.74144 -85.58224
309 TWF Magic Valley Regional Airport (Joslin Field) Twin Falls ID USA 42.48180 -114.48774
310 TXK Texarkana Regional Airport (Webb Field) Texarkana AR USA 33.45371 -93.99102
311 TYR Tyler Pounds Regional Airport Tyler TX USA 32.35414 -95.40239
312 TYS McGhee Tyson Airport Knoxville TN USA 35.81249 -83.99286
313 UST Northeast Florida Regional Airport (St. August... St. Augustine FL USA NaN NaN
314 VEL Valdez Airport Vernal UT USA 40.44090 -109.50992
315 VLD Valdosta Regional Airport Valdosta GA USA 30.78250 -83.27672
316 VPS Destin-Fort Walton Beach Airport/Eglin AFB Valparaiso FL USA 30.48325 -86.52540
317 WRG Wrangell Airport Wrangell AK USA 56.48433 -132.36982
318 WYS Westerly State Airport West Yellowstone MT USA 44.68840 -111.11764
319 XNA Northwest Arkansas Regional Airport Fayetteville/Springdale/Rogers AR USA 36.28187 -94.30681
320 YAK Yakutat Airport Yakutat AK USA 59.50336 -139.66023
321 YUM Yuma International Airport Yuma AZ USA 32.65658 -114.60597

322 rows × 7 columns


In [28]:
# reduce columns
ftimes_df = flights_df[['AIRLINE', 'ORIGIN_AIRPORT', 'DESTINATION_AIRPORT', 'ELAPSED_TIME']]
ftimes_df.columns


Out[28]:
Index(['AIRLINE', 'ORIGIN_AIRPORT', 'DESTINATION_AIRPORT', 'ELAPSED_TIME'], dtype='object')

In [29]:
# reduce rows to only three airlines
ftimes_df = ftimes_df[(ftimes_df['AIRLINE'] == "UA") |
                      (ftimes_df['AIRLINE'] == "F9") |
                      (ftimes_df['AIRLINE'] == "EV")]
ftimes_df


Out[29]:
AIRLINE ORIGIN_AIRPORT DESTINATION_AIRPORT ELAPSED_TIME
17 UA SFO IAH 217.0
25 UA LAX IAH 175.0
37 UA SJU EWR 241.0
49 UA BQN EWR 247.0
63 UA ORD IAH 182.0
72 EV MKE IAH 189.0
76 EV BRO IAH 72.0
77 EV VPS ATL 57.0
81 EV BNA ATL 70.0
83 UA PHL IAH 234.0
84 UA PDX DEN 142.0
85 UA SEA DEN 157.0
87 EV SBN ORD 44.0
88 F9 PHL MCO 146.0
92 UA SFO DEN 153.0
93 UA ORD DEN 163.0
94 UA IAD IAH 215.0
98 EV CRP IAH 52.0
99 EV PIA DTW 75.0
100 EV ORD DTW 67.0
102 UA SMF DEN 164.0
109 EV ATL IAD 103.0
110 EV JAX EWR 132.0
114 F9 MSN DEN 144.0
116 UA GEG DEN 136.0
117 UA SEA IAH 266.0
118 UA DCA IAH 197.0
125 EV CHS EWR 122.0
126 F9 MKE DEN 142.0
127 F9 MSP DEN 128.0
... ... ... ... ...
5818981 UA SFO MSY 246.0
5818982 F9 LAS MIA 259.0
5818984 UA PDX IAH 234.0
5818992 UA IAH SJU 243.0
5818993 UA LAX IAD 251.0
5818996 F9 LAX ATL 295.0
5819001 UA LAX IAH 170.0
5819006 UA LAS ORD 185.0
5819007 UA SFO ATL 284.0
5819010 UA PHX EWR 242.0
5819013 F9 LAS STL 167.0
5819015 UA ANC DEN 306.0
5819018 UA HNL LAX 313.0
5819019 F9 LAS MCO 239.0
5819024 UA SFO IAH 202.0
5819028 UA DEN EWR 186.0
5819034 UA HNL SFO 295.0
5819040 UA LAX EWR 274.0
5819041 UA SFO EWR 301.0
5819043 F9 DEN PBI 200.0
5819044 F9 DEN ATL 147.0
5819048 UA LAX ORD 220.0
5819059 F9 DEN RSW 207.0
5819060 F9 DEN MCO 190.0
5819061 F9 DEN RDU 170.0
5819062 F9 DEN TPA 184.0
5819063 UA SEA IAH 239.0
5819064 UA LAS IAD 236.0
5819065 UA PDX IAH 233.0
5819066 UA LAS EWR 261.0

1178536 rows × 4 columns


In [30]:
# reduce rows to only three destination
ftimes_df = ftimes_df[(ftimes_df['DESTINATION_AIRPORT'] == "ORD") |
                      (ftimes_df['DESTINATION_AIRPORT'] == "IAH") |
                      (ftimes_df['DESTINATION_AIRPORT'] == "ATL")]
ftimes_df


Out[30]:
AIRLINE ORIGIN_AIRPORT DESTINATION_AIRPORT ELAPSED_TIME
17 UA SFO IAH 217.0
25 UA LAX IAH 175.0
63 UA ORD IAH 182.0
72 EV MKE IAH 189.0
76 EV BRO IAH 72.0
77 EV VPS ATL 57.0
81 EV BNA ATL 70.0
83 UA PHL IAH 234.0
87 EV SBN ORD 44.0
94 UA IAD IAH 215.0
98 EV CRP IAH 52.0
117 UA SEA IAH 266.0
118 UA DCA IAH 197.0
137 UA CLE IAH 198.0
145 EV MCI IAH 138.0
148 EV RIC IAH 220.0
154 EV CID ATL 114.0
163 EV MCI ORD 83.0
164 EV LAN ORD 75.0
169 UA LAX IAH 178.0
173 UA PBI IAH 172.0
174 UA PHX IAH 160.0
175 UA RSW IAH 155.0
177 UA OMA ORD 79.0
182 UA PIT IAH 197.0
183 UA DEN IAH 146.0
191 UA SFO ORD 256.0
193 UA PDX IAH 242.0
194 UA BOS ORD 165.0
199 UA LAX ORD 227.0
... ... ... ... ...
5817723 UA ORD IAH 166.0
5817748 UA IAH ORD 132.0
5817755 EV AEX IAH 58.0
5817757 UA IAH ATL 107.0
5817838 EV HRL IAH 61.0
5817843 EV BRO IAH 69.0
5817865 F9 MIA ATL 134.0
5817893 UA EWR IAH 229.0
5818013 F9 ORD ATL 104.0
5818091 UA HNL IAH 430.0
5818133 UA EWR ATL 137.0
5818156 UA EWR ORD 148.0
5818383 UA ANC ORD 345.0
5818454 F9 DEN IAH 143.0
5818597 F9 PHL ATL 132.0
5818899 UA SAN ORD 205.0
5818947 F9 LAS ORD 198.0
5818964 UA SEA ORD 236.0
5818973 UA SFO ORD 241.0
5818979 UA LAX ORD 225.0
5818984 UA PDX IAH 234.0
5818996 F9 LAX ATL 295.0
5819001 UA LAX IAH 170.0
5819006 UA LAS ORD 185.0
5819007 UA SFO ATL 284.0
5819024 UA SFO IAH 202.0
5819044 F9 DEN ATL 147.0
5819048 UA LAX ORD 220.0
5819063 UA SEA IAH 239.0
5819065 UA PDX IAH 233.0

287053 rows × 4 columns


In [31]:
# reduce rows to only one origin airports
ftimes_df = ftimes_df[(ftimes_df['ORIGIN_AIRPORT'] == "IAD")]
ftimes_df


Out[31]:
AIRLINE ORIGIN_AIRPORT DESTINATION_AIRPORT ELAPSED_TIME
94 UA IAD IAH 215.0
1397 UA IAD ORD 139.0
1756 UA IAD IAH 218.0
1830 EV IAD ATL 115.0
3121 UA IAD ORD 123.0
5897 F9 IAD ATL 110.0
5993 UA IAD ORD 121.0
6561 EV IAD ATL 112.0
9824 UA IAD ORD 119.0
10169 UA IAD IAH 238.0
13523 EV IAD ATL 111.0
13546 EV IAD IAH 222.0
13696 F9 IAD ORD 132.0
14122 UA IAD IAH 212.0
16456 F9 IAD ORD 125.0
16615 UA IAD ORD 127.0
16854 UA IAD IAH 220.0
16962 EV IAD ATL 122.0
18534 UA IAD ORD 124.0
19409 F9 IAD ATL 118.0
21678 UA IAD ORD 109.0
24966 EV IAD ATL 124.0
25789 UA IAD ORD 128.0
26165 UA IAD IAH 227.0
30145 EV IAD ATL 127.0
30197 EV IAD IAH NaN
30852 UA IAD IAH 258.0
33005 F9 IAD ORD 124.0
33025 UA IAD ORD 124.0
33372 UA IAD IAH 241.0
... ... ... ... ...
5764611 UA IAD IAH 210.0
5767801 F9 IAD ATL 146.0
5768865 UA IAD IAH 194.0
5768925 UA IAD ORD 187.0
5773888 UA IAD IAH 200.0
5776164 UA IAD IAH 194.0
5776958 UA IAD ORD 129.0
5777924 EV IAD ATL 106.0
5778031 UA IAD ORD NaN
5780820 UA IAD IAH 204.0
5784005 F9 IAD ATL 109.0
5785069 UA IAD IAH 196.0
5785125 UA IAD ORD 107.0
5790092 UA IAD IAH 205.0
5792386 UA IAD IAH 205.0
5793066 UA IAD ORD NaN
5794142 EV IAD ATL 156.0
5794253 UA IAD ORD NaN
5797056 UA IAD IAH 212.0
5798947 EV IAD ATL 103.0
5800238 F9 IAD ATL 146.0
5801305 UA IAD IAH 208.0
5801363 UA IAD ORD 118.0
5806274 UA IAD IAH 218.0
5808222 UA IAD IAH 213.0
5809966 UA IAD ORD 114.0
5812504 UA IAD IAH 216.0
5815477 F9 IAD ATL 111.0
5816331 UA IAD IAH 213.0
5816369 UA IAD ORD 126.0

3668 rows × 4 columns


In [ ]:
# ftimes_df = flights_df.groupby(['AIRLINE'], as_index=True)['ELAPSED_TIME'].mean().reset_index()

In [ ]:
# ftimes_df

In [32]:
ftimes_df = ftimes_df.groupby(['AIRLINE', 'DESTINATION_AIRPORT'], as_index=True)['ELAPSED_TIME'].mean().reset_index()

In [33]:
ftimes_df


Out[33]:
AIRLINE DESTINATION_AIRPORT ELAPSED_TIME
0 EV ATL 117.310734
1 EV IAH 217.524590
2 EV ORD 128.888889
3 F9 ATL 115.952381
4 F9 ORD 120.947619
5 UA ATL 109.315789
6 UA IAH 192.006923
7 UA ORD 119.410381

In [70]:
means_ev = ftimes_df.ELAPSED_TIME[(ftimes_df.AIRLINE == "EV")].tolist()
means_f9 = ftimes_df.ELAPSED_TIME[(ftimes_df.AIRLINE == "F9")].tolist()
means_ua = ftimes_df.ELAPSED_TIME[(ftimes_df.AIRLINE == "UA")].tolist()

means_f9.insert(1, 0.0)
print(means_f9)


[115.95238095238095, 0.0, 120.94761904761904]

In [57]:
# import numpy as np
# import matplotlib.pyplot as plt

# %matplotlib inline

# ft.plot.bar()

In [73]:
import numpy as np
import matplotlib.pyplot as plt

n_groups = 3

# create plot
fig, ax = plt.subplots()
index = np.arange(n_groups)
bar_width = 0.25
opacity = 0.8
 
rects1 = plt.bar(index, means_ev, bar_width,
                 alpha=opacity,
                 color='b',
                 label='AtlanticSE')
 
rects2 = plt.bar(index + bar_width, means_ua, bar_width,
                 alpha=opacity,
                 color='r',
                 label='United')

rects3 = plt.bar(index + 2*bar_width, means_f9, bar_width,
                 alpha=opacity,
                 color='g',
                 label='Frontier')

plt.xlabel('Airport')
plt.ylabel('Minutes')
plt.title('Comparison of Flight Times from Dulles Airport')
plt.xticks(index + bar_width, ('ATL', 'IAH', 'ORD'))
plt.legend()
 
plt.tight_layout()
plt.show()



In [ ]:
# from bokeh.plotting import figure, output_file, show
# import bokeh.plotting as bk
# bk.output_notebook()

# x=ftimes_df['AIRLINE']
# y=ftimes_df['ELAPSED_TIME']

# p = figure()
# p. circle(x,y,fill_color="red",size=2)
# #p.line(x,y,line_width=2)

# #output_file("scatter.html")

# bk.show(p)

In [ ]: