Introduction


In [2]:
%matplotlib inline 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Load the source workbook into a DataFrame. No sheet is named here, so
# pandas' default sheet selection applies.
df = pd.read_excel(io='2011_Current_Table1.xlsx')

In [6]:
# Sanity check: dimensions of the loaded table as (rows, columns).
df.shape


Out[6]:
(15003, 16)

In [7]:
# Preview the first five rows to check column names and value formats.
df.head(n=5)


Out[7]:
tbl Year quarter citymarketid_1 citymarketid_2 city1 city2 nsmiles passengers fare carrier_lg large_ms fare_lg carrier_low lf_ms fare_low
0 Table 1 2011 1 30135 31454 Allentown/Bethlehem/Easton, PA Orlando, FL 906 291.777778 119.161005 FL 0.687738 112.950000 G4 0.119573 84.792994
1 Table 1 2011 1 30140 30194 Albuquerque, NM Dallas/Fort Worth, TX 580 532.333333 189.705197 WN 0.612607 189.172266 WN 0.612607 189.172266
2 Table 1 2011 1 30140 30325 Albuquerque, NM Denver, CO 349 521.777778 114.180473 WN 0.449744 111.501610 F9 0.319208 104.576584
3 Table 1 2011 1 30140 30423 Albuquerque, NM Austin, TX 619 151.777778 201.043704 WN 0.718887 202.160998 AA 0.180088 201.397967
4 Table 1 2011 1 30140 30436 Albuquerque, NM Tucson, AZ 321 155.666667 115.909707 WN 0.927909 114.270308 WN 0.927909 114.270308

In [8]:
# Stack the two endpoint ID columns into a single Series so that an ID
# appearing on either side of a route is represented.
# Note: despite the name, the values are `citymarketid_*` entries.
airports = pd.concat(
    objs=[df.citymarketid_1, df.citymarketid_2],
    axis=0,
)

In [9]:
# Count the distinct market IDs seen on either end of any route.
# The ID columns are integer-valued (see the preview above), so there are no
# missing values for nunique() to skip.
airports.nunique()


Out[9]:
141

In [18]:
# Count the distinct market IDs in the first endpoint column only, for
# comparison with the count taken over both endpoint columns.
df['citymarketid_1'].nunique()


Out[18]:
123

In [10]:
# Keep the rows whose `city1` text contains the substring 'NY'.
# NOTE(review): the match is on 'NY' anywhere in the string, so this
# presumably selects every market carrying an 'NY' label rather than New York
# City alone -- confirm that is what the `from_nyc` name is meant to cover.
from_nyc = df.loc[df.city1.str.contains('NY')]

In [11]:
# Narrow the 'NY' rows to those whose `city2` text contains 'OR'.
# NOTE(review): the pattern is upper-case 'OR', presumably targeting an 'OR'
# state suffix rather than names such as 'Orlando' -- confirm the intent.
from_nyc_to_or = from_nyc.loc[from_nyc.city2.str.contains('OR')]

In [16]:
# Bar chart of `fare` for the filtered routes: one black bar per row, with
# the `Year` column supplying the x tick labels. The trailing semicolon
# suppresses the textual repr of the returned plot object.
from_nyc_to_or.plot.bar(x='Year', y='fare', color='k');



In [17]:
# Export the full, unfiltered table as CSV.
# NOTE(review): the file name is spelled 'domenstic' -- presumably 'domestic'
# was meant; left unchanged here because other code may read this exact path.
# NOTE(review): no `index` argument is passed, so pandas' default index
# handling applies -- confirm the row index is wanted in the file.
df.to_csv(path_or_buf='domenstic-flights.csv')

In [ ]: