Introduction


In [ ]:
%matplotlib inline 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [8]:
df = pd.read_excel('2011_Current_Table1.xlsx')

In [9]:
df.shape


Out[9]:
(15003, 16)

In [10]:
df.head()


Out[10]:
tbl Year quarter citymarketid_1 citymarketid_2 city1 city2 nsmiles passengers fare carrier_lg large_ms fare_lg carrier_low lf_ms fare_low
0 Table 1 2011 1 30135 31454 Allentown/Bethlehem/Easton, PA Orlando, FL 906 291.777778 119.161005 FL 0.687738 112.950000 G4 0.119573 84.792994
1 Table 1 2011 1 30140 30194 Albuquerque, NM Dallas/Fort Worth, TX 580 532.333333 189.705197 WN 0.612607 189.172266 WN 0.612607 189.172266
2 Table 1 2011 1 30140 30325 Albuquerque, NM Denver, CO 349 521.777778 114.180473 WN 0.449744 111.501610 F9 0.319208 104.576584
3 Table 1 2011 1 30140 30423 Albuquerque, NM Austin, TX 619 151.777778 201.043704 WN 0.718887 202.160998 AA 0.180088 201.397967
4 Table 1 2011 1 30140 30436 Albuquerque, NM Tucson, AZ 321 155.666667 115.909707 WN 0.927909 114.270308 WN 0.927909 114.270308

In [22]:
airports = pd.concat([df['citymarketid_1'], df['citymarketid_2']])

In [23]:
len(np.unique(airports))


Out[23]:
141

In [47]:
from_nyc = df[df['city1'].str.contains('NY')]

In [55]:
from_nyc_to_or = from_nyc[from_nyc['city2'].str.contains('OR')]

In [1]:
plt.figure();
df.fare.plot(kind='bar'); plt.axhline(0, color='k')


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-a7d60ef7e6cb> in <module>()
----> 1 plt.figure();
      2 df.fare.plot(kind='bar'); plt.axhline(0, color='k')

NameError: name 'plt' is not defined

In [ ]: