In [13]:
import pandas as pd
%matplotlib inline
In [14]:
import matplotlib.pyplot as plt # package for doing plotting (necessary for adding the line)
import statsmodels.formula.api as smf
In [15]:
# Load the on-time report CSV (path is relative to the notebook's working directory).
df = pd.read_csv(
    filepath_or_buffer="data/ontime_reports_may_2015_ny.csv",
)
In [16]:
# Preview the first five rows as a quick sanity check of the load.
df.head(n=5)
Out[16]:
In [18]:
# Correlation of every numeric column with ARR_DELAY, strongest positive first.
# Numeric/bool columns are selected explicitly: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# if the frame contains any (e.g. string columns read from the CSV).
(
    df.select_dtypes(include=['number', 'bool'])
    .corr()['ARR_DELAY']
    .sort_values(ascending=False)
)
Out[18]:
In [19]:
# scatter_matrix lives in pandas.plotting since pandas 0.20; the old
# pandas.tools.plotting module has been removed from current releases.
# Fall back to the legacy location only for very old installations.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix
In [ ]:
# Pairwise scatter plots of the delay-related columns, with a kernel density
# estimate on the diagonal. Low alpha keeps dense regions readable.
delay_columns = [
    'DEP_DELAY',
    'ARR_DELAY',
    'ACTUAL_ELAPSED_TIME',
    'CARRIER_DELAY',
    'WEATHER_DELAY',
    'NAS_DELAY',
    'SECURITY_DELAY',
    'LATE_AIRCRAFT_DELAY',
]
scatter_matrix(
    df[delay_columns],
    alpha=0.2,
    figsize=(20, 20),
    diagonal='kde',
)
In [ ]:
# Ordinary least squares fit: departure delay regressed on three of the
# reported delay-cause columns.
delay_formula = "DEP_DELAY ~ LATE_AIRCRAFT_DELAY + CARRIER_DELAY + NAS_DELAY"
lm = smf.ols(formula=delay_formula, data=df).fit()
In [ ]:
# Display the parameters estimated by the fitted model `lm`.
lm.params
In [ ]: