In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
# Default size (width, height in inches) for every figure created below.
plt.rc('figure', figsize=(12, 8))
In [2]:
# Load the flights table; the first CSV column is used as the row index.
df = pd.read_csv("flights.csv", index_col=0)
In [3]:
# Show the first rows of the loaded frame as a quick sanity check.
df.head()
Out[3]:
In [16]:
# Add a datetime `date` column built from the separate year/month/day columns.
# pandas can assemble dates directly from columns with these names, which
# avoids formatting every row as a string only to parse it back again.
df['date'] = pd.to_datetime(df[['year', 'month', 'day']])
df.head()
Out[16]:
In [6]:
# Overall mean of the dep_delay column (pandas skips missing values by default).
avg_delay = df['dep_delay'].mean()
print("Average departure delay: {:.2f} minutes".format(avg_delay))
Group by carrier and get the average `dep_delay`.
In [30]:
# Mean departure delay per carrier, largest first.
# Series.order() has been removed from pandas; sort_values() is its replacement.
df.groupby('carrier')['dep_delay'].mean().sort_values(ascending=False)
Out[30]:
In [22]:
# Kernel density estimate of departure times on a wide axis.
fig, ax = plt.subplots(figsize=(16, 6))
# NOTE(review): dividing by 60 yields hours only if dep_time is stored as
# minutes since midnight; if it is encoded as HHMM this scaling is off — confirm.
# `shade=` is deprecated in seaborn in favour of `fill=`; the data is passed
# explicitly as `x` so the call does not rely on positional handling.
sns.kdeplot(x=df.dep_time / 60, fill=True, ax=ax)
Out[22]:
In [39]:
# Scatter of departure delay against departure time, one point per row of df.
df.plot.scatter(x='dep_time', y='dep_delay')
Out[39]:
In [20]:
# Summary statistics for the numeric columns of df.
df.describe()
Out[20]:
In [9]:
# Mean departure delay per carrier, largest first (same ranking as the earlier
# carrier cell). Series.order() has been removed from pandas; use sort_values().
df.groupby('carrier')['dep_delay'].mean().sort_values(ascending=False)
Out[9]:
In [15]:
# Daily flight counts: number of non-null `flight` entries for each date.
count_ts = df.groupby('date').flight.count()
# Line plot of the resulting per-date series.
count_ts.plot()
Out[15]:
In [19]:
# Smooth the per-date counts with a 7-row rolling mean (one row per date).
# The module-level pd.rolling_mean() has been removed from pandas; the
# equivalent is the .rolling(window).mean() method on the series itself.
count_ts.rolling(7).mean().plot()
Out[19]:
In [25]:
# Numeric columns compared against each other in the pairwise grid.
datacols = [
    'dep_time',
    'dep_delay',
    'arr_time',
    'arr_delay',
    'air_time',
    'distance',
]
In [28]:
# Pairwise grid over `datacols`, coloured by carrier.
# The per-facet size argument of PairGrid was renamed from `size` to `height`.
gr = sns.PairGrid(df, hue='carrier', vars=datacols, height=6)
# Scatter plots off the diagonal, kernel density estimates on it.
gr.map_upper(plt.scatter)
gr.map_diag(sns.kdeplot)
gr.map_lower(plt.scatter)
In [8]:
# Another look at the first rows of df.
df.head()
Out[8]:
In [13]:
# NOTE(review): in the visible notebook `delay` is first assigned in a later
# cell (the per-tailnum aggregation), so on a fresh top-to-bottom run this
# line raises NameError. Move it below that cell or drop it.
delay.head()
Out[13]:
In [27]:
# One group per aircraft tail number.
planes = df.groupby('tailnum')
# Per-plane summary: number of flights, mean distance and mean departure
# delay, restricted to planes with more than 20 flights and a mean distance
# below 2000. Note that the `flight` column now holds a count.
delay = (
    planes
    .agg({
        'flight': 'count',
        'distance': 'mean',
        'dep_delay': 'mean',
    })
    .query('flight > 20 & distance < 2000')
)
delay.head()
Out[27]:
In [45]:
# Mean departure delay against mean distance, one point per plane, with a
# lowess smoother drawn on top.
fig, ax = plt.subplots(figsize=(10, 10))
# Marker sizes must come from `delay` (one value per plotted point, where the
# `flight` column holds the per-plane flight count), not from the row-level
# `df`, whose length does not match the plotted data.
ax = delay.plot(kind='scatter', x='distance', y='dep_delay',
                color='k', alpha=.5, s=(5 + delay.flight.values / 100),
                ax=ax)
# `lowess` is reached through the already-imported statsmodels API; 1.0 / 3
# keeps the fraction a float even where `/` performs integer division.
xy = sm.nonparametric.lowess(delay.dep_delay, delay.distance, frac=1.0 / 3)
# Column 0 holds the x values, column 1 the smoothed y values.
ax.plot(xy[:, 0], xy[:, 1], linewidth=4)
Out[45]:
In [46]:
# The notebook widgets were split out of IPython into the `ipywidgets`
# package; prefer that and fall back to the legacy location on old installs.
try:
    from ipywidgets import interact
except ImportError:
    from IPython.html.widgets import interact
In [49]:
def f(frac=1.0 / 3):
    """Plot per-plane mean departure delay against mean distance with a lowess fit.

    Parameters
    ----------
    frac : float
        Fraction of the data used for each local regression, passed straight
        to lowess. Defaults to one third.

    Returns
    -------
    The matplotlib axes holding the scatter plot and the smoothed line.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    # Marker sizes come from `delay` so there is exactly one size per plotted
    # point; the row-level `df` has a different length.
    ax = delay.plot(kind='scatter', x='distance', y='dep_delay',
                    color='k', alpha=.5, s=(5 + delay.flight.values / 100),
                    ax=ax)
    # `lowess` is reached through the already-imported statsmodels API.
    xy = sm.nonparametric.lowess(delay.dep_delay, delay.distance, frac=frac)
    # Column 0 holds the x values, column 1 the smoothed y values.
    ax.plot(xy[:, 0], xy[:, 1], linewidth=4)
    return ax
In [50]:
# Give the slider an explicit (min, max, step) range: the slider generated
# automatically from a positive float default also covers negative values,
# which are not valid smoothing fractions.
interact(f, frac=(0.05, 1.0, 0.05))
In [63]:
# Larger "talk" scaling for fonts and lines.
sns.set_context("talk")
# The figure size is set through matplotlib directly: the `rc` argument of
# set_context only applies keys that belong to the context definition, so a
# figure.figsize entry passed there is ignored by current seaborn.
plt.rcParams["figure.figsize"] = (12, 9)
# Example dataset with a three-level column header; the first column is the index.
networks = sns.load_dataset("brain_networks", header=[0, 1, 2], index_col=0)
# Pairwise correlations between the first 30 columns.
corrmap = networks.iloc[:, :30].corr()
In [71]:
# Interactive palette chooser returning a colormap object.
# NOTE(review): the resulting colours depend on the widget state at run time,
# so the heatmap is not reproducible from code alone; consider a fixed
# sns.diverging_palette(...) call once the colours are settled.
cmap = sns.choose_diverging_palette(as_cmap=True)
In [72]:
# Correlation heatmap with square cells separated by thin lines; the trailing
# semicolon suppresses the returned object's repr.
sns.heatmap(
    corrmap,
    cmap=cmap,
    square=True,
    linewidths=1,
);
In [ ]: