In [3]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
In [19]:
# Load the TTC subway delay records. Passing the nested list to `parse_dates`
# combines the separate `Date` and `Time` columns into one parsed
# `Date_Time` column.
# NOTE(review): the nested-list form of `parse_dates` is deprecated in recent
# pandas releases -- confirm against the installed version before upgrading.
#
# The default sort should be by `Date_Time`.
# `sort_values` returns a new frame instead of sorting in place, so the sort
# is chained onto the load and the sorted result is what gets bound to `df`.
df = pd.read_csv(
    'ttc-subway-delay-data-2014-01-01_2017-04-30.csv',
    parse_dates=[['Date', 'Time']],
).sort_values(by='Date_Time', ascending=False)
df.head()
Out[19]:
In [20]:
# Show the full record of the single longest delay. `idxmax` returns the index
# label of the largest `Min Delay`, so the label-based `.loc` indexer is the
# right lookup (the ambiguous `.ix` indexer has been removed from pandas).
df.loc[df['Min Delay'].idxmax()]
Out[20]:
In [17]:
# Reshape to wide form: one column per distinct `Day` value holding that
# row's `Min Delay` (other cells are NaN), keeping the existing row index.
delays_by_day = df.pivot(values='Min Delay', columns='Day')
# One box per column, i.e. one box per day.
delays_by_day.boxplot()
Out[17]:
In [ ]: