In [3]:
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt

In [19]:
df = pd.read_csv('ttc-subway-delay-data-2014-01-01_2017-04-30.csv',
                 parse_dates=[['Date', 'Time']])

# The default short should be by `Date_Time`.
df.sort_values(by='Date_Time', ascending=False)

df.head()


Out[19]:
Date_Time Day Station Code Min Delay Min Gap Bound Line Vehicle
0 2014-01-01 02:06:00 Wednesday HIGH PARK STATION SUDP 3 7 W BD 5001
1 2014-01-01 02:40:00 Wednesday SHEPPARD STATION MUNCA 0 0 NaN YU 0
2 2014-01-01 03:10:00 Wednesday LANSDOWNE STATION SUDP 3 8 W BD 5116
3 2014-01-01 03:20:00 Wednesday BLOOR STATION MUSAN 5 10 S YU 5386
4 2014-01-01 03:29:00 Wednesday DUFFERIN STATION MUPAA 0 0 E BD 5174

In [20]:
df.ix[df['Min Delay'].idxmax()]


Out[20]:
Date_Time       2015-02-17 06:56:00
Day                         Tuesday
Station      SCARBOROUGH RAPID TRAN
Code                           ERHV
Min Delay                       999
Min Gap                         999
Bound                             N
Line                            SRT
Vehicle                        3007
Name: 23171, dtype: object

In [17]:
delays_by_day = df.pivot(columns='Day', values='Min Delay')
delays_by_day.boxplot()


Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x10f3b49b0>

In [ ]: