This jupyter notebook is licensed under Creative Commons 2.0 By Attribution
License information is here: https://creativecommons.org/licenses/by/2.0/
Author: Lauren Oldja, laurenoldja.net
In [1]:
import pandas as pd
import numpy as np
import os
%matplotlib inline
import matplotlib.pyplot as plt
In [2]:
# Data source: http://web.mta.info/developers/turnstile.html
#Download all desired weeks from website, store them in a folder called ../data/mta_turnstiles
# This will load combined CSV files that are in this folder into an appended dataframe
datafiles = ['data/mta_turnstiles/' + x for x in os.listdir('data/mta_turnstiles/')]
list_ = []
for file_ in datafiles:
df = pd.read_csv(file_)
list_.append(df)
df = pd.concat(list_)
In [3]:
df.columns #notice the whitespace on EXITS
Out[3]:
Index([u'C/A', u'UNIT', u'SCP', u'STATION', u'LINENAME', u'DIVISION', u'DATE',
u'TIME', u'DESC', u'ENTRIES',
u'EXITS '],
dtype='object')
In [4]:
df.columns = df.columns.str.strip() #strip whitespace
df.head() #preview first five rows
Out[4]:
C/A
UNIT
SCP
STATION
LINENAME
DIVISION
DATE
TIME
DESC
ENTRIES
EXITS
0
A002
R051
02-00-00
59 ST
NQR456
BMT
05/21/2016
00:00:00
REGULAR
5672118
1920707
1
A002
R051
02-00-00
59 ST
NQR456
BMT
05/21/2016
04:00:00
REGULAR
5672183
1920719
2
A002
R051
02-00-00
59 ST
NQR456
BMT
05/21/2016
08:00:00
REGULAR
5672214
1920761
3
A002
R051
02-00-00
59 ST
NQR456
BMT
05/21/2016
12:00:00
REGULAR
5672330
1920867
4
A002
R051
02-00-00
59 ST
NQR456
BMT
05/21/2016
16:00:00
REGULAR
5672640
1920936
In [5]:
df.describe() #basic descriptive statistics
Out[5]:
ENTRIES
EXITS
count
7.772530e+05
7.772530e+05
mean
3.619627e+07
2.946001e+07
std
1.975870e+08
1.791294e+08
min
0.000000e+00
0.000000e+00
25%
5.873780e+05
2.979540e+05
50%
2.575607e+06
1.501834e+06
75%
6.546763e+06
4.657076e+06
max
2.147483e+09
2.087387e+09
In [6]:
df.duplicated().value_counts()
Out[6]:
False 777253
dtype: int64
In [7]:
df = df.drop_duplicates()
In [8]:
df['TIMESTAMP'] = pd.to_datetime((df.DATE + ' ' + df.TIME), format='%m/%d/%Y %H:%M:%S')
Create a column with the day of the week this timestamp shows:
In [9]:
import datetime as dt
weekdays = ['MON','TUE','WED','THU','FRI','SAT','SUN']
df['DATE'][1]
df['DOF'] = [weekdays[dt.datetime.strptime(dstring,'%m/%d/%Y').weekday()] for dstring in df.DATE.tolist()]
# DOF = "day of week"
In [10]:
df.head()
Out[10]:
C/A
UNIT
SCP
STATION
LINENAME
DIVISION
DATE
TIME
DESC
ENTRIES
EXITS
TIMESTAMP
DOF
0
A002
R051
02-00-00
59 ST
NQR456
BMT
05/21/2016
00:00:00
REGULAR
5672118
1920707
2016-05-21 00:00:00
SAT
1
A002
R051
02-00-00
59 ST
NQR456
BMT
05/21/2016
04:00:00
REGULAR
5672183
1920719
2016-05-21 04:00:00
SAT
2
A002
R051
02-00-00
59 ST
NQR456
BMT
05/21/2016
08:00:00
REGULAR
5672214
1920761
2016-05-21 08:00:00
SAT
3
A002
R051
02-00-00
59 ST
NQR456
BMT
05/21/2016
12:00:00
REGULAR
5672330
1920867
2016-05-21 12:00:00
SAT
4
A002
R051
02-00-00
59 ST
NQR456
BMT
05/21/2016
16:00:00
REGULAR
5672640
1920936
2016-05-21 16:00:00
SAT
In [11]:
df = df.reset_index()
In [12]:
l = [''.join(sorted(a)) for a in df['LINENAME']] #sort each linename, since subway lines aren't listed in a consistent order
In [13]:
df['STATID']=df['STATION']+pd.Series(l)
In [14]:
df['ENTRY_DIFF']=df.groupby(['STATID','UNIT','SCP'],as_index=False)['ENTRIES'].transform(pd.Series.diff)['ENTRIES']
Let's see some summary statistics on ENTRY_DIFF by station to identify outliers.
In [15]:
max_table = df.sort_values(['ENTRY_DIFF']).groupby(['STATID'])['STATID','ENTRY_DIFF'].max()
# print max_table
Yikes some are really big. Let's see the really big ones only.
In [33]:
max_table[max_table['ENTRY_DIFF'] > 100000 ]
Out[33]:
STATID
ENTRY_DIFF
STATID
33 ST6
33 ST6
1.162241e+08
34 ST-PENN STAACE
34 ST-PENN STAACE
2.402797e+06
51 ST6
51 ST6
1.403818e+09
AVENUE HBQ
AVENUE HBQ
1.174165e+08
CANAL ST6JNQRZ
CANAL ST6JNQRZ
3.175049e+08
CANARSIE-ROCKAWL
CANARSIE-ROCKAWL
4.951429e+07
FULTON ST2345ACJZ
FULTON ST2345ACJZ
1.174397e+08
GRD CNTRL-42 ST4567S
GRD CNTRL-42 ST4567S
1.879008e+09
JAY ST-METROTECACF
JAY ST-METROTECACF
2.076115e+09
MORISN AV/SNDVW6
MORISN AV/SNDVW6
3.931740e+05
PAVONIA/NEWPORT1
PAVONIA/NEWPORT1
2.557829e+06
ROCKAWAY AVC
ROCKAWAY AVC
2.615430e+05
ST. GEORGE1
ST. GEORGE1
8.351914e+07
THIRTY ST1
THIRTY ST1
1.629370e+06
WALL ST45
WALL ST45
5.199925e+08
In [17]:
min_table = df.sort_values(['ENTRY_DIFF']).groupby(['STATID'])['STATID','ENTRY_DIFF'].min()
# print min_table
In [18]:
min_table[min_table['ENTRY_DIFF'] < 0 ] #show negative values
Out[18]:
STATID
ENTRY_DIFF
STATID
1 AVL
1 AVL
-1.510852e+08
104 STA
104 STA
-1.250000e+02
104 STJZ
104 STJZ
-1.970000e+02
125 ST23
125 ST23
-9.950000e+02
125 ST456
125 ST456
-2.682000e+03
14 ST123FLM
14 ST123FLM
-1.143000e+03
14TH STREET1
14TH STREET1
-2.728372e+06
161/YANKEE STAD4BD
161/YANKEE STAD4BD
-8.280000e+02
170 STBD
170 STBD
-5.578590e+05
174 ST25
174 ST25
-6.390000e+02
18 ST1
18 ST1
-8.014810e+05
21 STG
21 STG
-2.234351e+06
23 ST6
23 ST6
-1.079000e+03
23 STFM
23 STFM
-1.565000e+03
231 ST1
231 ST1
-4.676500e+04
3 AV-149 ST25
3 AV-149 ST25
-1.461000e+03
34 ST-HERALD SQBDFMNQR
34 ST-HERALD SQBDFMNQR
-2.818800e+05
34 ST-PENN STA123
34 ST-PENN STA123
-2.288000e+03
34 ST-PENN STAACE
34 ST-PENN STAACE
-2.754015e+06
36 STMR
36 STMR
-3.020092e+08
42 ST-PORT AUTH1237ACENQRS
42 ST-PORT AUTH1237ACENQRS
-1.666000e+03
45 STR
45 STR
-5.870000e+02
47-50 STS ROCKBDFM
47-50 STS ROCKBDFM
-1.000000e+00
49 STNQR
49 STNQR
-5.037065e+07
50 ST1
50 ST1
-4.000000e+00
50 STCE
50 STCE
-7.887900e+04
51 ST6
51 ST6
-1.409337e+09
57 ST-7 AVNQR
57 ST-7 AVNQR
-1.462000e+03
59 STNR
59 STNR
-1.000000e+00
6 AV123FLM
6 AV123FLM
-1.115000e+03
...
...
...
JAMAICA CENTEREJZ
JAMAICA CENTEREJZ
-5.940000e+02
JAY ST-METROTECACF
JAY ST-METROTECACF
-7.500000e+02
JFK JAMAICA CT1E
JFK JAMAICA CT1E
-1.354400e+05
JOURNAL SQUARE1
JOURNAL SQUARE1
-1.748390e+05
LACKAWANNA1
LACKAWANNA1
-1.873000e+03
LEXINGTON AV/536EM
LEXINGTON AV/536EM
-1.555000e+03
LEXINGTON AV/63F
LEXINGTON AV/63F
-1.449000e+03
MARCY AVJMZ
MARCY AVJMZ
-2.082158e+06
METROPOLITAN AVM
METROPOLITAN AVM
-3.580000e+02
METS-WILLETS PT7
METS-WILLETS PT7
-7.258610e+05
MORISN AV/SNDVW6
MORISN AV/SNDVW6
-3.931720e+05
MT EDEN AV4
MT EDEN AV4
-3.152690e+06
NEWARK BM BW1
NEWARK BM BW1
-5.300890e+05
NEWARK C1
NEWARK C1
-8.197000e+03
NEWARK HM HE1
NEWARK HM HE1
-1.643100e+04
NOSTRAND AVAC
NOSTRAND AVAC
-1.400000e+01
PATH WTC 21
PATH WTC 21
-3.261950e+05
PATH WTC1
PATH WTC1
-5.277590e+05
PAVONIA/NEWPORT1
PAVONIA/NEWPORT1
-2.557080e+06
SHEEPSHEAD BAYBQ
SHEEPSHEAD BAYBQ
-1.933256e+06
SMITH-9 STFG
SMITH-9 STFG
-2.886522e+06
SUTPHIN-ARCHEREJZ
SUTPHIN-ARCHEREJZ
-2.000000e+00
THIRTY ST1
THIRTY ST1
-1.629062e+06
TIMES SQ-42 ST1237ACENQRS
TIMES SQ-42 ST1237ACENQRS
-4.313168e+06
TOMPKINSVILLE1
TOMPKINSVILLE1
-7.000000e+00
TWENTY THIRD ST1
TWENTY THIRD ST1
-3.168000e+03
UTICA AVAC
UTICA AVAC
-5.156290e+05
V.CORTLANDT PK1
V.CORTLANDT PK1
-6.450530e+06
WALL ST45
WALL ST45
-5.199925e+08
WILSON AVL
WILSON AVL
-3.000000e+00
87 rows × 2 columns
We've got some negative numbers here. The counters may be running backwards. We should drill down further to investigate, and explicitly throw out any outliers that are clearly indicative of mechanical malfunction (i.e. vary large or very negative numbers in low traffic areas). Let's go back and take the absolute values before we take row differences.
Strange numbers may also indicate that a counter has been reset. It's worth drilling down for a closer look at the data in this case.
In [19]:
df['ENTRY_DIFF_ABS']=df['ENTRY_DIFF'].abs()
min_table = df.sort_values(['ENTRY_DIFF_ABS']).groupby(['STATID'])['STATID','ENTRY_DIFF_ABS'].min()
In [20]:
min_table
Out[20]:
STATID
ENTRY_DIFF_ABS
STATID
1 AVL
1 AVL
0.0
103 ST-CORONA7
103 ST-CORONA7
0.0
103 ST1
103 ST1
3.0
103 ST6
103 ST6
0.0
103 STBC
103 STBC
0.0
104 STA
104 STA
0.0
104 STJZ
104 STJZ
0.0
110 ST6
110 ST6
0.0
111 ST7
111 ST7
6.0
111 STA
111 STA
0.0
111 STJ
111 STJ
0.0
116 ST-COLUMBIA1
116 ST-COLUMBIA1
0.0
116 ST23
116 ST23
0.0
116 ST6
116 ST6
0.0
116 STBC
116 STBC
0.0
121 STJZ
121 STJZ
0.0
125 ST1
125 ST1
8.0
125 ST23
125 ST23
0.0
125 ST456
125 ST456
0.0
125 STABCD
125 STABCD
0.0
135 ST23
135 ST23
0.0
135 STBC
135 STBC
0.0
137 ST CITY COL1
137 ST CITY COL1
0.0
138/GRAND CONC45
138/GRAND CONC45
7.0
14 ST-UNION SQ456LNQR
14 ST-UNION SQ456LNQR
0.0
14 ST123FLM
14 ST123FLM
0.0
14 STACEL
14 STACEL
0.0
145 ST1
145 ST1
0.0
145 ST3
145 ST3
0.0
145 STABCD
145 STABCD
13.0
...
...
...
THIRTY ST1
THIRTY ST1
0.0
THIRTY THIRD ST1
THIRTY THIRD ST1
0.0
TIMES SQ-42 ST1237ACENQRS
TIMES SQ-42 ST1237ACENQRS
0.0
TOMPKINSVILLE1
TOMPKINSVILLE1
0.0
TREMONT AVBD
TREMONT AVBD
0.0
TWENTY THIRD ST1
TWENTY THIRD ST1
0.0
UNION STR
UNION STR
0.0
UTICA AVAC
UTICA AVAC
0.0
V.CORTLANDT PK1
V.CORTLANDT PK1
0.0
VAN SICLEN AV3
VAN SICLEN AV3
0.0
VAN SICLEN AVEC
VAN SICLEN AVEC
6.0
VAN SICLEN AVJZ
VAN SICLEN AVJZ
10.0
VERNON-JACKSON7
VERNON-JACKSON7
0.0
W 4 ST-WASH SQABCDEFM
W 4 ST-WASH SQABCDEFM
0.0
W 8 ST-AQUARIUMFQ
W 8 ST-AQUARIUMFQ
0.0
WAKEFIELD/2412
WAKEFIELD/2412
11.0
WALL ST23
WALL ST23
0.0
WALL ST45
WALL ST45
0.0
WEST FARMS SQ25
WEST FARMS SQ25
0.0
WESTCHESTER SQ6
WESTCHESTER SQ6
11.0
WHITEHALL S-FRY1R
WHITEHALL S-FRY1R
0.0
WHITLOCK AV6
WHITLOCK AV6
0.0
WILSON AVL
WILSON AVL
0.0
WINTHROP ST25
WINTHROP ST25
0.0
WOODHAVEN BLVDJZ
WOODHAVEN BLVDJZ
0.0
WOODHAVEN BLVDMR
WOODHAVEN BLVDMR
0.0
WOODLAWN4
WOODLAWN4
0.0
WORLD TRADE CTR23ACE
WORLD TRADE CTR23ACE
0.0
YORK STF
YORK STF
2.0
ZEREGA AV6
ZEREGA AV6
0.0
462 rows × 2 columns
So many zeros! It's possible that this is true for some stations at some time periods. But in some cases the turnstile is probably just broken. Let's keep going. We can throw out the zeros after we bin the hourly data (if we do this before, it will be harder to tell if our bins are uniformly sized)
In [21]:
bins = [-1,3,7,11,15,19,24] #use a negative number at the beginning to ensure we do not lose midnight
In [22]:
df['HOD'] = [r.hour for r in df.TIMESTAMP] #hod = "hour of day"
df['HODBIN'] = pd.cut(df['HOD'], bins)
In [23]:
df.HODBIN.value_counts()
Out[23]:
(7, 11] 133285
(3, 7] 131890
(11, 15] 128746
(15, 19] 128215
(-1, 3] 127613
(19, 24] 127504
dtype: int64
In [24]:
df.groupby(['STATID','HODBIN']).sum()
Out[24]:
index
ENTRIES
EXITS
ENTRY_DIFF
ENTRY_DIFF_ABS
HOD
STATID
HODBIN
1 AVL
(-1, 3]
8381570
41308618010
65131878470
114632.0
114632.0
0
(3, 7]
8681394
42195326126
66003223660
31693.0
31693.0
1186
(7, 11]
8828993
42638507564
66438811853
41325.0
41325.0
2352
(11, 15]
8560810
41903090807
67580385111
123253.0
123253.0
3420
(15, 19]
8412575
41308949092
65132180874
-150950407.0
151219967.0
4480
(19, 24]
8084542
39747479552
61656053968
172474.0
172474.0
5380
103 ST-CORONA7
(-1, 3]
45125983
1785916471
1702408993
31328.0
31328.0
0
(3, 7]
45126235
1785923974
1702439760
7503.0
7503.0
1008
(7, 11]
44947308
1777320587
1695739995
162889.0
162889.0
2008
(11, 15]
45126738
1786226043
1702497260
139180.0
139180.0
3024
(15, 19]
45103617
1786481405
1702692239
87498.0
87498.0
4059
(19, 24]
45127242
1786388335
1702703066
74794.0
74794.0
5040
103 ST1
(-1, 3]
21042954
1702964768
871748416
60416.0
60416.0
156
(3, 7]
21848607
1768437200
905267087
2774.0
2774.0
816
(7, 11]
21043272
1703032641
871765592
65099.0
65099.0
1404
(11, 15]
20228109
1637728544
838313630
76985.0
76985.0
1950
(15, 19]
20228259
1637796750
838346681
68206.0
68206.0
2550
(19, 24]
20228409
1637863343
838391504
66593.0
66593.0
3150
103 ST6
(-1, 3]
25966765
1753981845
15082734333
21135.0
21135.0
167
(3, 7]
25966932
1753985288
15082741375
3443.0
3443.0
835
(7, 11]
26907512
1816980896
15621516027
78847.0
78847.0
1551
(11, 15]
26124009
1766177521
15088077827
74333.0
74333.0
2184
(15, 19]
26124177
1766272954
15088149587
95433.0
95433.0
2856
(19, 24]
25967608
1754306974
15082994684
73073.0
73073.0
3507
103 STBC
(-1, 3]
3815976
709745836
633326492
6531.0
6531.0
84
(3, 7]
3816060
709746768
633328582
932.0
932.0
420
(7, 11]
3952671
735149357
655972599
30443.0
30443.0
780
(11, 15]
3816231
709806180
633348463
28969.0
28969.0
1092
(15, 19]
3816315
709832259
633369157
26079.0
26079.0
1428
(19, 24]
3816399
709856622
633398335
24363.0
24363.0
1764
...
...
...
...
...
...
...
...
WOODHAVEN BLVDMR
(-1, 3]
25143221
1615829355
1063534905
33493.0
33493.0
308
(3, 7]
25471015
1627012660
1067224404
6513.0
6513.0
1568
(7, 11]
29245406
1916842428
1276081883
163915.0
163915.0
3207
(11, 15]
25637665
1662522204
1097793371
105725.0
105725.0
4075
(15, 19]
25715811
1662971325
1098147214
135240.0
135240.0
5348
(19, 24]
25144828
1616358010
1063841629
117262.0
117262.0
6468
WOODLAWN4
(-1, 3]
22436680
784752963
319555192
18476.0
18476.0
0
(3, 7]
22436820
784756979
319558546
4016.0
4016.0
560
(7, 11]
22436960
784811859
319562022
54880.0
54880.0
1120
(11, 15]
22437100
784851887
319568799
40028.0
40028.0
1680
(15, 19]
22437240
784888779
319578254
36892.0
36892.0
2240
(19, 24]
22437380
784927575
319595235
38796.0
38796.0
2800
WORLD TRADE CTR23ACE
(-1, 3]
17617978
1049798814
545443315
33564.0
33564.0
0
(3, 7]
17618286
1049803339
545446288
4525.0
4525.0
1232
(7, 11]
17561066
1041621847
539520377
17243.0
17243.0
2456
(11, 15]
17618901
1049868694
545526385
48112.0
48112.0
3696
(15, 19]
17619209
1049961674
545566378
92980.0
92980.0
4928
(19, 24]
17619517
1050130931
545614532
169257.0
169257.0
6160
YORK STF
(-1, 3]
8375103
912961711
829515373
25062.0
25062.0
84
(3, 7]
8375187
912963567
829517443
1856.0
1856.0
420
(7, 11]
8680941
945313722
858976819
26967.0
26967.0
777
(11, 15]
8375358
913031304
829603479
40770.0
40770.0
1092
(15, 19]
8375442
913099684
829641465
68380.0
68380.0
1428
(19, 24]
8375526
913196063
829677925
96379.0
96379.0
1764
ZEREGA AV6
(-1, 3]
28638414
1506446679
21451497
2378.0
2378.0
168
(3, 7]
28638582
1506448051
21453650
1372.0
1372.0
840
(7, 11]
28638750
1506472735
21463438
24684.0
24684.0
1512
(11, 15]
28638918
1506484944
21470760
12209.0
12209.0
2184
(15, 19]
28639086
1506498718
21483668
13774.0
13774.0
2856
(19, 24]
28639254
1506507031
21501854
8313.0
8313.0
3528
2772 rows × 6 columns
Focus on ENTRY_DIFF_ABS, none of the other sums mean anything. These bins look good.
In [25]:
df.sort_values(['ENTRY_DIFF_ABS']).groupby(['STATID'])['STATID','ENTRY_DIFF_ABS'].max()
Out[25]:
STATID
ENTRY_DIFF_ABS
STATID
1 AVL
1 AVL
151085187.0
103 ST-CORONA7
103 ST-CORONA7
2027.0
103 ST1
103 ST1
9003.0
103 ST6
103 ST6
1301.0
103 STBC
103 STBC
841.0
104 STA
104 STA
355.0
104 STJZ
104 STJZ
431.0
110 ST6
110 ST6
1272.0
111 ST7
111 ST7
1242.0
111 STA
111 STA
695.0
111 STJ
111 STJ
380.0
116 ST-COLUMBIA1
116 ST-COLUMBIA1
1886.0
116 ST23
116 ST23
989.0
116 ST6
116 ST6
1193.0
116 STBC
116 STBC
681.0
121 STJZ
121 STJZ
223.0
125 ST1
125 ST1
740.0
125 ST23
125 ST23
1008.0
125 ST456
125 ST456
2682.0
125 STABCD
125 STABCD
1486.0
135 ST23
135 ST23
990.0
135 STBC
135 STBC
714.0
137 ST CITY COL1
137 ST CITY COL1
1383.0
138/GRAND CONC45
138/GRAND CONC45
459.0
14 ST-UNION SQ456LNQR
14 ST-UNION SQ456LNQR
2607.0
14 ST123FLM
14 ST123FLM
1318.0
14 STACEL
14 STACEL
2295.0
145 ST1
145 ST1
1267.0
145 ST3
145 ST3
726.0
145 STABCD
145 STABCD
1477.0
...
...
...
THIRTY ST1
THIRTY ST1
1629370.0
THIRTY THIRD ST1
THIRTY THIRD ST1
1467.0
TIMES SQ-42 ST1237ACENQRS
TIMES SQ-42 ST1237ACENQRS
4313168.0
TOMPKINSVILLE1
TOMPKINSVILLE1
88.0
TREMONT AVBD
TREMONT AVBD
1689.0
TWENTY THIRD ST1
TWENTY THIRD ST1
3168.0
UNION STR
UNION STR
982.0
UTICA AVAC
UTICA AVAC
515629.0
V.CORTLANDT PK1
V.CORTLANDT PK1
6450530.0
VAN SICLEN AV3
VAN SICLEN AV3
700.0
VAN SICLEN AVEC
VAN SICLEN AVEC
672.0
VAN SICLEN AVJZ
VAN SICLEN AVJZ
674.0
VERNON-JACKSON7
VERNON-JACKSON7
1474.0
W 4 ST-WASH SQABCDEFM
W 4 ST-WASH SQABCDEFM
2296.0
W 8 ST-AQUARIUMFQ
W 8 ST-AQUARIUMFQ
573.0
WAKEFIELD/2412
WAKEFIELD/2412
672.0
WALL ST23
WALL ST23
1969.0
WALL ST45
WALL ST45
519992536.0
WEST FARMS SQ25
WEST FARMS SQ25
842.0
WESTCHESTER SQ6
WESTCHESTER SQ6
727.0
WHITEHALL S-FRY1R
WHITEHALL S-FRY1R
1040.0
WHITLOCK AV6
WHITLOCK AV6
331.0
WILSON AVL
WILSON AVL
663.0
WINTHROP ST25
WINTHROP ST25
1656.0
WOODHAVEN BLVDJZ
WOODHAVEN BLVDJZ
456.0
WOODHAVEN BLVDMR
WOODHAVEN BLVDMR
1993.0
WOODLAWN4
WOODLAWN4
815.0
WORLD TRADE CTR23ACE
WORLD TRADE CTR23ACE
1801.0
YORK STF
YORK STF
1695.0
ZEREGA AV6
ZEREGA AV6
653.0
462 rows × 2 columns
It looks like we might have some crazy outliers. What is going on with Wall St45? Let's take a look.
In [26]:
df[df['STATID'] == "WALL ST45"]
Out[26]:
index
C/A
UNIT
SCP
STATION
LINENAME
DIVISION
DATE
TIME
DESC
ENTRIES
EXITS
TIMESTAMP
DOF
STATID
ENTRY_DIFF
ENTRY_DIFF_ABS
HOD
HODBIN
141380
141380
R203
R043
00-00-00
WALL ST
45
IRT
05/21/2016
02:00:00
REGULAR
1753042
735701
2016-05-21 02:00:00
SAT
WALL ST45
NaN
NaN
2
(-1, 3]
141381
141381
R203
R043
00-00-00
WALL ST
45
IRT
05/21/2016
06:00:00
REGULAR
1753042
735702
2016-05-21 06:00:00
SAT
WALL ST45
0.0
0.0
6
(3, 7]
141382
141382
R203
R043
00-00-00
WALL ST
45
IRT
05/21/2016
07:58:21
REGULAR
1753053
735702
2016-05-21 07:58:21
SAT
WALL ST45
11.0
11.0
7
(3, 7]
141383
141383
R203
R043
00-00-00
WALL ST
45
IRT
05/21/2016
07:59:59
REGULAR
1753053
735702
2016-05-21 07:59:59
SAT
WALL ST45
0.0
0.0
7
(3, 7]
141384
141384
R203
R043
00-00-00
WALL ST
45
IRT
05/21/2016
10:00:00
REGULAR
1753077
735703
2016-05-21 10:00:00
SAT
WALL ST45
24.0
24.0
10
(7, 11]
141385
141385
R203
R043
00-00-00
WALL ST
45
IRT
05/21/2016
14:00:00
REGULAR
1753204
735712
2016-05-21 14:00:00
SAT
WALL ST45
127.0
127.0
14
(11, 15]
141386
141386
R203
R043
00-00-00
WALL ST
45
IRT
05/21/2016
18:00:00
REGULAR
1753381
735719
2016-05-21 18:00:00
SAT
WALL ST45
177.0
177.0
18
(15, 19]
141387
141387
R203
R043
00-00-00
WALL ST
45
IRT
05/21/2016
22:00:00
REGULAR
1753485
735722
2016-05-21 22:00:00
SAT
WALL ST45
104.0
104.0
22
(19, 24]
141388
141388
R203
R043
00-00-00
WALL ST
45
IRT
05/22/2016
02:00:00
REGULAR
1753505
735722
2016-05-22 02:00:00
SUN
WALL ST45
20.0
20.0
2
(-1, 3]
141389
141389
R203
R043
00-00-00
WALL ST
45
IRT
05/22/2016
06:00:00
REGULAR
1753507
735722
2016-05-22 06:00:00
SUN
WALL ST45
2.0
2.0
6
(3, 7]
141390
141390
R203
R043
00-00-00
WALL ST
45
IRT
05/22/2016
10:00:00
REGULAR
1753524
735722
2016-05-22 10:00:00
SUN
WALL ST45
17.0
17.0
10
(7, 11]
141391
141391
R203
R043
00-00-00
WALL ST
45
IRT
05/22/2016
14:00:00
REGULAR
1753640
735728
2016-05-22 14:00:00
SUN
WALL ST45
116.0
116.0
14
(11, 15]
141392
141392
R203
R043
00-00-00
WALL ST
45
IRT
05/22/2016
18:00:00
REGULAR
1753764
735741
2016-05-22 18:00:00
SUN
WALL ST45
124.0
124.0
18
(15, 19]
141393
141393
R203
R043
00-00-00
WALL ST
45
IRT
05/22/2016
22:00:00
REGULAR
1753821
735742
2016-05-22 22:00:00
SUN
WALL ST45
57.0
57.0
22
(19, 24]
141394
141394
R203
R043
00-00-00
WALL ST
45
IRT
05/23/2016
02:00:00
REGULAR
1753834
735743
2016-05-23 02:00:00
MON
WALL ST45
13.0
13.0
2
(-1, 3]
141395
141395
R203
R043
00-00-00
WALL ST
45
IRT
05/23/2016
06:00:00
REGULAR
1753836
735748
2016-05-23 06:00:00
MON
WALL ST45
2.0
2.0
6
(3, 7]
141396
141396
R203
R043
00-00-00
WALL ST
45
IRT
05/23/2016
10:00:00
REGULAR
1754017
736067
2016-05-23 10:00:00
MON
WALL ST45
181.0
181.0
10
(7, 11]
141397
141397
R203
R043
00-00-00
WALL ST
45
IRT
05/23/2016
14:00:00
REGULAR
1754210
736176
2016-05-23 14:00:00
MON
WALL ST45
193.0
193.0
14
(11, 15]
141398
141398
R203
R043
00-00-00
WALL ST
45
IRT
05/23/2016
18:00:00
REGULAR
1754984
736280
2016-05-23 18:00:00
MON
WALL ST45
774.0
774.0
18
(15, 19]
141399
141399
R203
R043
00-00-00
WALL ST
45
IRT
05/23/2016
22:00:00
REGULAR
1755367
736317
2016-05-23 22:00:00
MON
WALL ST45
383.0
383.0
22
(19, 24]
141400
141400
R203
R043
00-00-00
WALL ST
45
IRT
05/24/2016
02:00:00
REGULAR
1755404
736323
2016-05-24 02:00:00
TUE
WALL ST45
37.0
37.0
2
(-1, 3]
141401
141401
R203
R043
00-00-00
WALL ST
45
IRT
05/24/2016
06:00:00
REGULAR
1755408
736329
2016-05-24 06:00:00
TUE
WALL ST45
4.0
4.0
6
(3, 7]
141402
141402
R203
R043
00-00-00
WALL ST
45
IRT
05/24/2016
10:00:00
REGULAR
1755589
736690
2016-05-24 10:00:00
TUE
WALL ST45
181.0
181.0
10
(7, 11]
141403
141403
R203
R043
00-00-00
WALL ST
45
IRT
05/24/2016
14:00:00
REGULAR
1755805
736829
2016-05-24 14:00:00
TUE
WALL ST45
216.0
216.0
14
(11, 15]
141404
141404
R203
R043
00-00-00
WALL ST
45
IRT
05/24/2016
18:00:00
REGULAR
1756539
736944
2016-05-24 18:00:00
TUE
WALL ST45
734.0
734.0
18
(15, 19]
141405
141405
R203
R043
00-00-00
WALL ST
45
IRT
05/24/2016
22:00:00
REGULAR
1756990
736987
2016-05-24 22:00:00
TUE
WALL ST45
451.0
451.0
22
(19, 24]
141406
141406
R203
R043
00-00-00
WALL ST
45
IRT
05/25/2016
02:00:00
REGULAR
1757025
736992
2016-05-25 02:00:00
WED
WALL ST45
35.0
35.0
2
(-1, 3]
141407
141407
R203
R043
00-00-00
WALL ST
45
IRT
05/25/2016
06:00:00
REGULAR
1757026
736995
2016-05-25 06:00:00
WED
WALL ST45
1.0
1.0
6
(3, 7]
141408
141408
R203
R043
00-00-00
WALL ST
45
IRT
05/25/2016
10:00:00
REGULAR
1757205
737320
2016-05-25 10:00:00
WED
WALL ST45
179.0
179.0
10
(7, 11]
141409
141409
R203
R043
00-00-00
WALL ST
45
IRT
05/25/2016
14:00:00
REGULAR
1757440
737498
2016-05-25 14:00:00
WED
WALL ST45
235.0
235.0
14
(11, 15]
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
724599
141401
R204A
R043
03-06-01
WALL ST
45
IRT
06/13/2016
02:00:00
REGULAR
1308001
2925069
2016-06-13 02:00:00
MON
WALL ST45
0.0
0.0
2
(-1, 3]
724600
141402
R204A
R043
03-06-01
WALL ST
45
IRT
06/13/2016
06:00:00
REGULAR
1308001
2925069
2016-06-13 06:00:00
MON
WALL ST45
0.0
0.0
6
(3, 7]
724601
141403
R204A
R043
03-06-01
WALL ST
45
IRT
06/13/2016
10:00:00
REGULAR
1308017
2925619
2016-06-13 10:00:00
MON
WALL ST45
16.0
16.0
10
(7, 11]
724602
141404
R204A
R043
03-06-01
WALL ST
45
IRT
06/13/2016
14:00:00
REGULAR
1308058
2925777
2016-06-13 14:00:00
MON
WALL ST45
41.0
41.0
14
(11, 15]
724603
141405
R204A
R043
03-06-01
WALL ST
45
IRT
06/13/2016
18:00:00
REGULAR
1308386
2925878
2016-06-13 18:00:00
MON
WALL ST45
328.0
328.0
18
(15, 19]
724604
141406
R204A
R043
03-06-01
WALL ST
45
IRT
06/13/2016
22:00:00
REGULAR
1308470
2925903
2016-06-13 22:00:00
MON
WALL ST45
84.0
84.0
22
(19, 24]
724605
141407
R204A
R043
03-06-01
WALL ST
45
IRT
06/14/2016
02:00:00
REGULAR
1308470
2925903
2016-06-14 02:00:00
TUE
WALL ST45
0.0
0.0
2
(-1, 3]
724606
141408
R204A
R043
03-06-01
WALL ST
45
IRT
06/14/2016
06:00:00
REGULAR
1308470
2925903
2016-06-14 06:00:00
TUE
WALL ST45
0.0
0.0
6
(3, 7]
724607
141409
R204A
R043
03-06-01
WALL ST
45
IRT
06/14/2016
10:00:00
REGULAR
1308501
2926449
2016-06-14 10:00:00
TUE
WALL ST45
31.0
31.0
10
(7, 11]
724608
141410
R204A
R043
03-06-01
WALL ST
45
IRT
06/14/2016
14:00:00
REGULAR
1308566
2926661
2016-06-14 14:00:00
TUE
WALL ST45
65.0
65.0
14
(11, 15]
724609
141411
R204A
R043
03-06-01
WALL ST
45
IRT
06/14/2016
18:00:00
REGULAR
1308871
2926769
2016-06-14 18:00:00
TUE
WALL ST45
305.0
305.0
18
(15, 19]
724610
141412
R204A
R043
03-06-01
WALL ST
45
IRT
06/14/2016
22:00:00
REGULAR
1308952
2926783
2016-06-14 22:00:00
TUE
WALL ST45
81.0
81.0
22
(19, 24]
724611
141413
R204A
R043
03-06-01
WALL ST
45
IRT
06/15/2016
02:00:00
REGULAR
1308952
2926783
2016-06-15 02:00:00
WED
WALL ST45
0.0
0.0
2
(-1, 3]
724612
141414
R204A
R043
03-06-01
WALL ST
45
IRT
06/15/2016
06:00:00
REGULAR
1308952
2926783
2016-06-15 06:00:00
WED
WALL ST45
0.0
0.0
6
(3, 7]
724613
141415
R204A
R043
03-06-01
WALL ST
45
IRT
06/15/2016
10:00:00
REGULAR
1308974
2927329
2016-06-15 10:00:00
WED
WALL ST45
22.0
22.0
10
(7, 11]
724614
141416
R204A
R043
03-06-01
WALL ST
45
IRT
06/15/2016
14:00:00
REGULAR
1309029
2927524
2016-06-15 14:00:00
WED
WALL ST45
55.0
55.0
14
(11, 15]
724615
141417
R204A
R043
03-06-01
WALL ST
45
IRT
06/15/2016
18:00:00
REGULAR
1309361
2927609
2016-06-15 18:00:00
WED
WALL ST45
332.0
332.0
18
(15, 19]
724616
141418
R204A
R043
03-06-01
WALL ST
45
IRT
06/15/2016
22:00:00
REGULAR
1309441
2927640
2016-06-15 22:00:00
WED
WALL ST45
80.0
80.0
22
(19, 24]
724617
141419
R204A
R043
03-06-01
WALL ST
45
IRT
06/16/2016
02:00:00
REGULAR
1309441
2927640
2016-06-16 02:00:00
THU
WALL ST45
0.0
0.0
2
(-1, 3]
724618
141420
R204A
R043
03-06-01
WALL ST
45
IRT
06/16/2016
06:00:00
REGULAR
1309441
2927640
2016-06-16 06:00:00
THU
WALL ST45
0.0
0.0
6
(3, 7]
724619
141421
R204A
R043
03-06-01
WALL ST
45
IRT
06/16/2016
10:00:00
REGULAR
1309464
2928179
2016-06-16 10:00:00
THU
WALL ST45
23.0
23.0
10
(7, 11]
724620
141422
R204A
R043
03-06-01
WALL ST
45
IRT
06/16/2016
14:00:00
REGULAR
1309529
2928356
2016-06-16 14:00:00
THU
WALL ST45
65.0
65.0
14
(11, 15]
724621
141423
R204A
R043
03-06-01
WALL ST
45
IRT
06/16/2016
18:00:00
REGULAR
1309810
2928471
2016-06-16 18:00:00
THU
WALL ST45
281.0
281.0
18
(15, 19]
724622
141424
R204A
R043
03-06-01
WALL ST
45
IRT
06/16/2016
22:00:00
REGULAR
1309899
2928495
2016-06-16 22:00:00
THU
WALL ST45
89.0
89.0
22
(19, 24]
724623
141425
R204A
R043
03-06-01
WALL ST
45
IRT
06/17/2016
02:00:00
REGULAR
1309899
2928495
2016-06-17 02:00:00
FRI
WALL ST45
0.0
0.0
2
(-1, 3]
724624
141426
R204A
R043
03-06-01
WALL ST
45
IRT
06/17/2016
06:00:00
REGULAR
1309899
2928495
2016-06-17 06:00:00
FRI
WALL ST45
0.0
0.0
6
(3, 7]
724625
141427
R204A
R043
03-06-01
WALL ST
45
IRT
06/17/2016
10:00:00
REGULAR
1309931
2928983
2016-06-17 10:00:00
FRI
WALL ST45
32.0
32.0
10
(7, 11]
724626
141428
R204A
R043
03-06-01
WALL ST
45
IRT
06/17/2016
14:00:00
REGULAR
1309995
2929134
2016-06-17 14:00:00
FRI
WALL ST45
64.0
64.0
14
(11, 15]
724627
141429
R204A
R043
03-06-01
WALL ST
45
IRT
06/17/2016
18:00:00
REGULAR
1310253
2929247
2016-06-17 18:00:00
FRI
WALL ST45
258.0
258.0
18
(15, 19]
724628
141430
R204A
R043
03-06-01
WALL ST
45
IRT
06/17/2016
22:00:00
REGULAR
1310300
2929262
2016-06-17 22:00:00
FRI
WALL ST45
47.0
47.0
22
(19, 24]
4780 rows × 19 columns
In [27]:
df[df['STATID'] == "WALL ST23"]
Out[27]:
index
C/A
UNIT
SCP
STATION
LINENAME
DIVISION
DATE
TIME
DESC
ENTRIES
EXITS
TIMESTAMP
DOF
STATID
ENTRY_DIFF
ENTRY_DIFF_ABS
HOD
HODBIN
121389
121389
R110
R027
01-00-00
WALL ST
23
IRT
05/21/2016
00:00:00
REGULAR
3648884
5424996
2016-05-21 00:00:00
SAT
WALL ST23
NaN
NaN
0
(-1, 3]
121390
121390
R110
R027
01-00-00
WALL ST
23
IRT
05/21/2016
04:00:00
REGULAR
3648896
5425018
2016-05-21 04:00:00
SAT
WALL ST23
12.0
12.0
4
(3, 7]
121391
121391
R110
R027
01-00-00
WALL ST
23
IRT
05/21/2016
08:00:00
REGULAR
3648918
5425060
2016-05-21 08:00:00
SAT
WALL ST23
22.0
22.0
8
(7, 11]
121392
121392
R110
R027
01-00-00
WALL ST
23
IRT
05/21/2016
08:21:11
REGULAR
3648919
5425073
2016-05-21 08:21:11
SAT
WALL ST23
1.0
1.0
8
(7, 11]
121393
121393
R110
R027
01-00-00
WALL ST
23
IRT
05/21/2016
12:00:00
REGULAR
3648971
5425251
2016-05-21 12:00:00
SAT
WALL ST23
52.0
52.0
12
(11, 15]
121394
121394
R110
R027
01-00-00
WALL ST
23
IRT
05/21/2016
16:00:00
REGULAR
3649078
5425440
2016-05-21 16:00:00
SAT
WALL ST23
107.0
107.0
16
(15, 19]
121395
121395
R110
R027
01-00-00
WALL ST
23
IRT
05/21/2016
20:00:00
REGULAR
3649203
5425565
2016-05-21 20:00:00
SAT
WALL ST23
125.0
125.0
20
(19, 24]
121396
121396
R110
R027
01-00-00
WALL ST
23
IRT
05/22/2016
00:00:00
REGULAR
3649234
5425623
2016-05-22 00:00:00
SUN
WALL ST23
31.0
31.0
0
(-1, 3]
121397
121397
R110
R027
01-00-00
WALL ST
23
IRT
05/22/2016
04:00:00
REGULAR
3649240
5425650
2016-05-22 04:00:00
SUN
WALL ST23
6.0
6.0
4
(3, 7]
121398
121398
R110
R027
01-00-00
WALL ST
23
IRT
05/22/2016
08:00:00
REGULAR
3649241
5425676
2016-05-22 08:00:00
SUN
WALL ST23
1.0
1.0
8
(7, 11]
121399
121399
R110
R027
01-00-00
WALL ST
23
IRT
05/22/2016
12:00:00
REGULAR
3649282
5425775
2016-05-22 12:00:00
SUN
WALL ST23
41.0
41.0
12
(11, 15]
121400
121400
R110
R027
01-00-00
WALL ST
23
IRT
05/22/2016
16:00:00
REGULAR
3649341
5425888
2016-05-22 16:00:00
SUN
WALL ST23
59.0
59.0
16
(15, 19]
121401
121401
R110
R027
01-00-00
WALL ST
23
IRT
05/22/2016
20:00:00
REGULAR
3649415
5425998
2016-05-22 20:00:00
SUN
WALL ST23
74.0
74.0
20
(19, 24]
121402
121402
R110
R027
01-00-00
WALL ST
23
IRT
05/23/2016
00:00:00
REGULAR
3649450
5426075
2016-05-23 00:00:00
MON
WALL ST23
35.0
35.0
0
(-1, 3]
121403
121403
R110
R027
01-00-00
WALL ST
23
IRT
05/23/2016
04:00:00
REGULAR
3649451
5426079
2016-05-23 04:00:00
MON
WALL ST23
1.0
1.0
4
(3, 7]
121404
121404
R110
R027
01-00-00
WALL ST
23
IRT
05/23/2016
08:00:00
REGULAR
3649460
5426429
2016-05-23 08:00:00
MON
WALL ST23
9.0
9.0
8
(7, 11]
121405
121405
R110
R027
01-00-00
WALL ST
23
IRT
05/23/2016
12:00:00
REGULAR
3649515
5427510
2016-05-23 12:00:00
MON
WALL ST23
55.0
55.0
12
(11, 15]
121406
121406
R110
R027
01-00-00
WALL ST
23
IRT
05/23/2016
16:00:00
REGULAR
3649760
5427717
2016-05-23 16:00:00
MON
WALL ST23
245.0
245.0
16
(15, 19]
121407
121407
R110
R027
01-00-00
WALL ST
23
IRT
05/23/2016
20:00:00
REGULAR
3650581
5428027
2016-05-23 20:00:00
MON
WALL ST23
821.0
821.0
20
(19, 24]
121408
121408
R110
R027
01-00-00
WALL ST
23
IRT
05/24/2016
00:00:00
REGULAR
3650664
5428089
2016-05-24 00:00:00
TUE
WALL ST23
83.0
83.0
0
(-1, 3]
121409
121409
R110
R027
01-00-00
WALL ST
23
IRT
05/24/2016
04:00:00
REGULAR
3650667
5428097
2016-05-24 04:00:00
TUE
WALL ST23
3.0
3.0
4
(3, 7]
121410
121410
R110
R027
01-00-00
WALL ST
23
IRT
05/24/2016
06:48:51
REGULAR
3650671
5428157
2016-05-24 06:48:51
TUE
WALL ST23
4.0
4.0
6
(3, 7]
121411
121411
R110
R027
01-00-00
WALL ST
23
IRT
05/24/2016
08:00:00
REGULAR
3650678
5428431
2016-05-24 08:00:00
TUE
WALL ST23
7.0
7.0
8
(7, 11]
121412
121412
R110
R027
01-00-00
WALL ST
23
IRT
05/24/2016
12:00:00
REGULAR
3650743
5429565
2016-05-24 12:00:00
TUE
WALL ST23
65.0
65.0
12
(11, 15]
121413
121413
R110
R027
01-00-00
WALL ST
23
IRT
05/24/2016
16:00:00
REGULAR
3650977
5429769
2016-05-24 16:00:00
TUE
WALL ST23
234.0
234.0
16
(15, 19]
121414
121414
R110
R027
01-00-00
WALL ST
23
IRT
05/24/2016
20:00:00
REGULAR
3652149
5430101
2016-05-24 20:00:00
TUE
WALL ST23
1172.0
1172.0
20
(19, 24]
121415
121415
R110
R027
01-00-00
WALL ST
23
IRT
05/25/2016
00:00:00
REGULAR
3652233
5430193
2016-05-25 00:00:00
WED
WALL ST23
84.0
84.0
0
(-1, 3]
121416
121416
R110
R027
01-00-00
WALL ST
23
IRT
05/25/2016
04:00:00
REGULAR
3652239
5430196
2016-05-25 04:00:00
WED
WALL ST23
6.0
6.0
4
(3, 7]
121417
121417
R110
R027
01-00-00
WALL ST
23
IRT
05/25/2016
08:00:00
REGULAR
3652246
5430539
2016-05-25 08:00:00
WED
WALL ST23
7.0
7.0
8
(7, 11]
121418
121418
R110
R027
01-00-00
WALL ST
23
IRT
05/25/2016
09:49:15
REGULAR
3652268
5431354
2016-05-25 09:49:15
WED
WALL ST23
22.0
22.0
9
(7, 11]
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
704638
121440
R112A
R027
03-00-02
WALL ST
23
IRT
06/13/2016
00:00:00
REGULAR
384708
394335
2016-06-13 00:00:00
MON
WALL ST23
0.0
0.0
0
(-1, 3]
704639
121441
R112A
R027
03-00-02
WALL ST
23
IRT
06/13/2016
04:00:00
REGULAR
384708
394335
2016-06-13 04:00:00
MON
WALL ST23
0.0
0.0
4
(3, 7]
704640
121442
R112A
R027
03-00-02
WALL ST
23
IRT
06/13/2016
08:00:00
REGULAR
384716
394413
2016-06-13 08:00:00
MON
WALL ST23
8.0
8.0
8
(7, 11]
704641
121443
R112A
R027
03-00-02
WALL ST
23
IRT
06/13/2016
12:00:00
REGULAR
384810
394903
2016-06-13 12:00:00
MON
WALL ST23
94.0
94.0
12
(11, 15]
704642
121444
R112A
R027
03-00-02
WALL ST
23
IRT
06/13/2016
16:00:00
REGULAR
384910
394956
2016-06-13 16:00:00
MON
WALL ST23
100.0
100.0
16
(15, 19]
704643
121445
R112A
R027
03-00-02
WALL ST
23
IRT
06/13/2016
20:00:00
REGULAR
385385
395036
2016-06-13 20:00:00
MON
WALL ST23
475.0
475.0
20
(19, 24]
704644
121446
R112A
R027
03-00-02
WALL ST
23
IRT
06/14/2016
00:00:00
REGULAR
385412
395044
2016-06-14 00:00:00
TUE
WALL ST23
27.0
27.0
0
(-1, 3]
704645
121447
R112A
R027
03-00-02
WALL ST
23
IRT
06/14/2016
04:00:00
REGULAR
385422
395048
2016-06-14 04:00:00
TUE
WALL ST23
10.0
10.0
4
(3, 7]
704646
121448
R112A
R027
03-00-02
WALL ST
23
IRT
06/14/2016
08:00:00
REGULAR
385446
395129
2016-06-14 08:00:00
TUE
WALL ST23
24.0
24.0
8
(7, 11]
704647
121449
R112A
R027
03-00-02
WALL ST
23
IRT
06/14/2016
12:00:00
REGULAR
385541
395620
2016-06-14 12:00:00
TUE
WALL ST23
95.0
95.0
12
(11, 15]
704648
121450
R112A
R027
03-00-02
WALL ST
23
IRT
06/14/2016
16:00:00
REGULAR
385624
395698
2016-06-14 16:00:00
TUE
WALL ST23
83.0
83.0
16
(15, 19]
704649
121451
R112A
R027
03-00-02
WALL ST
23
IRT
06/14/2016
20:00:00
REGULAR
386118
395777
2016-06-14 20:00:00
TUE
WALL ST23
494.0
494.0
20
(19, 24]
704650
121452
R112A
R027
03-00-02
WALL ST
23
IRT
06/15/2016
00:00:00
REGULAR
386148
395796
2016-06-15 00:00:00
WED
WALL ST23
30.0
30.0
0
(-1, 3]
704651
121453
R112A
R027
03-00-02
WALL ST
23
IRT
06/15/2016
04:00:00
REGULAR
386163
395798
2016-06-15 04:00:00
WED
WALL ST23
15.0
15.0
4
(3, 7]
704652
121454
R112A
R027
03-00-02
WALL ST
23
IRT
06/15/2016
08:00:00
REGULAR
386180
395898
2016-06-15 08:00:00
WED
WALL ST23
17.0
17.0
8
(7, 11]
704653
121455
R112A
R027
03-00-02
WALL ST
23
IRT
06/15/2016
12:00:00
REGULAR
386275
396436
2016-06-15 12:00:00
WED
WALL ST23
95.0
95.0
12
(11, 15]
704654
121456
R112A
R027
03-00-02
WALL ST
23
IRT
06/15/2016
16:00:00
REGULAR
386375
396502
2016-06-15 16:00:00
WED
WALL ST23
100.0
100.0
16
(15, 19]
704655
121457
R112A
R027
03-00-02
WALL ST
23
IRT
06/15/2016
20:00:00
REGULAR
386870
396562
2016-06-15 20:00:00
WED
WALL ST23
495.0
495.0
20
(19, 24]
704656
121458
R112A
R027
03-00-02
WALL ST
23
IRT
06/16/2016
00:00:00
REGULAR
386905
396581
2016-06-16 00:00:00
THU
WALL ST23
35.0
35.0
0
(-1, 3]
704657
121459
R112A
R027
03-00-02
WALL ST
23
IRT
06/16/2016
04:00:00
REGULAR
386907
396584
2016-06-16 04:00:00
THU
WALL ST23
2.0
2.0
4
(3, 7]
704658
121460
R112A
R027
03-00-02
WALL ST
23
IRT
06/16/2016
08:00:00
REGULAR
386920
396694
2016-06-16 08:00:00
THU
WALL ST23
13.0
13.0
8
(7, 11]
704659
121461
R112A
R027
03-00-02
WALL ST
23
IRT
06/16/2016
12:00:00
REGULAR
387013
397156
2016-06-16 12:00:00
THU
WALL ST23
93.0
93.0
12
(11, 15]
704660
121462
R112A
R027
03-00-02
WALL ST
23
IRT
06/16/2016
16:00:00
REGULAR
387118
397229
2016-06-16 16:00:00
THU
WALL ST23
105.0
105.0
16
(15, 19]
704661
121463
R112A
R027
03-00-02
WALL ST
23
IRT
06/16/2016
20:00:00
REGULAR
387643
397305
2016-06-16 20:00:00
THU
WALL ST23
525.0
525.0
20
(19, 24]
704662
121464
R112A
R027
03-00-02
WALL ST
23
IRT
06/17/2016
00:00:00
REGULAR
387677
397328
2016-06-17 00:00:00
FRI
WALL ST23
34.0
34.0
0
(-1, 3]
704663
121465
R112A
R027
03-00-02
WALL ST
23
IRT
06/17/2016
04:00:00
REGULAR
387684
397330
2016-06-17 04:00:00
FRI
WALL ST23
7.0
7.0
4
(3, 7]
704664
121466
R112A
R027
03-00-02
WALL ST
23
IRT
06/17/2016
08:00:00
REGULAR
387693
397408
2016-06-17 08:00:00
FRI
WALL ST23
9.0
9.0
8
(7, 11]
704665
121467
R112A
R027
03-00-02
WALL ST
23
IRT
06/17/2016
12:00:00
REGULAR
387796
397791
2016-06-17 12:00:00
FRI
WALL ST23
103.0
103.0
12
(11, 15]
704666
121468
R112A
R027
03-00-02
WALL ST
23
IRT
06/17/2016
16:00:00
REGULAR
387928
397852
2016-06-17 16:00:00
FRI
WALL ST23
132.0
132.0
16
(15, 19]
704667
121469
R112A
R027
03-00-02
WALL ST
23
IRT
06/17/2016
20:00:00
REGULAR
388283
397926
2016-06-17 20:00:00
FRI
WALL ST23
355.0
355.0
20
(19, 24]
3769 rows × 19 columns
It looks like we have 1000 more rows for Wall St 23 than for Wall St 45, and that might account for some of the craziness. By taking a close look at HOD, we see some instances of two measurements taken within the same bin. But multiple measurements for the same time period should just result in small counts at each time. Let's take another approach.
In [28]:
df.sort_values(['ENTRY_DIFF_ABS']).groupby(['STATID','DOF','HODBIN'])['STATID','ENTRY_DIFF_ABS'].sum()
Out[28]:
ENTRY_DIFF_ABS
STATID
DOF
HODBIN
1 AVL
FRI
(-1, 3]
18438.0
(3, 7]
4000.0
(7, 11]
6731.0
(11, 15]
21397.0
(15, 19]
22171.0
(19, 24]
22392.0
MON
(-1, 3]
11410.0
(3, 7]
2573.0
(7, 11]
6151.0
(11, 15]
17640.0
(15, 19]
18313.0
(19, 24]
24814.0
SAT
(-1, 3]
21448.0
(3, 7]
7667.0
(7, 11]
2912.0
(11, 15]
11277.0
(15, 19]
19249.0
(19, 24]
19732.0
SUN
(-1, 3]
16433.0
(3, 7]
6680.0
(7, 11]
2072.0
(11, 15]
8610.0
(15, 19]
15583.0
(19, 24]
16248.0
THU
(-1, 3]
17612.0
(3, 7]
2951.0
(7, 11]
7410.0
(11, 15]
22647.0
(15, 19]
151104944.0
(19, 24]
29447.0
...
...
...
...
ZEREGA AV6
SAT
(-1, 3]
280.0
(3, 7]
101.0
(7, 11]
1222.0
(11, 15]
1373.0
(15, 19]
1412.0
(19, 24]
800.0
SUN
(-1, 3]
406.0
(3, 7]
104.0
(7, 11]
732.0
(11, 15]
1200.0
(15, 19]
1041.0
(19, 24]
634.0
THU
(-1, 3]
371.0
(3, 7]
273.0
(7, 11]
4665.0
(11, 15]
2041.0
(15, 19]
2357.0
(19, 24]
1373.0
TUE
(-1, 3]
308.0
(3, 7]
229.0
(7, 11]
4743.0
(11, 15]
1998.0
(15, 19]
2339.0
(19, 24]
1336.0
WED
(-1, 3]
374.0
(3, 7]
249.0
(7, 11]
4925.0
(11, 15]
2003.0
(15, 19]
2277.0
(19, 24]
1510.0
19404 rows × 1 columns
Look for this in Part Two of the tutorial!
If you liked this jupyter notebook or have any comments/questions/suggestions, please email me via the form at laurenoldja.net/contact
Content source: loldja/loldja.github.io
Similar notebooks: