In [2]:
#!/usr/bin/env python3
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import Imputer
from sklearn.metrics import pairwise
from pyproj import Geod
In [3]:
df = pd.read_csv('201707-citibike-tripdata.csv')
In [4]:
print('column numbers: ' + str(len(df.columns)))
print('row numbers: ' + str(len(df.index)))
column numbers: 15
row numbers: 1735599
In [5]:
df
Out[5]:
tripduration
starttime
stoptime
start station id
start station name
start station latitude
start station longitude
end station id
end station name
end station latitude
end station longitude
bikeid
usertype
birth year
gender
0
364
2017-07-01 00:00:00
2017-07-01 00:06:05
539
Metropolitan Ave & Bedford Ave
40.715348
-73.960241
3107
Bedford Ave & Nassau Ave
40.723117
-73.952123
14744
Subscriber
1986.0
1
1
2142
2017-07-01 00:00:03
2017-07-01 00:35:46
293
Lafayette St & E 8 St
40.730207
-73.991026
3425
2 Ave & E 104 St
40.789210
-73.943708
19587
Subscriber
1981.0
1
2
328
2017-07-01 00:00:08
2017-07-01 00:05:37
3242
Schermerhorn St & Court St
40.691029
-73.991834
3397
Court St & Nelson St
40.676395
-73.998699
27937
Subscriber
1984.0
2
3
2530
2017-07-01 00:00:11
2017-07-01 00:42:22
2002
Wythe Ave & Metropolitan Ave
40.716887
-73.963198
398
Atlantic Ave & Furman St
40.691652
-73.999979
26066
Subscriber
1985.0
1
4
2534
2017-07-01 00:00:15
2017-07-01 00:42:29
2002
Wythe Ave & Metropolitan Ave
40.716887
-73.963198
398
Atlantic Ave & Furman St
40.691652
-73.999979
29408
Subscriber
1982.0
2
5
545
2017-07-01 00:00:18
2017-07-01 00:09:24
361
Allen St & Hester St
40.716059
-73.991908
502
Henry St & Grand St
40.714215
-73.981346
21203
Subscriber
1954.0
2
6
1496
2017-07-01 00:00:18
2017-07-01 00:25:15
3233
E 48 St & 5 Ave
40.757246
-73.978059
546
E 30 St & Park Ave S
40.744449
-73.983035
15933
Customer
1971.0
1
7
1495
2017-07-01 00:00:19
2017-07-01 00:25:15
3233
E 48 St & 5 Ave
40.757246
-73.978059
546
E 30 St & Park Ave S
40.744449
-73.983035
18515
Customer
1964.0
2
8
593
2017-07-01 00:00:27
2017-07-01 00:10:20
3314
W 95 St & Broadway
40.793770
-73.971888
3363
E 102 St & Park Ave
40.790483
-73.950331
29135
Subscriber
1982.0
1
9
1950
2017-07-01 00:00:30
2017-07-01 00:33:00
3085
Roebling St & N 4 St
40.714690
-73.957390
468
Broadway & W 56 St
40.765265
-73.981923
17365
Subscriber
1983.0
1
10
736
2017-07-01 00:01:05
2017-07-01 00:13:22
382
University Pl & E 14 St
40.734927
-73.992005
526
E 33 St & 5 Ave
40.747659
-73.984907
20592
Subscriber
1966.0
1
11
588
2017-07-01 00:01:10
2017-07-01 00:10:59
305
E 58 St & 3 Ave
40.760958
-73.967245
519
Pershing Square North
40.751873
-73.977706
26555
Subscriber
1975.0
2
12
799
2017-07-01 00:01:19
2017-07-01 00:14:38
384
Fulton St & Washington Ave
40.683048
-73.964915
3329
Degraw St & Smith St
40.682915
-73.993182
16184
Subscriber
1960.0
1
13
404
2017-07-01 00:01:51
2017-07-01 00:08:35
502
Henry St & Grand St
40.714215
-73.981346
2009
Catherine St & Monroe St
40.711174
-73.996826
25127
Subscriber
1977.0
2
14
1045
2017-07-01 00:01:59
2017-07-01 00:19:25
491
E 24 St & Park Ave S
40.740964
-73.986022
500
Broadway & W 51 St
40.762288
-73.983362
21443
Subscriber
1972.0
1
15
589
2017-07-01 00:02:05
2017-07-01 00:11:55
252
MacDougal St & Washington Sq
40.732264
-73.998522
523
W 38 St & 8 Ave
40.754666
-73.991382
19047
Subscriber
1984.0
1
16
491
2017-07-01 00:02:06
2017-07-01 00:10:17
462
W 22 St & 10 Ave
40.746920
-74.004519
482
W 15 St & 7 Ave
40.739355
-73.999318
26666
Subscriber
1968.0
1
17
1718
2017-07-01 00:02:07
2017-07-01 00:30:46
450
W 49 St & 8 Ave
40.762272
-73.987882
473
Rivington St & Chrystie St
40.721101
-73.991925
25845
Subscriber
1988.0
1
18
773
2017-07-01 00:02:08
2017-07-01 00:15:01
441
E 52 St & 2 Ave
40.756014
-73.967416
3338
2 Ave & E 99 St
40.786259
-73.945526
27895
Subscriber
1988.0
2
19
859
2017-07-01 00:02:08
2017-07-01 00:16:27
3110
Meserole Ave & Manhattan Ave
40.727086
-73.952991
3081
Graham Ave & Grand St
40.711863
-73.944024
29350
Subscriber
1990.0
1
20
173
2017-07-01 00:02:10
2017-07-01 00:05:03
3083
Bushwick Ave & Powers St
40.712477
-73.941000
3074
Montrose Ave & Bushwick Ave
40.707678
-73.940162
25113
Subscriber
1981.0
1
21
1294
2017-07-01 00:02:13
2017-07-01 00:23:47
477
W 41 St & 8 Ave
40.756405
-73.990026
3147
E 85 St & 3 Ave
40.778012
-73.954071
14838
Subscriber
1966.0
1
22
876
2017-07-01 00:02:17
2017-07-01 00:16:54
435
W 21 St & 6 Ave
40.741740
-73.994156
266
Avenue D & E 8 St
40.723684
-73.975748
21645
Subscriber
1992.0
1
23
1835
2017-07-01 00:02:19
2017-07-01 00:32:55
3416
7 Ave & Park Pl
40.677615
-73.973243
3055
Greene Ave & Nostrand Ave
40.688334
-73.950916
21249
Customer
NaN
0
24
1327
2017-07-01 00:02:20
2017-07-01 00:24:28
402
Broadway & E 22 St
40.740343
-73.989551
514
12 Ave & W 40 St
40.760875
-74.002777
28025
Subscriber
1995.0
2
25
704
2017-07-01 00:02:33
2017-07-01 00:14:18
3255
8 Ave & W 31 St
40.750585
-73.994685
450
W 49 St & 8 Ave
40.762272
-73.987882
15139
Subscriber
1973.0
1
26
1449
2017-07-01 00:02:42
2017-07-01 00:26:52
401
Allen St & Rivington St
40.720196
-73.989978
446
W 24 St & 7 Ave
40.744876
-73.995299
28973
Subscriber
1988.0
1
27
2391
2017-07-01 00:02:44
2017-07-01 00:42:35
2008
Little West St & 1 Pl
40.705693
-74.016777
511
E 14 St & Avenue B
40.729387
-73.977724
27374
Subscriber
1952.0
1
28
983
2017-07-01 00:02:52
2017-07-01 00:19:16
501
FDR Drive & E 35 St
40.744219
-73.971212
3146
E 81 St & 3 Ave
40.775730
-73.956753
20559
Subscriber
1990.0
1
29
1323
2017-07-01 00:02:52
2017-07-01 00:24:55
355
Bayard St & Baxter St
40.716021
-73.999744
3431
E 35 St & 3 Ave
40.746524
-73.977885
26055
Subscriber
1973.0
2
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1735569
254
2017-07-31 23:56:38
2017-08-01 00:00:52
3167
Amsterdam Ave & W 73 St
40.779668
-73.980930
3171
Amsterdam Ave & W 82 St
40.785247
-73.976673
28279
Subscriber
1969.0
2
1735570
405
2017-07-31 23:56:56
2017-08-01 00:03:41
297
E 15 St & 3 Ave
40.734232
-73.986923
345
W 13 St & 6 Ave
40.736494
-73.997044
29735
Subscriber
1986.0
1
1735571
398
2017-07-31 23:57:00
2017-08-01 00:03:38
3088
Union Ave & Jackson St
40.716075
-73.952029
3083
Bushwick Ave & Powers St
40.712477
-73.941000
18161
Subscriber
1988.0
2
1735572
866
2017-07-31 23:57:06
2017-08-01 00:11:32
308
St James Pl & Oliver St
40.713079
-73.998512
195
Liberty St & Broadway
40.709056
-74.010434
21589
Customer
NaN
0
1735573
425
2017-07-31 23:57:12
2017-08-01 00:04:17
285
Broadway & E 14 St
40.734546
-73.990741
432
E 7 St & Avenue A
40.726218
-73.983799
15434
Subscriber
1960.0
0
1735574
524
2017-07-31 23:57:14
2017-08-01 00:05:59
462
W 22 St & 10 Ave
40.746920
-74.004519
402
Broadway & E 22 St
40.740343
-73.989551
19699
Subscriber
1980.0
1
1735575
1185
2017-07-31 23:57:17
2017-08-01 00:17:02
526
E 33 St & 5 Ave
40.747659
-73.984907
434
9 Ave & W 18 St
40.743174
-74.003664
18723
Customer
1988.0
1
1735576
436
2017-07-31 23:57:21
2017-08-01 00:04:37
306
Cliff St & Fulton St
40.708235
-74.005301
360
William St & Pine St
40.707179
-74.008873
20164
Subscriber
1986.0
1
1735577
412
2017-07-31 23:57:31
2017-08-01 00:04:24
285
Broadway & E 14 St
40.734546
-73.990741
432
E 7 St & Avenue A
40.726218
-73.983799
19574
Subscriber
1959.0
1
1735578
592
2017-07-31 23:57:42
2017-08-01 00:07:34
241
DeKalb Ave & S Portland Ave
40.689810
-73.974931
3049
Cambridge Pl & Gates Ave
40.684880
-73.963040
16333
Subscriber
1989.0
1
1735579
330
2017-07-31 23:57:47
2017-08-01 00:03:18
3085
Roebling St & N 4 St
40.714690
-73.957390
3086
Graham Ave & Conselyea St
40.715143
-73.944507
29409
Subscriber
1982.0
1
1735580
1506
2017-07-31 23:58:28
2017-08-01 00:23:34
528
2 Ave & E 31 St
40.742909
-73.977061
3145
E 84 St & Park Ave
40.778627
-73.957721
20944
Subscriber
1967.0
2
1735581
1176
2017-07-31 23:58:40
2017-08-01 00:18:16
503
E 20 St & Park Ave
40.738274
-73.987520
2006
Central Park S & 6 Ave
40.765909
-73.976342
29009
Customer
1979.0
1
1735582
774
2017-07-31 23:58:42
2017-08-01 00:11:37
164
E 47 St & 2 Ave
40.753231
-73.970325
492
W 33 St & 7 Ave
40.750200
-73.990931
17972
Subscriber
1964.0
1
1735583
1060
2017-07-31 23:58:45
2017-08-01 00:16:25
428
E 3 St & 1 Ave
40.724677
-73.987834
128
MacDougal St & Prince St
40.727103
-74.002971
27508
Subscriber
1968.0
2
1735584
364
2017-07-31 23:58:47
2017-08-01 00:04:51
383
Greenwich Ave & Charles St
40.735238
-74.000271
509
9 Ave & W 22 St
40.745497
-74.001971
27677
Subscriber
1966.0
1
1735585
1343
2017-07-31 23:58:50
2017-08-01 00:21:14
161
LaGuardia Pl & W 3 St
40.729170
-73.998102
406
Hicks St & Montague St
40.695128
-73.995951
20912
Subscriber
1985.0
1
1735586
936
2017-07-31 23:58:54
2017-08-01 00:14:30
442
W 27 St & 7 Ave
40.746647
-73.993915
477
W 41 St & 8 Ave
40.756405
-73.990026
19612
Subscriber
1995.0
1
1735587
277
2017-07-31 23:59:10
2017-08-01 00:03:47
495
W 47 St & 10 Ave
40.762699
-73.993012
423
W 54 St & 9 Ave
40.765849
-73.986905
30198
Subscriber
1988.0
1
1735588
2294
2017-07-31 23:59:09
2017-08-01 00:37:24
232
Cadman Plaza E & Tillary St
40.695977
-73.990149
3342
Pioneer St & Richards St
40.677775
-74.009461
18769
Subscriber
1999.0
1
1735589
1916
2017-07-31 23:59:11
2017-08-01 00:31:07
440
E 45 St & 3 Ave
40.752554
-73.972826
3338
2 Ave & E 99 St
40.786259
-73.945526
25644
Subscriber
1992.0
1
1735590
2244
2017-07-31 23:59:10
2017-08-01 00:36:35
3165
Central Park West & W 72 St
40.775794
-73.976206
3163
Central Park West & W 68 St
40.773407
-73.977825
30201
Subscriber
1950.0
1
1735591
428
2017-07-31 23:59:11
2017-08-01 00:06:20
477
W 41 St & 8 Ave
40.756405
-73.990026
513
W 56 St & 10 Ave
40.768254
-73.988639
30127
Subscriber
1980.0
1
1735592
215
2017-07-31 23:59:15
2017-08-01 00:02:51
3164
Columbus Ave & W 72 St
40.777057
-73.978985
3170
W 84 St & Columbus Ave
40.785000
-73.972834
18057
Subscriber
1970.0
1
1735593
3058
2017-07-31 23:59:31
2017-08-01 00:50:30
3418
Plaza St West & Flatbush Ave
40.675021
-73.971115
293
Lafayette St & E 8 St
40.730207
-73.991026
30161
Subscriber
1963.0
1
1735594
560
2017-07-31 23:59:33
2017-08-01 00:08:53
3260
Mercer St & Bleecker St
40.727064
-73.996621
236
St Marks Pl & 2 Ave
40.728419
-73.987140
16126
Subscriber
1990.0
2
1735595
272
2017-07-31 23:59:35
2017-08-01 00:04:08
247
Perry St & Bleecker St
40.735354
-74.004831
434
9 Ave & W 18 St
40.743174
-74.003664
27684
Subscriber
1999.0
1
1735596
1211
2017-07-31 23:59:37
2017-08-01 00:19:49
3303
Butler St & Court St
40.684989
-73.994403
3308
Kane St & Clinton St
40.686176
-73.996453
28402
Subscriber
1975.0
2
1735597
547
2017-07-31 23:59:48
2017-08-01 00:08:56
276
Duane St & Greenwich St
40.717488
-74.010455
127
Barrow St & Hudson St
40.731724
-74.006744
30150
Subscriber
1992.0
1
1735598
1816
2017-07-31 23:59:57
2017-08-01 00:30:13
3091
Frost St & Meeker St
40.717640
-73.948820
258
DeKalb Ave & Vanderbilt Ave
40.689407
-73.968855
19643
Customer
NaN
0
1735599 rows × 15 columns
In [6]:
print(df.isnull().sum().sum())
print(pd.isnull(df).sum() > 0)
df = df[~df.isin(df[df.isnull().any(axis=1)])].dropna().reset_index(drop=True)
df
228596
tripduration False
starttime False
stoptime False
start station id False
start station name False
start station latitude False
start station longitude False
end station id False
end station name False
end station latitude False
end station longitude False
bikeid False
usertype False
birth year True
gender False
dtype: bool
Out[6]:
tripduration
starttime
stoptime
start station id
start station name
start station latitude
start station longitude
end station id
end station name
end station latitude
end station longitude
bikeid
usertype
birth year
gender
0
364.0
2017-07-01 00:00:00
2017-07-01 00:06:05
539.0
Metropolitan Ave & Bedford Ave
40.715348
-73.960241
3107.0
Bedford Ave & Nassau Ave
40.723117
-73.952123
14744.0
Subscriber
1986.0
1.0
1
2142.0
2017-07-01 00:00:03
2017-07-01 00:35:46
293.0
Lafayette St & E 8 St
40.730207
-73.991026
3425.0
2 Ave & E 104 St
40.789210
-73.943708
19587.0
Subscriber
1981.0
1.0
2
328.0
2017-07-01 00:00:08
2017-07-01 00:05:37
3242.0
Schermerhorn St & Court St
40.691029
-73.991834
3397.0
Court St & Nelson St
40.676395
-73.998699
27937.0
Subscriber
1984.0
2.0
3
2530.0
2017-07-01 00:00:11
2017-07-01 00:42:22
2002.0
Wythe Ave & Metropolitan Ave
40.716887
-73.963198
398.0
Atlantic Ave & Furman St
40.691652
-73.999979
26066.0
Subscriber
1985.0
1.0
4
2534.0
2017-07-01 00:00:15
2017-07-01 00:42:29
2002.0
Wythe Ave & Metropolitan Ave
40.716887
-73.963198
398.0
Atlantic Ave & Furman St
40.691652
-73.999979
29408.0
Subscriber
1982.0
2.0
5
545.0
2017-07-01 00:00:18
2017-07-01 00:09:24
361.0
Allen St & Hester St
40.716059
-73.991908
502.0
Henry St & Grand St
40.714215
-73.981346
21203.0
Subscriber
1954.0
2.0
6
1496.0
2017-07-01 00:00:18
2017-07-01 00:25:15
3233.0
E 48 St & 5 Ave
40.757246
-73.978059
546.0
E 30 St & Park Ave S
40.744449
-73.983035
15933.0
Customer
1971.0
1.0
7
1495.0
2017-07-01 00:00:19
2017-07-01 00:25:15
3233.0
E 48 St & 5 Ave
40.757246
-73.978059
546.0
E 30 St & Park Ave S
40.744449
-73.983035
18515.0
Customer
1964.0
2.0
8
593.0
2017-07-01 00:00:27
2017-07-01 00:10:20
3314.0
W 95 St & Broadway
40.793770
-73.971888
3363.0
E 102 St & Park Ave
40.790483
-73.950331
29135.0
Subscriber
1982.0
1.0
9
1950.0
2017-07-01 00:00:30
2017-07-01 00:33:00
3085.0
Roebling St & N 4 St
40.714690
-73.957390
468.0
Broadway & W 56 St
40.765265
-73.981923
17365.0
Subscriber
1983.0
1.0
10
736.0
2017-07-01 00:01:05
2017-07-01 00:13:22
382.0
University Pl & E 14 St
40.734927
-73.992005
526.0
E 33 St & 5 Ave
40.747659
-73.984907
20592.0
Subscriber
1966.0
1.0
11
588.0
2017-07-01 00:01:10
2017-07-01 00:10:59
305.0
E 58 St & 3 Ave
40.760958
-73.967245
519.0
Pershing Square North
40.751873
-73.977706
26555.0
Subscriber
1975.0
2.0
12
799.0
2017-07-01 00:01:19
2017-07-01 00:14:38
384.0
Fulton St & Washington Ave
40.683048
-73.964915
3329.0
Degraw St & Smith St
40.682915
-73.993182
16184.0
Subscriber
1960.0
1.0
13
404.0
2017-07-01 00:01:51
2017-07-01 00:08:35
502.0
Henry St & Grand St
40.714215
-73.981346
2009.0
Catherine St & Monroe St
40.711174
-73.996826
25127.0
Subscriber
1977.0
2.0
14
1045.0
2017-07-01 00:01:59
2017-07-01 00:19:25
491.0
E 24 St & Park Ave S
40.740964
-73.986022
500.0
Broadway & W 51 St
40.762288
-73.983362
21443.0
Subscriber
1972.0
1.0
15
589.0
2017-07-01 00:02:05
2017-07-01 00:11:55
252.0
MacDougal St & Washington Sq
40.732264
-73.998522
523.0
W 38 St & 8 Ave
40.754666
-73.991382
19047.0
Subscriber
1984.0
1.0
16
491.0
2017-07-01 00:02:06
2017-07-01 00:10:17
462.0
W 22 St & 10 Ave
40.746920
-74.004519
482.0
W 15 St & 7 Ave
40.739355
-73.999318
26666.0
Subscriber
1968.0
1.0
17
1718.0
2017-07-01 00:02:07
2017-07-01 00:30:46
450.0
W 49 St & 8 Ave
40.762272
-73.987882
473.0
Rivington St & Chrystie St
40.721101
-73.991925
25845.0
Subscriber
1988.0
1.0
18
773.0
2017-07-01 00:02:08
2017-07-01 00:15:01
441.0
E 52 St & 2 Ave
40.756014
-73.967416
3338.0
2 Ave & E 99 St
40.786259
-73.945526
27895.0
Subscriber
1988.0
2.0
19
859.0
2017-07-01 00:02:08
2017-07-01 00:16:27
3110.0
Meserole Ave & Manhattan Ave
40.727086
-73.952991
3081.0
Graham Ave & Grand St
40.711863
-73.944024
29350.0
Subscriber
1990.0
1.0
20
173.0
2017-07-01 00:02:10
2017-07-01 00:05:03
3083.0
Bushwick Ave & Powers St
40.712477
-73.941000
3074.0
Montrose Ave & Bushwick Ave
40.707678
-73.940162
25113.0
Subscriber
1981.0
1.0
21
1294.0
2017-07-01 00:02:13
2017-07-01 00:23:47
477.0
W 41 St & 8 Ave
40.756405
-73.990026
3147.0
E 85 St & 3 Ave
40.778012
-73.954071
14838.0
Subscriber
1966.0
1.0
22
876.0
2017-07-01 00:02:17
2017-07-01 00:16:54
435.0
W 21 St & 6 Ave
40.741740
-73.994156
266.0
Avenue D & E 8 St
40.723684
-73.975748
21645.0
Subscriber
1992.0
1.0
23
1327.0
2017-07-01 00:02:20
2017-07-01 00:24:28
402.0
Broadway & E 22 St
40.740343
-73.989551
514.0
12 Ave & W 40 St
40.760875
-74.002777
28025.0
Subscriber
1995.0
2.0
24
704.0
2017-07-01 00:02:33
2017-07-01 00:14:18
3255.0
8 Ave & W 31 St
40.750585
-73.994685
450.0
W 49 St & 8 Ave
40.762272
-73.987882
15139.0
Subscriber
1973.0
1.0
25
1449.0
2017-07-01 00:02:42
2017-07-01 00:26:52
401.0
Allen St & Rivington St
40.720196
-73.989978
446.0
W 24 St & 7 Ave
40.744876
-73.995299
28973.0
Subscriber
1988.0
1.0
26
2391.0
2017-07-01 00:02:44
2017-07-01 00:42:35
2008.0
Little West St & 1 Pl
40.705693
-74.016777
511.0
E 14 St & Avenue B
40.729387
-73.977724
27374.0
Subscriber
1952.0
1.0
27
983.0
2017-07-01 00:02:52
2017-07-01 00:19:16
501.0
FDR Drive & E 35 St
40.744219
-73.971212
3146.0
E 81 St & 3 Ave
40.775730
-73.956753
20559.0
Subscriber
1990.0
1.0
28
1323.0
2017-07-01 00:02:52
2017-07-01 00:24:55
355.0
Bayard St & Baxter St
40.716021
-73.999744
3431.0
E 35 St & 3 Ave
40.746524
-73.977885
26055.0
Subscriber
1973.0
2.0
29
417.0
2017-07-01 00:02:57
2017-07-01 00:09:54
3047.0
Halsey St & Tompkins Ave
40.682369
-73.944118
3056.0
Kosciuszko St & Nostrand Ave
40.690725
-73.951335
20333.0
Subscriber
1984.0
1.0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1506973
421.0
2017-07-31 23:56:18
2017-08-01 00:03:19
387.0
Centre St & Chambers St
40.712733
-74.004607
2008.0
Little West St & 1 Pl
40.705693
-74.016777
16776.0
Subscriber
1982.0
1.0
1506974
550.0
2017-07-31 23:56:31
2017-08-01 00:05:42
434.0
9 Ave & W 18 St
40.743174
-74.003664
435.0
W 21 St & 6 Ave
40.741740
-73.994156
30220.0
Subscriber
1985.0
2.0
1506975
254.0
2017-07-31 23:56:38
2017-08-01 00:00:52
3167.0
Amsterdam Ave & W 73 St
40.779668
-73.980930
3171.0
Amsterdam Ave & W 82 St
40.785247
-73.976673
28279.0
Subscriber
1969.0
2.0
1506976
405.0
2017-07-31 23:56:56
2017-08-01 00:03:41
297.0
E 15 St & 3 Ave
40.734232
-73.986923
345.0
W 13 St & 6 Ave
40.736494
-73.997044
29735.0
Subscriber
1986.0
1.0
1506977
398.0
2017-07-31 23:57:00
2017-08-01 00:03:38
3088.0
Union Ave & Jackson St
40.716075
-73.952029
3083.0
Bushwick Ave & Powers St
40.712477
-73.941000
18161.0
Subscriber
1988.0
2.0
1506978
425.0
2017-07-31 23:57:12
2017-08-01 00:04:17
285.0
Broadway & E 14 St
40.734546
-73.990741
432.0
E 7 St & Avenue A
40.726218
-73.983799
15434.0
Subscriber
1960.0
0.0
1506979
524.0
2017-07-31 23:57:14
2017-08-01 00:05:59
462.0
W 22 St & 10 Ave
40.746920
-74.004519
402.0
Broadway & E 22 St
40.740343
-73.989551
19699.0
Subscriber
1980.0
1.0
1506980
1185.0
2017-07-31 23:57:17
2017-08-01 00:17:02
526.0
E 33 St & 5 Ave
40.747659
-73.984907
434.0
9 Ave & W 18 St
40.743174
-74.003664
18723.0
Customer
1988.0
1.0
1506981
436.0
2017-07-31 23:57:21
2017-08-01 00:04:37
306.0
Cliff St & Fulton St
40.708235
-74.005301
360.0
William St & Pine St
40.707179
-74.008873
20164.0
Subscriber
1986.0
1.0
1506982
412.0
2017-07-31 23:57:31
2017-08-01 00:04:24
285.0
Broadway & E 14 St
40.734546
-73.990741
432.0
E 7 St & Avenue A
40.726218
-73.983799
19574.0
Subscriber
1959.0
1.0
1506983
592.0
2017-07-31 23:57:42
2017-08-01 00:07:34
241.0
DeKalb Ave & S Portland Ave
40.689810
-73.974931
3049.0
Cambridge Pl & Gates Ave
40.684880
-73.963040
16333.0
Subscriber
1989.0
1.0
1506984
330.0
2017-07-31 23:57:47
2017-08-01 00:03:18
3085.0
Roebling St & N 4 St
40.714690
-73.957390
3086.0
Graham Ave & Conselyea St
40.715143
-73.944507
29409.0
Subscriber
1982.0
1.0
1506985
1506.0
2017-07-31 23:58:28
2017-08-01 00:23:34
528.0
2 Ave & E 31 St
40.742909
-73.977061
3145.0
E 84 St & Park Ave
40.778627
-73.957721
20944.0
Subscriber
1967.0
2.0
1506986
1176.0
2017-07-31 23:58:40
2017-08-01 00:18:16
503.0
E 20 St & Park Ave
40.738274
-73.987520
2006.0
Central Park S & 6 Ave
40.765909
-73.976342
29009.0
Customer
1979.0
1.0
1506987
774.0
2017-07-31 23:58:42
2017-08-01 00:11:37
164.0
E 47 St & 2 Ave
40.753231
-73.970325
492.0
W 33 St & 7 Ave
40.750200
-73.990931
17972.0
Subscriber
1964.0
1.0
1506988
1060.0
2017-07-31 23:58:45
2017-08-01 00:16:25
428.0
E 3 St & 1 Ave
40.724677
-73.987834
128.0
MacDougal St & Prince St
40.727103
-74.002971
27508.0
Subscriber
1968.0
2.0
1506989
364.0
2017-07-31 23:58:47
2017-08-01 00:04:51
383.0
Greenwich Ave & Charles St
40.735238
-74.000271
509.0
9 Ave & W 22 St
40.745497
-74.001971
27677.0
Subscriber
1966.0
1.0
1506990
1343.0
2017-07-31 23:58:50
2017-08-01 00:21:14
161.0
LaGuardia Pl & W 3 St
40.729170
-73.998102
406.0
Hicks St & Montague St
40.695128
-73.995951
20912.0
Subscriber
1985.0
1.0
1506991
936.0
2017-07-31 23:58:54
2017-08-01 00:14:30
442.0
W 27 St & 7 Ave
40.746647
-73.993915
477.0
W 41 St & 8 Ave
40.756405
-73.990026
19612.0
Subscriber
1995.0
1.0
1506992
277.0
2017-07-31 23:59:10
2017-08-01 00:03:47
495.0
W 47 St & 10 Ave
40.762699
-73.993012
423.0
W 54 St & 9 Ave
40.765849
-73.986905
30198.0
Subscriber
1988.0
1.0
1506993
2294.0
2017-07-31 23:59:09
2017-08-01 00:37:24
232.0
Cadman Plaza E & Tillary St
40.695977
-73.990149
3342.0
Pioneer St & Richards St
40.677775
-74.009461
18769.0
Subscriber
1999.0
1.0
1506994
1916.0
2017-07-31 23:59:11
2017-08-01 00:31:07
440.0
E 45 St & 3 Ave
40.752554
-73.972826
3338.0
2 Ave & E 99 St
40.786259
-73.945526
25644.0
Subscriber
1992.0
1.0
1506995
2244.0
2017-07-31 23:59:10
2017-08-01 00:36:35
3165.0
Central Park West & W 72 St
40.775794
-73.976206
3163.0
Central Park West & W 68 St
40.773407
-73.977825
30201.0
Subscriber
1950.0
1.0
1506996
428.0
2017-07-31 23:59:11
2017-08-01 00:06:20
477.0
W 41 St & 8 Ave
40.756405
-73.990026
513.0
W 56 St & 10 Ave
40.768254
-73.988639
30127.0
Subscriber
1980.0
1.0
1506997
215.0
2017-07-31 23:59:15
2017-08-01 00:02:51
3164.0
Columbus Ave & W 72 St
40.777057
-73.978985
3170.0
W 84 St & Columbus Ave
40.785000
-73.972834
18057.0
Subscriber
1970.0
1.0
1506998
3058.0
2017-07-31 23:59:31
2017-08-01 00:50:30
3418.0
Plaza St West & Flatbush Ave
40.675021
-73.971115
293.0
Lafayette St & E 8 St
40.730207
-73.991026
30161.0
Subscriber
1963.0
1.0
1506999
560.0
2017-07-31 23:59:33
2017-08-01 00:08:53
3260.0
Mercer St & Bleecker St
40.727064
-73.996621
236.0
St Marks Pl & 2 Ave
40.728419
-73.987140
16126.0
Subscriber
1990.0
2.0
1507000
272.0
2017-07-31 23:59:35
2017-08-01 00:04:08
247.0
Perry St & Bleecker St
40.735354
-74.004831
434.0
9 Ave & W 18 St
40.743174
-74.003664
27684.0
Subscriber
1999.0
1.0
1507001
1211.0
2017-07-31 23:59:37
2017-08-01 00:19:49
3303.0
Butler St & Court St
40.684989
-73.994403
3308.0
Kane St & Clinton St
40.686176
-73.996453
28402.0
Subscriber
1975.0
2.0
1507002
547.0
2017-07-31 23:59:48
2017-08-01 00:08:56
276.0
Duane St & Greenwich St
40.717488
-74.010455
127.0
Barrow St & Hudson St
40.731724
-74.006744
30150.0
Subscriber
1992.0
1.0
1507003 rows × 15 columns
In [7]:
try:
station = pd.read_csv('station.csv')
except:
station = pd.DataFrame(df[['start station id', 'start station name', 'start station latitude', 'start station longitude']])
station.columns = ['id', 'name', 'latitude', 'longitude']
tmp = pd.DataFrame(df[['end station id', 'end station name', 'end station latitude', 'end station longitude']])
tmp.columns = ['id', 'name', 'latitude', 'longitude']
station = pd.concat([station, tmp])
station = station.sort_values('id').drop_duplicates().reset_index(drop=True)
station.to_csv('station.csv', index=False)
station
Out[7]:
id
name
latitude
longitude
in
out
freq
0
72.0
W 52 St & 11 Ave
40.767272
-73.993929
3352
3381
6733
1
79.0
Franklin St & W Broadway
40.719116
-74.006667
2608
2561
5169
2
82.0
St James Pl & Pearl St
40.711174
-74.000165
974
1050
2024
3
83.0
Atlantic Ave & Fort Greene Pl
40.683826
-73.976323
1640
1399
3039
4
116.0
W 17 St & 8 Ave
40.741776
-74.001497
3812
3732
7544
5
119.0
Park Ave & St Edwards St
40.696089
-73.978034
292
244
536
6
120.0
Lexington Ave & Classon Ave
40.686768
-73.959282
875
948
1823
7
127.0
Barrow St & Hudson St
40.731724
-74.006744
5690
5594
11284
8
128.0
MacDougal St & Prince St
40.727103
-74.002971
6262
6317
12579
9
143.0
Clinton St & Joralemon St
40.692395
-73.993379
1993
2005
3998
10
144.0
Nassau St & Navy St
40.698399
-73.980689
759
646
1405
11
146.0
Hudson St & Reade St
40.716250
-74.009106
2375
2529
4904
12
150.0
E 2 St & Avenue C
40.720874
-73.980858
2715
2688
5403
13
151.0
Cleveland Pl & Spring St
40.722104
-73.997249
7029
7099
14128
14
152.0
Warren St & Church St
40.714740
-74.009106
2381
2340
4721
15
153.0
E 40 St & 5 Ave
40.752062
-73.981632
4174
4341
8515
16
157.0
Henry St & Atlantic Ave
40.690893
-73.996123
1798
1786
3584
17
161.0
LaGuardia Pl & W 3 St
40.729170
-73.998102
4797
4730
9527
18
164.0
E 47 St & 2 Ave
40.753231
-73.970325
2684
2717
5401
19
167.0
E 39 St & 3 Ave
40.748901
-73.976049
4263
4285
8548
20
168.0
W 18 St & 6 Ave
40.739713
-73.994564
7412
7252
14664
21
173.0
Broadway & W 49 St
40.760683
-73.984527
4326
4590
8916
22
174.0
E 25 St & 1 Ave
40.738177
-73.977387
2976
3150
6126
23
195.0
Liberty St & Broadway
40.709056
-74.010434
3502
3453
6955
24
212.0
W 16 St & The High Line
40.743349
-74.006818
4390
4398
8788
25
216.0
Columbia Heights & Cranberry St
40.700379
-73.995481
526
643
1169
26
217.0
Old Fulton St
40.702772
-73.993836
3024
2220
5244
27
223.0
W 13 St & 7 Ave
40.737815
-73.999947
3245
3195
6440
28
228.0
E 48 St & 3 Ave
40.754601
-73.971879
2384
2386
4770
29
229.0
Great Jones St
40.727434
-73.993790
6941
6753
13694
...
...
...
...
...
...
...
...
603
3436.0
Greenwich St & Hubert St
40.721319
-74.010065
3414
3397
6811
604
3437.0
Riverside Dr & W 91 St
40.793135
-73.977004
153
174
327
605
3438.0
E 76 St & 3 Ave
40.772249
-73.958421
1125
1133
2258
606
3440.0
Fulton St & Adams St
40.692418
-73.989495
2094
2115
4209
607
3441.0
10 Hudson Yards
40.752957
-74.002640
401
406
807
608
3443.0
W 52 St & 6 Ave
40.761330
-73.979820
5818
5519
11337
609
3445.0
Riverside Dr & W 89 St
40.791812
-73.978602
1000
1042
2042
610
3447.0
E 71 St & 1 Ave
40.767034
-73.956227
1793
1803
3596
611
3449.0
Eckford St & Engert Ave
40.721463
-73.948009
572
553
1125
612
3452.0
Bayard St & Leonard St
40.719156
-73.948854
581
565
1146
613
3453.0
Devoe St & Lorimer St
40.713352
-73.949103
1458
1427
2885
614
3454.0
Leonard St & Maujer St
40.710369
-73.947060
1076
1080
2156
615
3455.0
Schermerhorn St & 3 Ave
40.686808
-73.980362
730
686
1416
616
3456.0
Jackson St & Leonard St
40.716380
-73.948213
354
351
705
617
3457.0
E 58 St & Madison Ave
40.763026
-73.972095
1152
1099
2251
618
3458.0
W 55 St & 6 Ave
40.763094
-73.978350
3542
3573
7115
619
3459.0
E 53 St & 3 Ave
40.757632
-73.969306
2441
2427
4868
620
3461.0
Murray St & Greenwich St
40.714852
-74.011223
2885
2826
5711
621
3462.0
E 44 St & 2 Ave
40.751184
-73.971387
1979
2006
3985
622
3463.0
E 16 St & Irving Pl
40.735367
-73.987974
3202
3130
6332
623
3464.0
W 37 St & Broadway
40.752271
-73.987706
1900
1870
3770
624
3466.0
W 45 St & 6 Ave
40.756687
-73.982577
2659
2667
5326
625
3468.0
NYCBS Depot - STY - Garage 4
40.730380
-73.974750
49
31
80
626
3469.0
India St & West St
40.731814
-73.959950
581
550
1131
627
3470.0
Gowanus Tech Station
40.669802
-73.994905
2
2
4
628
3472.0
W 15 St & 10 Ave
40.742754
-74.007474
2848
2770
5618
629
3474.0
6 Ave & Spring St
40.725256
-74.004121
1665
1637
3302
630
3476.0
Norman Ave & Leonard St
40.725770
-73.950740
771
738
1509
631
3477.0
39 St & 2 Ave - Citi Bike HQ at Industry City
40.655400
-74.010628
63
52
115
632
3478.0
2 Ave & 36 St - Citi Bike HQ at Industry City
40.657089
-74.008702
258
266
524
633 rows × 7 columns
In [8]:
try:
path = read_csv('path.csv')
except:
path = df.drop(['start station name', 'start station latitude', 'start station longitude', 'end station name', 'end station latitude', 'end station longitude'], axis=1)
path.to_csv('path.csv', index=False)
path
Out[8]:
tripduration
starttime
stoptime
start station id
end station id
bikeid
usertype
birth year
gender
0
364.0
2017-07-01 00:00:00
2017-07-01 00:06:05
539.0
3107.0
14744.0
Subscriber
1986.0
1.0
1
2142.0
2017-07-01 00:00:03
2017-07-01 00:35:46
293.0
3425.0
19587.0
Subscriber
1981.0
1.0
2
328.0
2017-07-01 00:00:08
2017-07-01 00:05:37
3242.0
3397.0
27937.0
Subscriber
1984.0
2.0
3
2530.0
2017-07-01 00:00:11
2017-07-01 00:42:22
2002.0
398.0
26066.0
Subscriber
1985.0
1.0
4
2534.0
2017-07-01 00:00:15
2017-07-01 00:42:29
2002.0
398.0
29408.0
Subscriber
1982.0
2.0
5
545.0
2017-07-01 00:00:18
2017-07-01 00:09:24
361.0
502.0
21203.0
Subscriber
1954.0
2.0
6
1496.0
2017-07-01 00:00:18
2017-07-01 00:25:15
3233.0
546.0
15933.0
Customer
1971.0
1.0
7
1495.0
2017-07-01 00:00:19
2017-07-01 00:25:15
3233.0
546.0
18515.0
Customer
1964.0
2.0
8
593.0
2017-07-01 00:00:27
2017-07-01 00:10:20
3314.0
3363.0
29135.0
Subscriber
1982.0
1.0
9
1950.0
2017-07-01 00:00:30
2017-07-01 00:33:00
3085.0
468.0
17365.0
Subscriber
1983.0
1.0
10
736.0
2017-07-01 00:01:05
2017-07-01 00:13:22
382.0
526.0
20592.0
Subscriber
1966.0
1.0
11
588.0
2017-07-01 00:01:10
2017-07-01 00:10:59
305.0
519.0
26555.0
Subscriber
1975.0
2.0
12
799.0
2017-07-01 00:01:19
2017-07-01 00:14:38
384.0
3329.0
16184.0
Subscriber
1960.0
1.0
13
404.0
2017-07-01 00:01:51
2017-07-01 00:08:35
502.0
2009.0
25127.0
Subscriber
1977.0
2.0
14
1045.0
2017-07-01 00:01:59
2017-07-01 00:19:25
491.0
500.0
21443.0
Subscriber
1972.0
1.0
15
589.0
2017-07-01 00:02:05
2017-07-01 00:11:55
252.0
523.0
19047.0
Subscriber
1984.0
1.0
16
491.0
2017-07-01 00:02:06
2017-07-01 00:10:17
462.0
482.0
26666.0
Subscriber
1968.0
1.0
17
1718.0
2017-07-01 00:02:07
2017-07-01 00:30:46
450.0
473.0
25845.0
Subscriber
1988.0
1.0
18
773.0
2017-07-01 00:02:08
2017-07-01 00:15:01
441.0
3338.0
27895.0
Subscriber
1988.0
2.0
19
859.0
2017-07-01 00:02:08
2017-07-01 00:16:27
3110.0
3081.0
29350.0
Subscriber
1990.0
1.0
20
173.0
2017-07-01 00:02:10
2017-07-01 00:05:03
3083.0
3074.0
25113.0
Subscriber
1981.0
1.0
21
1294.0
2017-07-01 00:02:13
2017-07-01 00:23:47
477.0
3147.0
14838.0
Subscriber
1966.0
1.0
22
876.0
2017-07-01 00:02:17
2017-07-01 00:16:54
435.0
266.0
21645.0
Subscriber
1992.0
1.0
23
1327.0
2017-07-01 00:02:20
2017-07-01 00:24:28
402.0
514.0
28025.0
Subscriber
1995.0
2.0
24
704.0
2017-07-01 00:02:33
2017-07-01 00:14:18
3255.0
450.0
15139.0
Subscriber
1973.0
1.0
25
1449.0
2017-07-01 00:02:42
2017-07-01 00:26:52
401.0
446.0
28973.0
Subscriber
1988.0
1.0
26
2391.0
2017-07-01 00:02:44
2017-07-01 00:42:35
2008.0
511.0
27374.0
Subscriber
1952.0
1.0
27
983.0
2017-07-01 00:02:52
2017-07-01 00:19:16
501.0
3146.0
20559.0
Subscriber
1990.0
1.0
28
1323.0
2017-07-01 00:02:52
2017-07-01 00:24:55
355.0
3431.0
26055.0
Subscriber
1973.0
2.0
29
417.0
2017-07-01 00:02:57
2017-07-01 00:09:54
3047.0
3056.0
20333.0
Subscriber
1984.0
1.0
...
...
...
...
...
...
...
...
...
...
1506973
421.0
2017-07-31 23:56:18
2017-08-01 00:03:19
387.0
2008.0
16776.0
Subscriber
1982.0
1.0
1506974
550.0
2017-07-31 23:56:31
2017-08-01 00:05:42
434.0
435.0
30220.0
Subscriber
1985.0
2.0
1506975
254.0
2017-07-31 23:56:38
2017-08-01 00:00:52
3167.0
3171.0
28279.0
Subscriber
1969.0
2.0
1506976
405.0
2017-07-31 23:56:56
2017-08-01 00:03:41
297.0
345.0
29735.0
Subscriber
1986.0
1.0
1506977
398.0
2017-07-31 23:57:00
2017-08-01 00:03:38
3088.0
3083.0
18161.0
Subscriber
1988.0
2.0
1506978
425.0
2017-07-31 23:57:12
2017-08-01 00:04:17
285.0
432.0
15434.0
Subscriber
1960.0
0.0
1506979
524.0
2017-07-31 23:57:14
2017-08-01 00:05:59
462.0
402.0
19699.0
Subscriber
1980.0
1.0
1506980
1185.0
2017-07-31 23:57:17
2017-08-01 00:17:02
526.0
434.0
18723.0
Customer
1988.0
1.0
1506981
436.0
2017-07-31 23:57:21
2017-08-01 00:04:37
306.0
360.0
20164.0
Subscriber
1986.0
1.0
1506982
412.0
2017-07-31 23:57:31
2017-08-01 00:04:24
285.0
432.0
19574.0
Subscriber
1959.0
1.0
1506983
592.0
2017-07-31 23:57:42
2017-08-01 00:07:34
241.0
3049.0
16333.0
Subscriber
1989.0
1.0
1506984
330.0
2017-07-31 23:57:47
2017-08-01 00:03:18
3085.0
3086.0
29409.0
Subscriber
1982.0
1.0
1506985
1506.0
2017-07-31 23:58:28
2017-08-01 00:23:34
528.0
3145.0
20944.0
Subscriber
1967.0
2.0
1506986
1176.0
2017-07-31 23:58:40
2017-08-01 00:18:16
503.0
2006.0
29009.0
Customer
1979.0
1.0
1506987
774.0
2017-07-31 23:58:42
2017-08-01 00:11:37
164.0
492.0
17972.0
Subscriber
1964.0
1.0
1506988
1060.0
2017-07-31 23:58:45
2017-08-01 00:16:25
428.0
128.0
27508.0
Subscriber
1968.0
2.0
1506989
364.0
2017-07-31 23:58:47
2017-08-01 00:04:51
383.0
509.0
27677.0
Subscriber
1966.0
1.0
1506990
1343.0
2017-07-31 23:58:50
2017-08-01 00:21:14
161.0
406.0
20912.0
Subscriber
1985.0
1.0
1506991
936.0
2017-07-31 23:58:54
2017-08-01 00:14:30
442.0
477.0
19612.0
Subscriber
1995.0
1.0
1506992
277.0
2017-07-31 23:59:10
2017-08-01 00:03:47
495.0
423.0
30198.0
Subscriber
1988.0
1.0
1506993
2294.0
2017-07-31 23:59:09
2017-08-01 00:37:24
232.0
3342.0
18769.0
Subscriber
1999.0
1.0
1506994
1916.0
2017-07-31 23:59:11
2017-08-01 00:31:07
440.0
3338.0
25644.0
Subscriber
1992.0
1.0
1506995
2244.0
2017-07-31 23:59:10
2017-08-01 00:36:35
3165.0
3163.0
30201.0
Subscriber
1950.0
1.0
1506996
428.0
2017-07-31 23:59:11
2017-08-01 00:06:20
477.0
513.0
30127.0
Subscriber
1980.0
1.0
1506997
215.0
2017-07-31 23:59:15
2017-08-01 00:02:51
3164.0
3170.0
18057.0
Subscriber
1970.0
1.0
1506998
3058.0
2017-07-31 23:59:31
2017-08-01 00:50:30
3418.0
293.0
30161.0
Subscriber
1963.0
1.0
1506999
560.0
2017-07-31 23:59:33
2017-08-01 00:08:53
3260.0
236.0
16126.0
Subscriber
1990.0
2.0
1507000
272.0
2017-07-31 23:59:35
2017-08-01 00:04:08
247.0
434.0
27684.0
Subscriber
1999.0
1.0
1507001
1211.0
2017-07-31 23:59:37
2017-08-01 00:19:49
3303.0
3308.0
28402.0
Subscriber
1975.0
2.0
1507002
547.0
2017-07-31 23:59:48
2017-08-01 00:08:56
276.0
127.0
30150.0
Subscriber
1992.0
1.0
1507003 rows × 9 columns
In [9]:
import bisect
try:
out_flow = pd.read_csv('out_flow.csv')
in_flow = pd.read_csv('in_flow.csv')
except:
begin = datetime.datetime(2017, 7, 1, 0, 0, 0)
end = datetime.datetime(2017, 7, 31, 23, 30, 0)
date_list = [ end - datetime.timedelta(seconds=x*60*30) for x in range(0, 1488)][::-1]
#print(date_list)
tmp = pd.DataFrame(np.zeros((len(station['id']), len(date_list))), columns=date_list)
in_flow = pd.DataFrame({'id': list(station['id'])}).join(tmp)
out_flow = pd.DataFrame({'id': list(station['id'])}).join(tmp)
#in_flow
get_idx = {}
for idx, row in station.iterrows():
get_idx[row.iloc[0]] = idx
for idx, row in path.iterrows():
date = datetime.datetime.strptime(row.iloc[1], "%Y-%m-%d %H:%M:%S")
out_num = bisect.bisect_right(date_list, date)
out_flow.iloc[get_idx[row.iloc[3]], out_num] += 1
date = datetime.datetime.strptime(row.iloc[2], "%Y-%m-%d %H:%M:%S")
in_num = bisect.bisect_right(date_list, date)
in_flow.iloc[get_idx[row.iloc[4]], in_num] += 1
out_flow.to_csv('out_flow.csv', index=False)
in_flow.to_csv('in_flow.csv', index=False)
print('in-flow')
print(in_flow)
print('out-flow')
print(out_flow)
in-flow
id 2017-07-01 00:00:00 2017-07-01 00:30:00 2017-07-01 01:00:00 \
0 72.0 1.0 0.0 2.0
1 79.0 1.0 0.0 0.0
2 82.0 0.0 0.0 0.0
3 83.0 0.0 0.0 0.0
4 116.0 0.0 0.0 0.0
5 119.0 0.0 0.0 0.0
6 120.0 0.0 0.0 1.0
7 127.0 0.0 2.0 2.0
8 128.0 1.0 1.0 1.0
9 143.0 0.0 0.0 0.0
10 144.0 0.0 0.0 0.0
11 146.0 0.0 0.0 0.0
12 150.0 0.0 2.0 0.0
13 151.0 0.0 0.0 1.0
14 152.0 0.0 1.0 1.0
15 153.0 0.0 0.0 0.0
16 157.0 0.0 0.0 0.0
17 161.0 0.0 6.0 0.0
18 164.0 1.0 0.0 0.0
19 167.0 0.0 0.0 0.0
20 168.0 1.0 0.0 0.0
21 173.0 1.0 0.0 0.0
22 174.0 0.0 0.0 3.0
23 195.0 1.0 1.0 2.0
24 212.0 1.0 0.0 0.0
25 216.0 0.0 0.0 0.0
26 217.0 0.0 1.0 0.0
27 223.0 1.0 0.0 0.0
28 228.0 0.0 0.0 0.0
29 229.0 0.0 0.0 1.0
.. ... ... ... ...
603 3436.0 0.0 0.0 0.0
604 3437.0 0.0 0.0 0.0
605 3438.0 0.0 0.0 0.0
606 3440.0 0.0 0.0 1.0
607 3441.0 0.0 0.0 0.0
608 3443.0 0.0 0.0 0.0
609 3445.0 0.0 0.0 0.0
610 3447.0 0.0 1.0 0.0
611 3449.0 0.0 0.0 0.0
612 3452.0 0.0 0.0 0.0
613 3453.0 0.0 2.0 0.0
614 3454.0 0.0 0.0 2.0
615 3455.0 0.0 0.0 0.0
616 3456.0 0.0 0.0 0.0
617 3457.0 0.0 1.0 0.0
618 3458.0 0.0 1.0 2.0
619 3459.0 0.0 0.0 0.0
620 3461.0 0.0 0.0 0.0
621 3462.0 0.0 0.0 0.0
622 3463.0 0.0 2.0 0.0
623 3464.0 0.0 0.0 0.0
624 3466.0 0.0 0.0 0.0
625 3468.0 0.0 0.0 0.0
626 3469.0 0.0 0.0 0.0
627 3470.0 0.0 0.0 0.0
628 3472.0 0.0 0.0 1.0
629 3474.0 0.0 0.0 0.0
630 3476.0 0.0 0.0 0.0
631 3477.0 0.0 0.0 0.0
632 3478.0 0.0 0.0 0.0
2017-07-01 01:30:00 2017-07-01 02:00:00 2017-07-01 02:30:00 \
0 0.0 0.0 0.0
1 1.0 0.0 1.0
2 0.0 0.0 0.0
3 0.0 0.0 0.0
4 2.0 0.0 0.0
5 0.0 0.0 0.0
6 0.0 0.0 0.0
7 0.0 1.0 0.0
8 0.0 2.0 0.0
9 0.0 1.0 0.0
10 0.0 0.0 0.0
11 0.0 0.0 0.0
12 3.0 0.0 1.0
13 1.0 0.0 0.0
14 0.0 0.0 0.0
15 0.0 0.0 0.0
16 0.0 1.0 0.0
17 1.0 1.0 2.0
18 0.0 0.0 0.0
19 0.0 0.0 0.0
20 0.0 0.0 1.0
21 0.0 0.0 0.0
22 0.0 1.0 1.0
23 0.0 0.0 0.0
24 0.0 0.0 0.0
25 0.0 0.0 0.0
26 0.0 0.0 0.0
27 1.0 0.0 0.0
28 1.0 0.0 0.0
29 1.0 0.0 2.0
.. ... ... ...
603 0.0 0.0 0.0
604 0.0 0.0 0.0
605 0.0 1.0 1.0
606 1.0 0.0 0.0
607 0.0 0.0 0.0
608 0.0 0.0 0.0
609 0.0 0.0 0.0
610 0.0 0.0 2.0
611 1.0 0.0 0.0
612 0.0 0.0 0.0
613 0.0 0.0 0.0
614 1.0 0.0 0.0
615 0.0 0.0 0.0
616 0.0 0.0 0.0
617 0.0 0.0 0.0
618 0.0 0.0 0.0
619 0.0 0.0 0.0
620 0.0 0.0 0.0
621 1.0 0.0 0.0
622 1.0 0.0 0.0
623 0.0 0.0 0.0
624 0.0 0.0 0.0
625 0.0 0.0 0.0
626 0.0 0.0 0.0
627 0.0 0.0 0.0
628 0.0 0.0 0.0
629 0.0 0.0 0.0
630 0.0 0.0 0.0
631 0.0 0.0 0.0
632 0.0 0.0 0.0
2017-07-01 03:00:00 2017-07-01 03:30:00 2017-07-01 04:00:00 \
0 0.0 0.0 0.0
1 0.0 0.0 0.0
2 0.0 0.0 0.0
3 0.0 0.0 0.0
4 0.0 0.0 0.0
5 0.0 0.0 0.0
6 1.0 0.0 0.0
7 1.0 0.0 0.0
8 0.0 0.0 1.0
9 0.0 0.0 0.0
10 0.0 0.0 0.0
11 0.0 0.0 0.0
12 0.0 1.0 0.0
13 1.0 0.0 0.0
14 0.0 0.0 1.0
15 0.0 0.0 0.0
16 0.0 0.0 0.0
17 0.0 0.0 0.0
18 0.0 0.0 0.0
19 0.0 0.0 0.0
20 0.0 0.0 0.0
21 0.0 0.0 0.0
22 0.0 0.0 0.0
23 1.0 0.0 0.0
24 0.0 0.0 0.0
25 0.0 0.0 0.0
26 0.0 0.0 0.0
27 0.0 0.0 0.0
28 0.0 0.0 0.0
29 2.0 0.0 0.0
.. ... ... ...
603 0.0 0.0 0.0
604 0.0 0.0 0.0
605 0.0 0.0 0.0
606 0.0 0.0 0.0
607 0.0 0.0 0.0
608 0.0 0.0 0.0
609 0.0 0.0 0.0
610 0.0 0.0 0.0
611 0.0 0.0 0.0
612 0.0 0.0 0.0
613 0.0 0.0 0.0
614 0.0 1.0 0.0
615 0.0 0.0 0.0
616 0.0 0.0 0.0
617 0.0 0.0 1.0
618 0.0 0.0 0.0
619 0.0 0.0 0.0
620 0.0 0.0 0.0
621 0.0 0.0 0.0
622 0.0 0.0 0.0
623 0.0 0.0 0.0
624 0.0 0.0 0.0
625 0.0 0.0 0.0
626 0.0 0.0 0.0
627 0.0 0.0 0.0
628 0.0 0.0 0.0
629 0.0 0.0 0.0
630 0.0 0.0 0.0
631 0.0 0.0 0.0
632 0.0 0.0 0.0
... 2017-07-31 19:00:00 2017-07-31 19:30:00 \
0 ... 5.0 7.0
1 ... 2.0 6.0
2 ... 1.0 3.0
3 ... 1.0 3.0
4 ... 2.0 5.0
5 ... 0.0 1.0
6 ... 3.0 6.0
7 ... 19.0 15.0
8 ... 12.0 15.0
9 ... 9.0 1.0
10 ... 1.0 1.0
11 ... 4.0 7.0
12 ... 2.0 4.0
13 ... 13.0 14.0
14 ... 3.0 6.0
15 ... 6.0 3.0
16 ... 1.0 6.0
17 ... 6.0 7.0
18 ... 1.0 4.0
19 ... 4.0 4.0
20 ... 13.0 11.0
21 ... 2.0 4.0
22 ... 6.0 8.0
23 ... 2.0 3.0
24 ... 6.0 6.0
25 ... 4.0 0.0
26 ... 5.0 5.0
27 ... 9.0 4.0
28 ... 2.0 0.0
29 ... 24.0 13.0
.. ... ... ...
603 ... 8.0 5.0
604 ... 1.0 1.0
605 ... 2.0 1.0
606 ... 1.0 4.0
607 ... 2.0 0.0
608 ... 1.0 2.0
609 ... 0.0 0.0
610 ... 6.0 1.0
611 ... 2.0 1.0
612 ... 0.0 0.0
613 ... 4.0 1.0
614 ... 3.0 2.0
615 ... 2.0 1.0
616 ... 0.0 1.0
617 ... 0.0 0.0
618 ... 6.0 1.0
619 ... 2.0 2.0
620 ... 6.0 10.0
621 ... 2.0 0.0
622 ... 5.0 4.0
623 ... 0.0 0.0
624 ... 1.0 2.0
625 ... 1.0 0.0
626 ... 3.0 1.0
627 ... 0.0 0.0
628 ... 8.0 3.0
629 ... 2.0 1.0
630 ... 3.0 4.0
631 ... 2.0 0.0
632 ... 1.0 0.0
2017-07-31 20:00:00 2017-07-31 20:30:00 2017-07-31 21:00:00 \
0 9.0 6.0 2.0
1 1.0 0.0 4.0
2 1.0 4.0 3.0
3 0.0 1.0 1.0
4 7.0 6.0 2.0
5 1.0 0.0 0.0
6 0.0 0.0 0.0
7 5.0 6.0 5.0
8 11.0 10.0 3.0
9 5.0 3.0 2.0
10 0.0 0.0 0.0
11 4.0 4.0 0.0
12 3.0 3.0 1.0
13 12.0 11.0 6.0
14 5.0 4.0 1.0
15 1.0 3.0 1.0
16 4.0 0.0 0.0
17 9.0 6.0 6.0
18 6.0 1.0 1.0
19 5.0 4.0 1.0
20 7.0 6.0 1.0
21 1.0 2.0 1.0
22 6.0 3.0 1.0
23 3.0 5.0 2.0
24 2.0 5.0 6.0
25 1.0 1.0 3.0
26 2.0 1.0 1.0
27 2.0 6.0 7.0
28 2.0 4.0 0.0
29 14.0 6.0 2.0
.. ... ... ...
603 3.0 3.0 2.0
604 2.0 5.0 2.0
605 0.0 2.0 5.0
606 0.0 2.0 3.0
607 0.0 0.0 0.0
608 0.0 1.0 1.0
609 0.0 0.0 0.0
610 3.0 1.0 2.0
611 2.0 0.0 0.0
612 0.0 0.0 0.0
613 4.0 2.0 0.0
614 6.0 3.0 4.0
615 0.0 1.0 1.0
616 2.0 1.0 1.0
617 0.0 0.0 0.0
618 2.0 3.0 1.0
619 0.0 0.0 3.0
620 4.0 8.0 2.0
621 1.0 3.0 2.0
622 7.0 3.0 2.0
623 0.0 0.0 0.0
624 0.0 1.0 1.0
625 0.0 0.0 0.0
626 0.0 0.0 3.0
627 0.0 0.0 0.0
628 2.0 1.0 1.0
629 3.0 3.0 2.0
630 2.0 1.0 2.0
631 0.0 0.0 0.0
632 1.0 1.0 0.0
2017-07-31 21:30:00 2017-07-31 22:00:00 2017-07-31 22:30:00 \
0 0.0 1.0 3.0
1 1.0 3.0 1.0
2 0.0 2.0 1.0
3 0.0 0.0 1.0
4 1.0 1.0 2.0
5 0.0 0.0 0.0
6 1.0 0.0 2.0
7 6.0 3.0 1.0
8 5.0 3.0 3.0
9 0.0 3.0 0.0
10 0.0 1.0 0.0
11 3.0 1.0 0.0
12 1.0 1.0 0.0
13 5.0 2.0 7.0
14 1.0 2.0 2.0
15 0.0 2.0 0.0
16 1.0 1.0 2.0
17 3.0 3.0 0.0
18 0.0 1.0 2.0
19 2.0 1.0 3.0
20 3.0 1.0 2.0
21 4.0 2.0 1.0
22 3.0 2.0 2.0
23 2.0 2.0 3.0
24 5.0 1.0 1.0
25 0.0 0.0 0.0
26 2.0 1.0 0.0
27 2.0 4.0 0.0
28 1.0 1.0 1.0
29 3.0 2.0 3.0
.. ... ... ...
603 3.0 4.0 0.0
604 0.0 2.0 0.0
605 2.0 0.0 0.0
606 2.0 0.0 0.0
607 0.0 0.0 0.0
608 1.0 1.0 0.0
609 0.0 0.0 0.0
610 1.0 1.0 0.0
611 0.0 1.0 2.0
612 0.0 0.0 0.0
613 0.0 0.0 0.0
614 1.0 1.0 1.0
615 0.0 3.0 0.0
616 1.0 2.0 0.0
617 0.0 0.0 0.0
618 0.0 0.0 0.0
619 3.0 1.0 0.0
620 1.0 1.0 0.0
621 1.0 2.0 1.0
622 2.0 2.0 4.0
623 0.0 0.0 0.0
624 0.0 0.0 0.0
625 0.0 0.0 0.0
626 1.0 0.0 0.0
627 0.0 0.0 0.0
628 0.0 1.0 0.0
629 0.0 2.0 1.0
630 2.0 1.0 1.0
631 0.0 0.0 0.0
632 0.0 0.0 0.0
2017-07-31 23:00:00 2017-07-31 23:30:00
0 0.0 2.0
1 0.0 1.0
2 1.0 0.0
3 0.0 1.0
4 1.0 3.0
5 0.0 1.0
6 1.0 1.0
7 3.0 4.0
8 2.0 1.0
9 1.0 0.0
10 0.0 0.0
11 0.0 1.0
12 1.0 1.0
13 2.0 3.0
14 2.0 0.0
15 0.0 0.0
16 0.0 0.0
17 1.0 2.0
18 0.0 0.0
19 2.0 1.0
20 0.0 0.0
21 1.0 0.0
22 7.0 2.0
23 1.0 1.0
24 1.0 1.0
25 0.0 0.0
26 0.0 0.0
27 1.0 2.0
28 0.0 0.0
29 3.0 2.0
.. ... ...
603 0.0 0.0
604 1.0 1.0
605 0.0 1.0
606 2.0 0.0
607 0.0 0.0
608 0.0 1.0
609 0.0 0.0
610 1.0 1.0
611 0.0 0.0
612 0.0 0.0
613 1.0 1.0
614 1.0 0.0
615 0.0 0.0
616 0.0 0.0
617 0.0 0.0
618 1.0 0.0
619 3.0 0.0
620 0.0 0.0
621 1.0 0.0
622 2.0 1.0
623 0.0 0.0
624 1.0 0.0
625 0.0 0.0
626 3.0 1.0
627 0.0 0.0
628 0.0 0.0
629 1.0 1.0
630 2.0 0.0
631 0.0 0.0
632 0.0 0.0
[633 rows x 1489 columns]
out-flow
id 2017-07-01 00:00:00 2017-07-01 00:30:00 2017-07-01 01:00:00 \
0 72.0 0.0 0.0 1.0
1 79.0 0.0 0.0 1.0
2 82.0 0.0 0.0 0.0
3 83.0 0.0 1.0 0.0
4 116.0 1.0 1.0 0.0
5 119.0 0.0 0.0 0.0
6 120.0 0.0 1.0 2.0
7 127.0 1.0 1.0 0.0
8 128.0 3.0 3.0 2.0
9 143.0 0.0 0.0 0.0
10 144.0 0.0 0.0 0.0
11 146.0 0.0 1.0 0.0
12 150.0 1.0 0.0 4.0
13 151.0 1.0 0.0 0.0
14 152.0 0.0 0.0 0.0
15 153.0 1.0 0.0 0.0
16 157.0 0.0 1.0 0.0
17 161.0 2.0 4.0 3.0
18 164.0 0.0 3.0 1.0
19 167.0 0.0 1.0 0.0
20 168.0 1.0 0.0 0.0
21 173.0 0.0 2.0 0.0
22 174.0 0.0 0.0 0.0
23 195.0 0.0 0.0 2.0
24 212.0 3.0 0.0 0.0
25 216.0 0.0 0.0 0.0
26 217.0 2.0 1.0 0.0
27 223.0 2.0 1.0 0.0
28 228.0 0.0 0.0 1.0
29 229.0 1.0 0.0 1.0
.. ... ... ... ...
603 3436.0 0.0 0.0 0.0
604 3437.0 0.0 0.0 0.0
605 3438.0 0.0 0.0 0.0
606 3440.0 1.0 0.0 0.0
607 3441.0 0.0 0.0 0.0
608 3443.0 0.0 0.0 0.0
609 3445.0 0.0 0.0 0.0
610 3447.0 1.0 0.0 0.0
611 3449.0 1.0 0.0 0.0
612 3452.0 0.0 0.0 0.0
613 3453.0 0.0 1.0 2.0
614 3454.0 0.0 1.0 0.0
615 3455.0 0.0 1.0 0.0
616 3456.0 0.0 0.0 0.0
617 3457.0 0.0 0.0 1.0
618 3458.0 0.0 3.0 0.0
619 3459.0 1.0 0.0 0.0
620 3461.0 0.0 0.0 0.0
621 3462.0 1.0 0.0 0.0
622 3463.0 0.0 0.0 1.0
623 3464.0 0.0 0.0 0.0
624 3466.0 1.0 0.0 1.0
625 3468.0 0.0 0.0 0.0
626 3469.0 0.0 0.0 0.0
627 3470.0 0.0 0.0 0.0
628 3472.0 0.0 2.0 3.0
629 3474.0 0.0 0.0 0.0
630 3476.0 0.0 0.0 0.0
631 3477.0 0.0 0.0 0.0
632 3478.0 0.0 0.0 0.0
2017-07-01 01:30:00 2017-07-01 02:00:00 2017-07-01 02:30:00 \
0 1.0 0.0 0.0
1 2.0 2.0 0.0
2 0.0 0.0 0.0
3 0.0 0.0 0.0
4 0.0 1.0 0.0
5 0.0 0.0 0.0
6 0.0 1.0 0.0
7 0.0 0.0 0.0
8 1.0 0.0 0.0
9 0.0 0.0 0.0
10 0.0 0.0 0.0
11 0.0 0.0 0.0
12 0.0 1.0 0.0
13 1.0 1.0 0.0
14 0.0 0.0 1.0
15 0.0 0.0 0.0
16 1.0 0.0 0.0
17 0.0 0.0 1.0
18 0.0 0.0 0.0
19 0.0 0.0 0.0
20 2.0 0.0 0.0
21 0.0 0.0 0.0
22 0.0 0.0 0.0
23 0.0 0.0 0.0
24 0.0 0.0 0.0
25 0.0 0.0 0.0
26 0.0 0.0 0.0
27 0.0 0.0 1.0
28 0.0 1.0 0.0
29 0.0 0.0 0.0
.. ... ... ...
603 0.0 0.0 0.0
604 0.0 0.0 0.0
605 0.0 0.0 0.0
606 0.0 0.0 0.0
607 0.0 0.0 0.0
608 0.0 0.0 0.0
609 0.0 0.0 0.0
610 0.0 0.0 1.0
611 0.0 0.0 0.0
612 0.0 0.0 0.0
613 0.0 0.0 0.0
614 2.0 0.0 0.0
615 0.0 0.0 0.0
616 0.0 0.0 0.0
617 0.0 0.0 0.0
618 0.0 0.0 0.0
619 0.0 0.0 0.0
620 0.0 0.0 0.0
621 0.0 0.0 1.0
622 0.0 0.0 0.0
623 0.0 0.0 0.0
624 0.0 0.0 0.0
625 0.0 0.0 0.0
626 0.0 0.0 0.0
627 0.0 0.0 0.0
628 2.0 0.0 0.0
629 0.0 0.0 0.0
630 0.0 0.0 0.0
631 0.0 0.0 0.0
632 0.0 0.0 0.0
2017-07-01 03:00:00 2017-07-01 03:30:00 2017-07-01 04:00:00 \
0 0.0 0.0 0.0
1 0.0 0.0 0.0
2 0.0 0.0 0.0
3 0.0 0.0 0.0
4 0.0 0.0 0.0
5 0.0 0.0 0.0
6 0.0 0.0 0.0
7 0.0 0.0 0.0
8 0.0 0.0 0.0
9 0.0 0.0 0.0
10 0.0 0.0 0.0
11 0.0 0.0 0.0
12 1.0 1.0 0.0
13 0.0 0.0 0.0
14 0.0 0.0 1.0
15 0.0 0.0 0.0
16 0.0 0.0 0.0
17 1.0 0.0 0.0
18 0.0 0.0 0.0
19 0.0 0.0 0.0
20 0.0 0.0 0.0
21 0.0 0.0 0.0
22 0.0 1.0 0.0
23 0.0 0.0 0.0
24 0.0 0.0 0.0
25 0.0 0.0 0.0
26 0.0 0.0 0.0
27 0.0 0.0 0.0
28 0.0 0.0 0.0
29 0.0 1.0 1.0
.. ... ... ...
603 0.0 0.0 0.0
604 0.0 0.0 0.0
605 0.0 0.0 0.0
606 1.0 0.0 0.0
607 0.0 0.0 0.0
608 0.0 0.0 0.0
609 0.0 0.0 0.0
610 0.0 0.0 0.0
611 0.0 0.0 0.0
612 0.0 0.0 0.0
613 0.0 0.0 0.0
614 0.0 0.0 1.0
615 0.0 0.0 0.0
616 0.0 0.0 0.0
617 0.0 0.0 0.0
618 0.0 0.0 0.0
619 0.0 0.0 0.0
620 0.0 0.0 0.0
621 0.0 0.0 0.0
622 0.0 0.0 0.0
623 0.0 0.0 0.0
624 0.0 0.0 1.0
625 0.0 0.0 0.0
626 0.0 0.0 0.0
627 0.0 0.0 0.0
628 0.0 1.0 0.0
629 0.0 0.0 1.0
630 0.0 0.0 0.0
631 0.0 0.0 0.0
632 0.0 0.0 0.0
... 2017-07-31 19:00:00 2017-07-31 19:30:00 \
0 ... 12.0 7.0
1 ... 2.0 6.0
2 ... 4.0 5.0
3 ... 5.0 1.0
4 ... 2.0 1.0
5 ... 1.0 0.0
6 ... 2.0 1.0
7 ... 13.0 14.0
8 ... 16.0 13.0
9 ... 8.0 0.0
10 ... 1.0 1.0
11 ... 3.0 5.0
12 ... 0.0 2.0
13 ... 16.0 14.0
14 ... 12.0 8.0
15 ... 4.0 4.0
16 ... 1.0 2.0
17 ... 9.0 5.0
18 ... 3.0 3.0
19 ... 4.0 4.0
20 ... 13.0 10.0
21 ... 4.0 6.0
22 ... 2.0 10.0
23 ... 1.0 3.0
24 ... 8.0 7.0
25 ... 1.0 1.0
26 ... 3.0 1.0
27 ... 1.0 4.0
28 ... 3.0 1.0
29 ... 9.0 17.0
.. ... ... ...
603 ... 5.0 7.0
604 ... 2.0 1.0
605 ... 2.0 1.0
606 ... 4.0 2.0
607 ... 0.0 1.0
608 ... 0.0 3.0
609 ... 0.0 0.0
610 ... 9.0 2.0
611 ... 0.0 0.0
612 ... 0.0 0.0
613 ... 2.0 1.0
614 ... 1.0 2.0
615 ... 0.0 0.0
616 ... 1.0 0.0
617 ... 0.0 0.0
618 ... 6.0 1.0
619 ... 2.0 1.0
620 ... 7.0 5.0
621 ... 2.0 1.0
622 ... 9.0 2.0
623 ... 0.0 0.0
624 ... 0.0 4.0
625 ... 0.0 0.0
626 ... 2.0 4.0
627 ... 0.0 0.0
628 ... 3.0 4.0
629 ... 4.0 1.0
630 ... 4.0 1.0
631 ... 0.0 0.0
632 ... 5.0 0.0
2017-07-31 20:00:00 2017-07-31 20:30:00 2017-07-31 21:00:00 \
0 7.0 4.0 2.0
1 3.0 0.0 2.0
2 2.0 3.0 1.0
3 5.0 0.0 0.0
4 6.0 9.0 5.0
5 0.0 0.0 0.0
6 1.0 0.0 1.0
7 7.0 5.0 5.0
8 4.0 15.0 3.0
9 5.0 1.0 3.0
10 0.0 0.0 0.0
11 3.0 3.0 4.0
12 4.0 5.0 2.0
13 9.0 9.0 5.0
14 9.0 3.0 2.0
15 0.0 3.0 2.0
16 3.0 2.0 2.0
17 9.0 6.0 2.0
18 5.0 3.0 1.0
19 6.0 3.0 1.0
20 9.0 7.0 0.0
21 0.0 2.0 1.0
22 8.0 3.0 0.0
23 4.0 5.0 3.0
24 1.0 3.0 6.0
25 0.0 1.0 2.0
26 3.0 4.0 6.0
27 4.0 8.0 3.0
28 2.0 4.0 0.0
29 16.0 16.0 5.0
.. ... ... ...
603 7.0 2.0 2.0
604 2.0 1.0 0.0
605 4.0 1.0 4.0
606 1.0 1.0 3.0
607 0.0 0.0 0.0
608 0.0 0.0 2.0
609 0.0 0.0 0.0
610 1.0 3.0 1.0
611 0.0 1.0 0.0
612 0.0 0.0 0.0
613 2.0 3.0 0.0
614 2.0 0.0 1.0
615 0.0 0.0 0.0
616 2.0 3.0 0.0
617 0.0 0.0 0.0
618 2.0 2.0 2.0
619 0.0 0.0 2.0
620 6.0 3.0 6.0
621 1.0 2.0 3.0
622 5.0 0.0 3.0
623 0.0 0.0 0.0
624 4.0 0.0 2.0
625 1.0 0.0 0.0
626 1.0 0.0 2.0
627 0.0 0.0 0.0
628 8.0 1.0 0.0
629 1.0 2.0 5.0
630 2.0 0.0 0.0
631 0.0 0.0 0.0
632 1.0 0.0 0.0
2017-07-31 21:30:00 2017-07-31 22:00:00 2017-07-31 22:30:00 \
0 2.0 3.0 1.0
1 3.0 0.0 3.0
2 1.0 1.0 0.0
3 2.0 0.0 0.0
4 0.0 1.0 1.0
5 0.0 0.0 0.0
6 0.0 1.0 1.0
7 2.0 7.0 3.0
8 3.0 4.0 4.0
9 1.0 1.0 0.0
10 0.0 0.0 0.0
11 3.0 1.0 4.0
12 0.0 1.0 0.0
13 8.0 0.0 6.0
14 7.0 0.0 1.0
15 1.0 1.0 1.0
16 5.0 4.0 1.0
17 7.0 9.0 3.0
18 0.0 1.0 0.0
19 1.0 4.0 1.0
20 1.0 3.0 0.0
21 7.0 3.0 2.0
22 0.0 2.0 3.0
23 0.0 2.0 2.0
24 3.0 1.0 3.0
25 1.0 1.0 1.0
26 0.0 1.0 1.0
27 2.0 3.0 1.0
28 1.0 1.0 1.0
29 5.0 2.0 2.0
.. ... ... ...
603 3.0 3.0 2.0
604 2.0 0.0 0.0
605 1.0 1.0 2.0
606 2.0 1.0 3.0
607 0.0 1.0 0.0
608 0.0 2.0 1.0
609 0.0 0.0 0.0
610 2.0 0.0 1.0
611 0.0 0.0 1.0
612 0.0 0.0 0.0
613 1.0 1.0 0.0
614 2.0 1.0 1.0
615 0.0 0.0 0.0
616 0.0 0.0 0.0
617 0.0 0.0 0.0
618 0.0 0.0 0.0
619 2.0 2.0 0.0
620 1.0 0.0 1.0
621 1.0 2.0 0.0
622 2.0 2.0 0.0
623 0.0 0.0 0.0
624 1.0 0.0 0.0
625 0.0 0.0 0.0
626 0.0 0.0 2.0
627 0.0 0.0 0.0
628 0.0 0.0 2.0
629 0.0 0.0 1.0
630 0.0 0.0 0.0
631 0.0 0.0 0.0
632 0.0 0.0 1.0
2017-07-31 23:00:00 2017-07-31 23:30:00
0 1.0 1.0
1 0.0 1.0
2 1.0 0.0
3 3.0 0.0
4 3.0 0.0
5 0.0 0.0
6 2.0 0.0
7 3.0 1.0
8 5.0 0.0
9 2.0 0.0
10 0.0 1.0
11 7.0 1.0
12 1.0 0.0
13 2.0 1.0
14 1.0 1.0
15 0.0 0.0
16 1.0 0.0
17 3.0 3.0
18 0.0 1.0
19 4.0 0.0
20 0.0 1.0
21 0.0 2.0
22 0.0 0.0
23 2.0 2.0
24 1.0 0.0
25 0.0 0.0
26 1.0 1.0
27 2.0 3.0
28 0.0 0.0
29 2.0 0.0
.. ... ...
603 0.0 0.0
604 0.0 0.0
605 0.0 0.0
606 1.0 0.0
607 0.0 0.0
608 0.0 1.0
609 0.0 0.0
610 0.0 0.0
611 1.0 1.0
612 0.0 0.0
613 1.0 2.0
614 0.0 0.0
615 0.0 1.0
616 0.0 0.0
617 0.0 0.0
618 1.0 0.0
619 2.0 1.0
620 0.0 0.0
621 2.0 0.0
622 2.0 1.0
623 0.0 0.0
624 1.0 0.0
625 0.0 0.0
626 0.0 0.0
627 0.0 0.0
628 0.0 0.0
629 0.0 0.0
630 0.0 0.0
631 0.0 0.0
632 0.0 0.0
[633 rows x 1489 columns]
In [10]:
from pyproj import Geod
wgs84_geod = Geod(ellps='WGS84')
def Distance(lat1,lon1,lat2,lon2):
az12,az21,dist = wgs84_geod.inv(lon1,lat1,lon2,lat2)
return dist
try:
dist = pd.read_csv('dist.csv')
except:
dist = pd.DataFrame(columns=['begin', 'end', 'dist'])
for idx1, row1 in station.iterrows():
for idx2, row2 in station.iterrows():
dist = dist.append(pd.Series([row1.iloc[0], row2.iloc[0], Distance(row1.iloc[2], row1.iloc[3], row2.iloc[2], row2.iloc[3])], index=['begin','end', 'dist']), ignore_index=True)
#print(row1.iloc[0], row2.iloc[0], Distance(row1.iloc[2], row1.iloc[3], row2.iloc[2], row2.iloc[3]))
#print(dist)
dist.to_csv('dist.csv', index=False)
dist
Out[10]:
begin
end
dist
0
72.0
72.0
0.000000
1
72.0
79.0
5454.887660
2
72.0
82.0
6251.852068
3
72.0
83.0
9385.185658
4
72.0
116.0
2902.567147
5
72.0
119.0
8017.980523
6
72.0
120.0
9406.865190
7
72.0
127.0
4093.238958
8
72.0
128.0
4525.685810
9
72.0
143.0
8315.137982
10
72.0
144.0
7729.624333
11
72.0
146.0
5809.148046
12
72.0
150.0
5269.452069
13
72.0
151.0
5023.733805
14
72.0
152.0
5972.838300
15
72.0
153.0
1982.665966
16
72.0
157.0
8483.861600
17
72.0
161.0
4245.835806
18
72.0
164.0
2530.547873
19
72.0
167.0
2538.067353
20
72.0
168.0
3060.890092
21
72.0
173.0
1079.593568
22
72.0
174.0
3520.114705
23
72.0
195.0
6613.424594
24
72.0
212.0
2870.916185
25
72.0
216.0
7429.595912
26
72.0
217.0
7162.714520
27
72.0
223.0
3310.421420
28
72.0
228.0
2333.778707
29
72.0
229.0
4423.978882
...
...
...
...
400659
3478.0
3436.0
7133.574376
400660
3478.0
3437.0
15343.248062
400661
3478.0
3438.0
13475.585153
400662
3478.0
3440.0
4246.080050
400663
3478.0
3441.0
10658.325912
400664
3478.0
3443.0
11830.298091
400665
3478.0
3445.0
15175.385265
400666
3478.0
3447.0
12989.537331
400667
3478.0
3449.0
8799.005948
400668
3478.0
3452.0
8549.782723
400669
3478.0
3453.0
8026.181423
400670
3478.0
3454.0
7884.224721
400671
3478.0
3455.0
4078.431514
400672
3478.0
3456.0
8336.518248
400673
3478.0
3457.0
12164.058313
400674
3478.0
3458.0
12047.870104
400675
3478.0
3459.0
11650.978189
400676
3478.0
3461.0
6417.978497
400677
3478.0
3462.0
10914.589079
400678
3478.0
3463.0
8867.450597
400679
3478.0
3464.0
10717.721983
400680
3478.0
3466.0
11278.430101
400681
3478.0
3468.0
8629.975167
400682
3478.0
3469.0
9264.882495
400683
3478.0
3470.0
1831.487926
400684
3478.0
3472.0
9513.521058
400685
3478.0
3474.0
7579.706374
400686
3478.0
3476.0
9064.965484
400687
3478.0
3477.0
248.394878
400688
3478.0
3478.0
0.000000
400689 rows × 3 columns
In [11]:
sz = station.shape[0]
n = sz * (sz-1) / 2
ans = 0
for idx, row in dist.iterrows():
if row.iloc[0] == row.iloc[1]:
continue
ans += row.iloc[2] / n
print('average distance:', ans, 'm')
average distance: 10786.9349309 m
In [12]:
from collections import defaultdict
import datetime
weekday = dict()
weekday = defaultdict(lambda: 0, weekday)
weekend = dict()
weekend = defaultdict(lambda: 0, weekend)
for idx, row in path.iterrows():
if datetime.datetime.strptime(row.iloc[1], "%Y-%m-%d %H:%M:%S").isoweekday() > 5: # weekend
weekend[(row.iloc[3], row.iloc[4])] += 1
else:
weekday[(row.iloc[3], row.iloc[4])] += 1
top_weekday = sorted(weekday, key=weekday.get)[::-1][:3]
top_weekend = sorted(weekend, key=weekend.get)[::-1][:3]
print(top_weekday)
print(top_weekend)
[(432.0, 3263.0), (3263.0, 432.0), (519.0, 498.0)]
[(3254.0, 3182.0), (3182.0, 3182.0), (3182.0, 3254.0)]
In [13]:
from collections import defaultdict
import datetime
weekday_in = dict()
weekday_in = defaultdict(lambda: 0, weekday_in)
weekend_in = dict()
weekend_in = defaultdict(lambda: 0, weekend_in)
weekday_out = dict()
weekday_out = defaultdict(lambda: 0, weekday_out)
weekend_out = dict()
weekend_out = defaultdict(lambda: 0, weekend_out)
allDay_in = dict()
allDay_in = defaultdict(lambda: 0, allDay_in)
allDay_out = dict()
allDay_out = defaultdict(lambda: 0, allDay_out)
allDay = dict()
allDay = defaultdict(lambda: 0, allDay)
for idx, row in path.iterrows():
if 'freq' not in station.columns:
allDay_in[row.iloc[4]] += 1
allDay_out[row.iloc[3]] += 1
allDay[row.iloc[4]] += 1
allDay[row.iloc[3]] += 1
if datetime.datetime.strptime(row.iloc[1], "%Y-%m-%d %H:%M:%S").isoweekday() > 5: # weekend
weekend_out[row.iloc[3]] += 1
weekend_in[row.iloc[4]] += 1
else:
weekday_out[row.iloc[3]] += 1
weekday_in[row.iloc[4]] += 1
if 'freq' not in station.columns:
station['in'] = station['id'].map(allDay_in)
station['out'] = station['id'].map(allDay_out)
station['freq'] = station['id'].map(allDay)
station.to_csv('station.csv', index=False)
top_weekday_in = sorted(weekday_in, key=weekday_in.get)[::-1][:3]
top_weekday_out = sorted(weekday_out, key=weekday_out.get)[::-1][:3]
top_weekend_in = sorted(weekend_in, key=weekend_in.get)[::-1][:3]
top_weekend_out = sorted(weekend_out, key=weekend_out.get)[::-1][:3]
print('weekday in_flow:', top_weekday_in)
print('weekday out_flow:', top_weekday_in)
print('weekend in_flow:', top_weekend_in)
print('weekend out_flow:', top_weekend_in)
weekday in_flow: [519.0, 402.0, 426.0]
weekday out_flow: [519.0, 402.0, 426.0]
weekend in_flow: [426.0, 435.0, 497.0]
weekend out_flow: [426.0, 435.0, 497.0]
drow line chart
In [14]:
import matplotlib.pyplot as plt
most = station.nlargest(1, 'freq').index[0]
in_most = in_flow.iloc[most][1:]
out_most = out_flow.iloc[most][1:]
plt.figure()
axes = plt.gca()
axes.set_ylim([-10,130])
out_most.plot(figsize=(15, 5))
in_most.plot(figsize=(15, 5))
plt.show()
In [15]:
from sklearn.metrics import pairwise
pairwise.pairwise_distances(np.array([in_most.tolist(), out_most.tolist()]), metric='minkowski', n_jobs=4, p=2)[0][1]
Out[15]:
171.40303381212365
In [16]:
plt.figure()
axes = plt.gca()
axes.set_ylim([-50,120])
in_most.sub(in_most.mean()).plot(figsize=(15, 5))
out_most.sub(out_most.mean()).plot(figsize=(15, 5))
plt.show()
# print(in_most.mean())
# print(out_most.mean())
pairwise.pairwise_distances(np.array([in_most.sub(in_most.mean()).tolist(), out_most.sub(out_most.mean()).tolist()]), metric='minkowski', n_jobs=4, p=2)[0][1]
Out[16]:
171.28531933087413
In [17]:
plt.figure()
axes = plt.gca()
axes.set_ylim([-5,10])
in_most.sub(in_most.mean()).divide(in_most.std()).plot(figsize=(15, 5))
out_most.sub(out_most.mean()).divide(out_most.std()).plot(figsize=(15, 5))
plt.show()
#print(in_most.mean(), in_most.std())
#print(out_most.mean(), out_most.std())
pairwise.pairwise_distances(np.array([in_most.sub(in_most.mean()).divide(in_most.std()).tolist(), out_most.sub(out_most.mean()).divide(out_most.std()).tolist()]), metric='minkowski', n_jobs=4, p=2)[0][1]
Out[17]:
11.990369115699348
In [18]:
from sklearn import linear_model
import matplotlib.pyplot as plt
reg_in = linear_model.LinearRegression(normalize=True)
reg_out = linear_model.LinearRegression(normalize=True)
y = [i for i in range(len(in_most))]
in_X = [[i] for i in in_most.tolist()]
out_X = [[i] for i in out_most.tolist()]
reg_in.fit(X=in_X, y=y)
reg_out.fit(X=out_X, y=y)
in_df = pd.DataFrame(reg_in.predict(in_X)).T
in_df.columns = in_most.keys()
out_df = pd.DataFrame(reg_out.predict(out_X)).T
out_df.columns = out_most.keys()
in_s = in_df.iloc[0][:]
out_s = out_df.iloc[0][:]
plt.figure()
in_s.plot(figsize=(15, 5))
out_s.plot(figsize=(15, 5))
plt.show()
pairwise.pairwise_distances(np.array([in_s.tolist(), out_s.tolist()]), metric='minkowski', n_jobs=4, p=2)[0][1]
Out[18]:
530.12990552891745
In [19]:
from sklearn.neighbors.kde import KernelDensity
in_tmp = [[i, j] for i,j in enumerate(in_most)]
out_tmp = [[j, i] for i,j in enumerate(out_most)]
kde_in = KernelDensity(kernel='gaussian', bandwidth=30).fit(in_tmp)
kde_out = KernelDensity(kernel='cosine', bandwidth=30).fit(out_tmp)
in_arr = kde_in.score_samples(in_tmp)
out_arr = kde_out.score_samples(out_tmp)
in_ss = pd.Series(in_arr.tolist(), index=in_most.keys())
out_ss = pd.Series(out_arr.tolist(), index=in_most.keys())
#print(in_ss)
plt.figure()
# axes = plt.gca()
# axes.set_ylim([0,1])
in_ss.plot(figsize=(15, 5))
out_ss.plot(figsize=(15, 5))
plt.show()
根據上面的資料,我們可以發現尖峰時刻是有大週期的,而有趣的是,除了大週期之外,每個大週期內,也都有小週期,大週期與小週期的成因,也許是可以研究的方向。
Content source: calee0219/Course
Similar notebooks: