In [1]:
#!/usr/bin/env python3
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import Imputer
from sklearn.metrics import pairwise
from pyproj import Geod
/usr/local/lib/python3.5/dist-packages/pandas/core/computation/__init__.py:18: UserWarning: The installed version of numexpr 2.4.3 is not supported in pandas and will be not be used
The minimum supported version is 2.4.6
ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning)
In [2]:
df = pd.read_csv('201707-citibike-tripdata.csv')
In [3]:
print('column numbers: ' + str(len(df.columns)))
print('row numbers: ' + str(len(df.index)))
column numbers: 15
row numbers: 1735599
In [4]:
df
Out[4]:
tripduration
starttime
stoptime
start station id
start station name
start station latitude
start station longitude
end station id
end station name
end station latitude
end station longitude
bikeid
usertype
birth year
gender
0
364
2017-07-01 00:00:00
2017-07-01 00:06:05
539
Metropolitan Ave & Bedford Ave
40.715348
-73.960241
3107
Bedford Ave & Nassau Ave
40.723117
-73.952123
14744
Subscriber
1986.0
1
1
2142
2017-07-01 00:00:03
2017-07-01 00:35:46
293
Lafayette St & E 8 St
40.730207
-73.991026
3425
2 Ave & E 104 St
40.789210
-73.943708
19587
Subscriber
1981.0
1
2
328
2017-07-01 00:00:08
2017-07-01 00:05:37
3242
Schermerhorn St & Court St
40.691029
-73.991834
3397
Court St & Nelson St
40.676395
-73.998699
27937
Subscriber
1984.0
2
3
2530
2017-07-01 00:00:11
2017-07-01 00:42:22
2002
Wythe Ave & Metropolitan Ave
40.716887
-73.963198
398
Atlantic Ave & Furman St
40.691652
-73.999979
26066
Subscriber
1985.0
1
4
2534
2017-07-01 00:00:15
2017-07-01 00:42:29
2002
Wythe Ave & Metropolitan Ave
40.716887
-73.963198
398
Atlantic Ave & Furman St
40.691652
-73.999979
29408
Subscriber
1982.0
2
5
545
2017-07-01 00:00:18
2017-07-01 00:09:24
361
Allen St & Hester St
40.716059
-73.991908
502
Henry St & Grand St
40.714215
-73.981346
21203
Subscriber
1954.0
2
6
1496
2017-07-01 00:00:18
2017-07-01 00:25:15
3233
E 48 St & 5 Ave
40.757246
-73.978059
546
E 30 St & Park Ave S
40.744449
-73.983035
15933
Customer
1971.0
1
7
1495
2017-07-01 00:00:19
2017-07-01 00:25:15
3233
E 48 St & 5 Ave
40.757246
-73.978059
546
E 30 St & Park Ave S
40.744449
-73.983035
18515
Customer
1964.0
2
8
593
2017-07-01 00:00:27
2017-07-01 00:10:20
3314
W 95 St & Broadway
40.793770
-73.971888
3363
E 102 St & Park Ave
40.790483
-73.950331
29135
Subscriber
1982.0
1
9
1950
2017-07-01 00:00:30
2017-07-01 00:33:00
3085
Roebling St & N 4 St
40.714690
-73.957390
468
Broadway & W 56 St
40.765265
-73.981923
17365
Subscriber
1983.0
1
10
736
2017-07-01 00:01:05
2017-07-01 00:13:22
382
University Pl & E 14 St
40.734927
-73.992005
526
E 33 St & 5 Ave
40.747659
-73.984907
20592
Subscriber
1966.0
1
11
588
2017-07-01 00:01:10
2017-07-01 00:10:59
305
E 58 St & 3 Ave
40.760958
-73.967245
519
Pershing Square North
40.751873
-73.977706
26555
Subscriber
1975.0
2
12
799
2017-07-01 00:01:19
2017-07-01 00:14:38
384
Fulton St & Washington Ave
40.683048
-73.964915
3329
Degraw St & Smith St
40.682915
-73.993182
16184
Subscriber
1960.0
1
13
404
2017-07-01 00:01:51
2017-07-01 00:08:35
502
Henry St & Grand St
40.714215
-73.981346
2009
Catherine St & Monroe St
40.711174
-73.996826
25127
Subscriber
1977.0
2
14
1045
2017-07-01 00:01:59
2017-07-01 00:19:25
491
E 24 St & Park Ave S
40.740964
-73.986022
500
Broadway & W 51 St
40.762288
-73.983362
21443
Subscriber
1972.0
1
15
589
2017-07-01 00:02:05
2017-07-01 00:11:55
252
MacDougal St & Washington Sq
40.732264
-73.998522
523
W 38 St & 8 Ave
40.754666
-73.991382
19047
Subscriber
1984.0
1
16
491
2017-07-01 00:02:06
2017-07-01 00:10:17
462
W 22 St & 10 Ave
40.746920
-74.004519
482
W 15 St & 7 Ave
40.739355
-73.999318
26666
Subscriber
1968.0
1
17
1718
2017-07-01 00:02:07
2017-07-01 00:30:46
450
W 49 St & 8 Ave
40.762272
-73.987882
473
Rivington St & Chrystie St
40.721101
-73.991925
25845
Subscriber
1988.0
1
18
773
2017-07-01 00:02:08
2017-07-01 00:15:01
441
E 52 St & 2 Ave
40.756014
-73.967416
3338
2 Ave & E 99 St
40.786259
-73.945526
27895
Subscriber
1988.0
2
19
859
2017-07-01 00:02:08
2017-07-01 00:16:27
3110
Meserole Ave & Manhattan Ave
40.727086
-73.952991
3081
Graham Ave & Grand St
40.711863
-73.944024
29350
Subscriber
1990.0
1
20
173
2017-07-01 00:02:10
2017-07-01 00:05:03
3083
Bushwick Ave & Powers St
40.712477
-73.941000
3074
Montrose Ave & Bushwick Ave
40.707678
-73.940162
25113
Subscriber
1981.0
1
21
1294
2017-07-01 00:02:13
2017-07-01 00:23:47
477
W 41 St & 8 Ave
40.756405
-73.990026
3147
E 85 St & 3 Ave
40.778012
-73.954071
14838
Subscriber
1966.0
1
22
876
2017-07-01 00:02:17
2017-07-01 00:16:54
435
W 21 St & 6 Ave
40.741740
-73.994156
266
Avenue D & E 8 St
40.723684
-73.975748
21645
Subscriber
1992.0
1
23
1835
2017-07-01 00:02:19
2017-07-01 00:32:55
3416
7 Ave & Park Pl
40.677615
-73.973243
3055
Greene Ave & Nostrand Ave
40.688334
-73.950916
21249
Customer
NaN
0
24
1327
2017-07-01 00:02:20
2017-07-01 00:24:28
402
Broadway & E 22 St
40.740343
-73.989551
514
12 Ave & W 40 St
40.760875
-74.002777
28025
Subscriber
1995.0
2
25
704
2017-07-01 00:02:33
2017-07-01 00:14:18
3255
8 Ave & W 31 St
40.750585
-73.994685
450
W 49 St & 8 Ave
40.762272
-73.987882
15139
Subscriber
1973.0
1
26
1449
2017-07-01 00:02:42
2017-07-01 00:26:52
401
Allen St & Rivington St
40.720196
-73.989978
446
W 24 St & 7 Ave
40.744876
-73.995299
28973
Subscriber
1988.0
1
27
2391
2017-07-01 00:02:44
2017-07-01 00:42:35
2008
Little West St & 1 Pl
40.705693
-74.016777
511
E 14 St & Avenue B
40.729387
-73.977724
27374
Subscriber
1952.0
1
28
983
2017-07-01 00:02:52
2017-07-01 00:19:16
501
FDR Drive & E 35 St
40.744219
-73.971212
3146
E 81 St & 3 Ave
40.775730
-73.956753
20559
Subscriber
1990.0
1
29
1323
2017-07-01 00:02:52
2017-07-01 00:24:55
355
Bayard St & Baxter St
40.716021
-73.999744
3431
E 35 St & 3 Ave
40.746524
-73.977885
26055
Subscriber
1973.0
2
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1735569
254
2017-07-31 23:56:38
2017-08-01 00:00:52
3167
Amsterdam Ave & W 73 St
40.779668
-73.980930
3171
Amsterdam Ave & W 82 St
40.785247
-73.976673
28279
Subscriber
1969.0
2
1735570
405
2017-07-31 23:56:56
2017-08-01 00:03:41
297
E 15 St & 3 Ave
40.734232
-73.986923
345
W 13 St & 6 Ave
40.736494
-73.997044
29735
Subscriber
1986.0
1
1735571
398
2017-07-31 23:57:00
2017-08-01 00:03:38
3088
Union Ave & Jackson St
40.716075
-73.952029
3083
Bushwick Ave & Powers St
40.712477
-73.941000
18161
Subscriber
1988.0
2
1735572
866
2017-07-31 23:57:06
2017-08-01 00:11:32
308
St James Pl & Oliver St
40.713079
-73.998512
195
Liberty St & Broadway
40.709056
-74.010434
21589
Customer
NaN
0
1735573
425
2017-07-31 23:57:12
2017-08-01 00:04:17
285
Broadway & E 14 St
40.734546
-73.990741
432
E 7 St & Avenue A
40.726218
-73.983799
15434
Subscriber
1960.0
0
1735574
524
2017-07-31 23:57:14
2017-08-01 00:05:59
462
W 22 St & 10 Ave
40.746920
-74.004519
402
Broadway & E 22 St
40.740343
-73.989551
19699
Subscriber
1980.0
1
1735575
1185
2017-07-31 23:57:17
2017-08-01 00:17:02
526
E 33 St & 5 Ave
40.747659
-73.984907
434
9 Ave & W 18 St
40.743174
-74.003664
18723
Customer
1988.0
1
1735576
436
2017-07-31 23:57:21
2017-08-01 00:04:37
306
Cliff St & Fulton St
40.708235
-74.005301
360
William St & Pine St
40.707179
-74.008873
20164
Subscriber
1986.0
1
1735577
412
2017-07-31 23:57:31
2017-08-01 00:04:24
285
Broadway & E 14 St
40.734546
-73.990741
432
E 7 St & Avenue A
40.726218
-73.983799
19574
Subscriber
1959.0
1
1735578
592
2017-07-31 23:57:42
2017-08-01 00:07:34
241
DeKalb Ave & S Portland Ave
40.689810
-73.974931
3049
Cambridge Pl & Gates Ave
40.684880
-73.963040
16333
Subscriber
1989.0
1
1735579
330
2017-07-31 23:57:47
2017-08-01 00:03:18
3085
Roebling St & N 4 St
40.714690
-73.957390
3086
Graham Ave & Conselyea St
40.715143
-73.944507
29409
Subscriber
1982.0
1
1735580
1506
2017-07-31 23:58:28
2017-08-01 00:23:34
528
2 Ave & E 31 St
40.742909
-73.977061
3145
E 84 St & Park Ave
40.778627
-73.957721
20944
Subscriber
1967.0
2
1735581
1176
2017-07-31 23:58:40
2017-08-01 00:18:16
503
E 20 St & Park Ave
40.738274
-73.987520
2006
Central Park S & 6 Ave
40.765909
-73.976342
29009
Customer
1979.0
1
1735582
774
2017-07-31 23:58:42
2017-08-01 00:11:37
164
E 47 St & 2 Ave
40.753231
-73.970325
492
W 33 St & 7 Ave
40.750200
-73.990931
17972
Subscriber
1964.0
1
1735583
1060
2017-07-31 23:58:45
2017-08-01 00:16:25
428
E 3 St & 1 Ave
40.724677
-73.987834
128
MacDougal St & Prince St
40.727103
-74.002971
27508
Subscriber
1968.0
2
1735584
364
2017-07-31 23:58:47
2017-08-01 00:04:51
383
Greenwich Ave & Charles St
40.735238
-74.000271
509
9 Ave & W 22 St
40.745497
-74.001971
27677
Subscriber
1966.0
1
1735585
1343
2017-07-31 23:58:50
2017-08-01 00:21:14
161
LaGuardia Pl & W 3 St
40.729170
-73.998102
406
Hicks St & Montague St
40.695128
-73.995951
20912
Subscriber
1985.0
1
1735586
936
2017-07-31 23:58:54
2017-08-01 00:14:30
442
W 27 St & 7 Ave
40.746647
-73.993915
477
W 41 St & 8 Ave
40.756405
-73.990026
19612
Subscriber
1995.0
1
1735587
277
2017-07-31 23:59:10
2017-08-01 00:03:47
495
W 47 St & 10 Ave
40.762699
-73.993012
423
W 54 St & 9 Ave
40.765849
-73.986905
30198
Subscriber
1988.0
1
1735588
2294
2017-07-31 23:59:09
2017-08-01 00:37:24
232
Cadman Plaza E & Tillary St
40.695977
-73.990149
3342
Pioneer St & Richards St
40.677775
-74.009461
18769
Subscriber
1999.0
1
1735589
1916
2017-07-31 23:59:11
2017-08-01 00:31:07
440
E 45 St & 3 Ave
40.752554
-73.972826
3338
2 Ave & E 99 St
40.786259
-73.945526
25644
Subscriber
1992.0
1
1735590
2244
2017-07-31 23:59:10
2017-08-01 00:36:35
3165
Central Park West & W 72 St
40.775794
-73.976206
3163
Central Park West & W 68 St
40.773407
-73.977825
30201
Subscriber
1950.0
1
1735591
428
2017-07-31 23:59:11
2017-08-01 00:06:20
477
W 41 St & 8 Ave
40.756405
-73.990026
513
W 56 St & 10 Ave
40.768254
-73.988639
30127
Subscriber
1980.0
1
1735592
215
2017-07-31 23:59:15
2017-08-01 00:02:51
3164
Columbus Ave & W 72 St
40.777057
-73.978985
3170
W 84 St & Columbus Ave
40.785000
-73.972834
18057
Subscriber
1970.0
1
1735593
3058
2017-07-31 23:59:31
2017-08-01 00:50:30
3418
Plaza St West & Flatbush Ave
40.675021
-73.971115
293
Lafayette St & E 8 St
40.730207
-73.991026
30161
Subscriber
1963.0
1
1735594
560
2017-07-31 23:59:33
2017-08-01 00:08:53
3260
Mercer St & Bleecker St
40.727064
-73.996621
236
St Marks Pl & 2 Ave
40.728419
-73.987140
16126
Subscriber
1990.0
2
1735595
272
2017-07-31 23:59:35
2017-08-01 00:04:08
247
Perry St & Bleecker St
40.735354
-74.004831
434
9 Ave & W 18 St
40.743174
-74.003664
27684
Subscriber
1999.0
1
1735596
1211
2017-07-31 23:59:37
2017-08-01 00:19:49
3303
Butler St & Court St
40.684989
-73.994403
3308
Kane St & Clinton St
40.686176
-73.996453
28402
Subscriber
1975.0
2
1735597
547
2017-07-31 23:59:48
2017-08-01 00:08:56
276
Duane St & Greenwich St
40.717488
-74.010455
127
Barrow St & Hudson St
40.731724
-74.006744
30150
Subscriber
1992.0
1
1735598
1816
2017-07-31 23:59:57
2017-08-01 00:30:13
3091
Frost St & Meeker St
40.717640
-73.948820
258
DeKalb Ave & Vanderbilt Ave
40.689407
-73.968855
19643
Customer
NaN
0
1735599 rows × 15 columns
In [5]:
print(df.isnull().sum().sum())
print(pd.isnull(df).sum() > 0)
birth_mean = df['birth year'].mean()
df = df.fillna(birth_mean)
228596
tripduration False
starttime False
stoptime False
start station id False
start station name False
start station latitude False
start station longitude False
end station id False
end station name False
end station latitude False
end station longitude False
bikeid False
usertype False
birth year True
gender False
dtype: bool
In [6]:
df = df.drop(df.index[df['starttime'] >= df['stoptime']])
df = df.reset_index(drop=True)
In [7]:
try:
station = pd.read_csv('station.csv')
except:
station = pd.DataFrame(df[['start station id', 'start station name', 'start station latitude', 'start station longitude']])
station.columns = ['id', 'name', 'latitude', 'longitude']
tmp = pd.DataFrame(df[['end station id', 'end station name', 'end station latitude', 'end station longitude']])
tmp.columns = ['id', 'name', 'latitude', 'longitude']
station = pd.concat([station, tmp])
station = station.sort_values('id').drop_duplicates().reset_index(drop=True)
station.to_csv('station.csv', index=False)
station
Out[7]:
id
name
latitude
longitude
0
72
W 52 St & 11 Ave
40.767272
-73.993929
1
79
Franklin St & W Broadway
40.719116
-74.006667
2
82
St James Pl & Pearl St
40.711174
-74.000165
3
83
Atlantic Ave & Fort Greene Pl
40.683826
-73.976323
4
116
W 17 St & 8 Ave
40.741776
-74.001497
5
119
Park Ave & St Edwards St
40.696089
-73.978034
6
120
Lexington Ave & Classon Ave
40.686768
-73.959282
7
127
Barrow St & Hudson St
40.731724
-74.006744
8
128
MacDougal St & Prince St
40.727103
-74.002971
9
143
Clinton St & Joralemon St
40.692395
-73.993379
10
144
Nassau St & Navy St
40.698399
-73.980689
11
146
Hudson St & Reade St
40.716250
-74.009106
12
150
E 2 St & Avenue C
40.720874
-73.980858
13
151
Cleveland Pl & Spring St
40.722104
-73.997249
14
152
Warren St & Church St
40.714740
-74.009106
15
153
E 40 St & 5 Ave
40.752062
-73.981632
16
157
Henry St & Atlantic Ave
40.690893
-73.996123
17
161
LaGuardia Pl & W 3 St
40.729170
-73.998102
18
164
E 47 St & 2 Ave
40.753231
-73.970325
19
167
E 39 St & 3 Ave
40.748901
-73.976049
20
168
W 18 St & 6 Ave
40.739713
-73.994564
21
173
Broadway & W 49 St
40.760683
-73.984527
22
174
E 25 St & 1 Ave
40.738177
-73.977387
23
195
Liberty St & Broadway
40.709056
-74.010434
24
212
W 16 St & The High Line
40.743349
-74.006818
25
216
Columbia Heights & Cranberry St
40.700379
-73.995481
26
217
Old Fulton St
40.702772
-73.993836
27
223
W 13 St & 7 Ave
40.737815
-73.999947
28
228
E 48 St & 3 Ave
40.754601
-73.971879
29
229
Great Jones St
40.727434
-73.993790
...
...
...
...
...
604
3436
Greenwich St & Hubert St
40.721319
-74.010065
605
3437
Riverside Dr & W 91 St
40.793135
-73.977004
606
3438
E 76 St & 3 Ave
40.772249
-73.958421
607
3440
Fulton St & Adams St
40.692418
-73.989495
608
3441
10 Hudson Yards
40.752957
-74.002640
609
3443
W 52 St & 6 Ave
40.761330
-73.979820
610
3445
Riverside Dr & W 89 St
40.791812
-73.978602
611
3447
E 71 St & 1 Ave
40.767034
-73.956227
612
3449
Eckford St & Engert Ave
40.721463
-73.948009
613
3452
Bayard St & Leonard St
40.719156
-73.948854
614
3453
Devoe St & Lorimer St
40.713352
-73.949103
615
3454
Leonard St & Maujer St
40.710369
-73.947060
616
3455
Schermerhorn St & 3 Ave
40.686808
-73.980362
617
3456
Jackson St & Leonard St
40.716380
-73.948213
618
3457
E 58 St & Madison Ave
40.763026
-73.972095
619
3458
W 55 St & 6 Ave
40.763094
-73.978350
620
3459
E 53 St & 3 Ave
40.757632
-73.969306
621
3461
Murray St & Greenwich St
40.714852
-74.011223
622
3462
E 44 St & 2 Ave
40.751184
-73.971387
623
3463
E 16 St & Irving Pl
40.735367
-73.987974
624
3464
W 37 St & Broadway
40.752271
-73.987706
625
3466
W 45 St & 6 Ave
40.756687
-73.982577
626
3468
NYCBS Depot - STY - Garage 4
40.730380
-73.974750
627
3469
India St & West St
40.731814
-73.959950
628
3470
Gowanus Tech Station
40.669802
-73.994905
629
3472
W 15 St & 10 Ave
40.742754
-74.007474
630
3474
6 Ave & Spring St
40.725256
-74.004121
631
3476
Norman Ave & Leonard St
40.725770
-73.950740
632
3477
39 St & 2 Ave - Citi Bike HQ at Industry City
40.655400
-74.010628
633
3478
2 Ave & 36 St - Citi Bike HQ at Industry City
40.657089
-74.008702
634 rows × 4 columns
In [8]:
import mpl_toolkits
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np
my_map = Basemap(projection='merc', lat_0=40.7, lon_0=-73.98,
resolution = 'h', area_thresh = 0.01,
llcrnrlon=-74.1, llcrnrlat=40.64,
urcrnrlon=-73.9, urcrnrlat=40.85)
lon = station['longitude'].tolist()
lat = station['latitude'].tolist()
labels = station['id'].tolist()
fig = plt.figure(frameon=False)
fig.set_size_inches(18,12)
x,y = my_map(lon, lat)
my_map.plot(x, y, 'bo', markersize=2)
my_map.drawcoastlines()
my_map.drawcountries()
my_map.fillcontinents(color='coral')
my_map.drawmapboundary()
my_map.drawmeridians(np.arange(0, 360, 30))
my_map.drawparallels(np.arange(-90, 90, 30))
for label, xpt, ypt in zip(labels, lon, lat):
plt.text(xpt, ypt, label)
plt.show()
/usr/local/lib/python3.5/dist-packages/mpl_toolkits/basemap/__init__.py:3260: MatplotlibDeprecationWarning: The ishold function was deprecated in version 2.0.
b = ax.ishold()
/usr/local/lib/python3.5/dist-packages/mpl_toolkits/basemap/__init__.py:3269: MatplotlibDeprecationWarning: axes.hold is deprecated.
See the API Changes document (http://matplotlib.org/api/api_changes.html)
for more details.
ax.hold(b)
/usr/local/lib/python3.5/dist-packages/mpl_toolkits/basemap/__init__.py:1767: MatplotlibDeprecationWarning: The get_axis_bgcolor function was deprecated in version 2.0. Use get_facecolor instead.
axisbgc = ax.get_axis_bgcolor()
/usr/local/lib/python3.5/dist-packages/mpl_toolkits/basemap/__init__.py:1623: MatplotlibDeprecationWarning: The get_axis_bgcolor function was deprecated in version 2.0. Use get_facecolor instead.
fill_color = ax.get_axis_bgcolor()
/usr/local/lib/python3.5/dist-packages/mpl_toolkits/basemap/__init__.py:1698: MatplotlibDeprecationWarning: The axesPatch function was deprecated in version 2.1. Use Axes.patch instead.
limb = ax.axesPatch
In [10]:
try:
path = read_csv('path.csv')
except:
path = df.drop(['start station name', 'start station latitude', 'start station longitude', 'end station name', 'end station latitude', 'end station longitude'], axis=1)
path.to_csv('path.csv', index=False)
path
Out[10]:
tripduration
starttime
stoptime
start station id
end station id
bikeid
usertype
birth year
gender
0
364
2017-07-01 00:00:00
2017-07-01 00:06:05
539
3107
14744
Subscriber
1986.000000
1
1
2142
2017-07-01 00:00:03
2017-07-01 00:35:46
293
3425
19587
Subscriber
1981.000000
1
2
328
2017-07-01 00:00:08
2017-07-01 00:05:37
3242
3397
27937
Subscriber
1984.000000
2
3
2530
2017-07-01 00:00:11
2017-07-01 00:42:22
2002
398
26066
Subscriber
1985.000000
1
4
2534
2017-07-01 00:00:15
2017-07-01 00:42:29
2002
398
29408
Subscriber
1982.000000
2
5
545
2017-07-01 00:00:18
2017-07-01 00:09:24
361
502
21203
Subscriber
1954.000000
2
6
1496
2017-07-01 00:00:18
2017-07-01 00:25:15
3233
546
15933
Customer
1971.000000
1
7
1495
2017-07-01 00:00:19
2017-07-01 00:25:15
3233
546
18515
Customer
1964.000000
2
8
593
2017-07-01 00:00:27
2017-07-01 00:10:20
3314
3363
29135
Subscriber
1982.000000
1
9
1950
2017-07-01 00:00:30
2017-07-01 00:33:00
3085
468
17365
Subscriber
1983.000000
1
10
736
2017-07-01 00:01:05
2017-07-01 00:13:22
382
526
20592
Subscriber
1966.000000
1
11
588
2017-07-01 00:01:10
2017-07-01 00:10:59
305
519
26555
Subscriber
1975.000000
2
12
799
2017-07-01 00:01:19
2017-07-01 00:14:38
384
3329
16184
Subscriber
1960.000000
1
13
404
2017-07-01 00:01:51
2017-07-01 00:08:35
502
2009
25127
Subscriber
1977.000000
2
14
1045
2017-07-01 00:01:59
2017-07-01 00:19:25
491
500
21443
Subscriber
1972.000000
1
15
589
2017-07-01 00:02:05
2017-07-01 00:11:55
252
523
19047
Subscriber
1984.000000
1
16
491
2017-07-01 00:02:06
2017-07-01 00:10:17
462
482
26666
Subscriber
1968.000000
1
17
1718
2017-07-01 00:02:07
2017-07-01 00:30:46
450
473
25845
Subscriber
1988.000000
1
18
773
2017-07-01 00:02:08
2017-07-01 00:15:01
441
3338
27895
Subscriber
1988.000000
2
19
859
2017-07-01 00:02:08
2017-07-01 00:16:27
3110
3081
29350
Subscriber
1990.000000
1
20
173
2017-07-01 00:02:10
2017-07-01 00:05:03
3083
3074
25113
Subscriber
1981.000000
1
21
1294
2017-07-01 00:02:13
2017-07-01 00:23:47
477
3147
14838
Subscriber
1966.000000
1
22
876
2017-07-01 00:02:17
2017-07-01 00:16:54
435
266
21645
Subscriber
1992.000000
1
23
1835
2017-07-01 00:02:19
2017-07-01 00:32:55
3416
3055
21249
Customer
1979.591607
0
24
1327
2017-07-01 00:02:20
2017-07-01 00:24:28
402
514
28025
Subscriber
1995.000000
2
25
704
2017-07-01 00:02:33
2017-07-01 00:14:18
3255
450
15139
Subscriber
1973.000000
1
26
1449
2017-07-01 00:02:42
2017-07-01 00:26:52
401
446
28973
Subscriber
1988.000000
1
27
2391
2017-07-01 00:02:44
2017-07-01 00:42:35
2008
511
27374
Subscriber
1952.000000
1
28
983
2017-07-01 00:02:52
2017-07-01 00:19:16
501
3146
20559
Subscriber
1990.000000
1
29
1323
2017-07-01 00:02:52
2017-07-01 00:24:55
355
3431
26055
Subscriber
1973.000000
2
...
...
...
...
...
...
...
...
...
...
1735569
254
2017-07-31 23:56:38
2017-08-01 00:00:52
3167
3171
28279
Subscriber
1969.000000
2
1735570
405
2017-07-31 23:56:56
2017-08-01 00:03:41
297
345
29735
Subscriber
1986.000000
1
1735571
398
2017-07-31 23:57:00
2017-08-01 00:03:38
3088
3083
18161
Subscriber
1988.000000
2
1735572
866
2017-07-31 23:57:06
2017-08-01 00:11:32
308
195
21589
Customer
1979.591607
0
1735573
425
2017-07-31 23:57:12
2017-08-01 00:04:17
285
432
15434
Subscriber
1960.000000
0
1735574
524
2017-07-31 23:57:14
2017-08-01 00:05:59
462
402
19699
Subscriber
1980.000000
1
1735575
1185
2017-07-31 23:57:17
2017-08-01 00:17:02
526
434
18723
Customer
1988.000000
1
1735576
436
2017-07-31 23:57:21
2017-08-01 00:04:37
306
360
20164
Subscriber
1986.000000
1
1735577
412
2017-07-31 23:57:31
2017-08-01 00:04:24
285
432
19574
Subscriber
1959.000000
1
1735578
592
2017-07-31 23:57:42
2017-08-01 00:07:34
241
3049
16333
Subscriber
1989.000000
1
1735579
330
2017-07-31 23:57:47
2017-08-01 00:03:18
3085
3086
29409
Subscriber
1982.000000
1
1735580
1506
2017-07-31 23:58:28
2017-08-01 00:23:34
528
3145
20944
Subscriber
1967.000000
2
1735581
1176
2017-07-31 23:58:40
2017-08-01 00:18:16
503
2006
29009
Customer
1979.000000
1
1735582
774
2017-07-31 23:58:42
2017-08-01 00:11:37
164
492
17972
Subscriber
1964.000000
1
1735583
1060
2017-07-31 23:58:45
2017-08-01 00:16:25
428
128
27508
Subscriber
1968.000000
2
1735584
364
2017-07-31 23:58:47
2017-08-01 00:04:51
383
509
27677
Subscriber
1966.000000
1
1735585
1343
2017-07-31 23:58:50
2017-08-01 00:21:14
161
406
20912
Subscriber
1985.000000
1
1735586
936
2017-07-31 23:58:54
2017-08-01 00:14:30
442
477
19612
Subscriber
1995.000000
1
1735587
277
2017-07-31 23:59:10
2017-08-01 00:03:47
495
423
30198
Subscriber
1988.000000
1
1735588
2294
2017-07-31 23:59:09
2017-08-01 00:37:24
232
3342
18769
Subscriber
1999.000000
1
1735589
1916
2017-07-31 23:59:11
2017-08-01 00:31:07
440
3338
25644
Subscriber
1992.000000
1
1735590
2244
2017-07-31 23:59:10
2017-08-01 00:36:35
3165
3163
30201
Subscriber
1950.000000
1
1735591
428
2017-07-31 23:59:11
2017-08-01 00:06:20
477
513
30127
Subscriber
1980.000000
1
1735592
215
2017-07-31 23:59:15
2017-08-01 00:02:51
3164
3170
18057
Subscriber
1970.000000
1
1735593
3058
2017-07-31 23:59:31
2017-08-01 00:50:30
3418
293
30161
Subscriber
1963.000000
1
1735594
560
2017-07-31 23:59:33
2017-08-01 00:08:53
3260
236
16126
Subscriber
1990.000000
2
1735595
272
2017-07-31 23:59:35
2017-08-01 00:04:08
247
434
27684
Subscriber
1999.000000
1
1735596
1211
2017-07-31 23:59:37
2017-08-01 00:19:49
3303
3308
28402
Subscriber
1975.000000
2
1735597
547
2017-07-31 23:59:48
2017-08-01 00:08:56
276
127
30150
Subscriber
1992.000000
1
1735598
1816
2017-07-31 23:59:57
2017-08-01 00:30:13
3091
258
19643
Customer
1979.591607
0
1735599 rows × 9 columns
In [12]:
import bisect
import datetime
try:
out_flow = pd.read_csv('out_flow.csv')
in_flow = pd.read_csv('in_flow.csv')
except:
begin = datetime.datetime(2017, 7, 1, 0, 0, 0)
end = datetime.datetime(2017, 7, 31, 23, 30, 0)
date_list = [ end - datetime.timedelta(seconds=x*60*30) for x in range(0, 1488)][::-1]
#print(date_list)
tmp = pd.DataFrame(np.zeros((len(station['id']), len(date_list))), columns=date_list)
in_flow = pd.DataFrame({'id', 'time', 'inflow', 'outflow'})
out_flow = pd.DataFrame({'id', 'time', 'inflow', 'outflow'})
get_idx = {}
for idx, row in station.iterrows():
get_idx[row.iloc[0]] = idx
for idx, row in path.iterrows():
date = datetime.datetime.strptime(row.iloc[1], "%Y-%m-%d %H:%M:%S")
out_num = bisect.bisect_right(date_list, date)
out_flow.iloc[30 * row['start station id'] + out_num] += 1
date = datetime.datetime.strptime(row.iloc[2], "%Y-%m-%d %H:%M:%S")
in_num = bisect.bisect_right(date_list, date)
in_flow.iloc[get_idx[row.iloc[4]], in_num] += 1
out_flow.to_csv('out_flow.csv', index=False)
in_flow.to_csv('in_flow.csv', index=False)
print('in-flow')
print(in_flow)
print('out-flow')
print(out_flow)
in-flow
id 2017-07-01 00:00:00 2017-07-01 00:30:00 2017-07-01 01:00:00 \
0 72 1.0 0.0 2.0
1 79 1.0 0.0 0.0
2 82 0.0 0.0 0.0
3 83 0.0 0.0 0.0
4 116 0.0 0.0 0.0
5 119 0.0 0.0 0.0
6 120 0.0 0.0 1.0
7 127 0.0 2.0 2.0
8 128 1.0 1.0 1.0
9 143 0.0 0.0 0.0
10 144 0.0 0.0 0.0
11 146 0.0 0.0 0.0
12 150 0.0 2.0 1.0
13 151 0.0 0.0 1.0
14 152 0.0 1.0 1.0
15 153 0.0 0.0 0.0
16 157 0.0 0.0 0.0
17 161 0.0 6.0 0.0
18 164 1.0 0.0 0.0
19 167 0.0 0.0 0.0
20 168 1.0 0.0 0.0
21 173 3.0 0.0 0.0
22 174 0.0 0.0 4.0
23 195 1.0 1.0 2.0
24 212 1.0 0.0 0.0
25 216 0.0 0.0 0.0
26 217 0.0 1.0 0.0
27 223 1.0 0.0 0.0
28 228 0.0 0.0 0.0
29 229 0.0 0.0 1.0
.. ... ... ... ...
604 3436 0.0 0.0 0.0
605 3437 0.0 0.0 0.0
606 3438 0.0 0.0 0.0
607 3440 0.0 0.0 1.0
608 3441 0.0 0.0 0.0
609 3443 0.0 0.0 0.0
610 3445 0.0 0.0 0.0
611 3447 0.0 1.0 0.0
612 3449 0.0 0.0 0.0
613 3452 0.0 0.0 0.0
614 3453 0.0 2.0 0.0
615 3454 0.0 0.0 3.0
616 3455 0.0 0.0 0.0
617 3456 0.0 0.0 0.0
618 3457 0.0 1.0 0.0
619 3458 0.0 3.0 2.0
620 3459 0.0 0.0 1.0
621 3461 0.0 0.0 0.0
622 3462 0.0 0.0 0.0
623 3463 0.0 2.0 0.0
624 3464 0.0 0.0 0.0
625 3466 0.0 0.0 1.0
626 3468 0.0 0.0 0.0
627 3469 0.0 0.0 0.0
628 3470 0.0 0.0 0.0
629 3472 0.0 0.0 2.0
630 3474 0.0 0.0 0.0
631 3476 0.0 0.0 0.0
632 3477 0.0 0.0 0.0
633 3478 0.0 0.0 0.0
2017-07-01 01:30:00 2017-07-01 02:00:00 2017-07-01 02:30:00 \
0 0.0 0.0 0.0
1 1.0 0.0 1.0
2 0.0 0.0 0.0
3 0.0 0.0 0.0
4 2.0 0.0 0.0
5 0.0 0.0 0.0
6 0.0 0.0 0.0
7 0.0 1.0 0.0
8 0.0 2.0 3.0
9 0.0 1.0 0.0
10 0.0 0.0 0.0
11 0.0 0.0 0.0
12 3.0 0.0 1.0
13 1.0 0.0 0.0
14 0.0 0.0 0.0
15 0.0 2.0 0.0
16 0.0 1.0 0.0
17 1.0 1.0 2.0
18 0.0 0.0 0.0
19 0.0 0.0 0.0
20 5.0 0.0 1.0
21 0.0 0.0 0.0
22 0.0 1.0 1.0
23 0.0 0.0 0.0
24 0.0 0.0 0.0
25 0.0 0.0 0.0
26 0.0 0.0 0.0
27 1.0 0.0 0.0
28 2.0 0.0 0.0
29 1.0 0.0 2.0
.. ... ... ...
604 0.0 0.0 0.0
605 0.0 0.0 0.0
606 0.0 1.0 1.0
607 1.0 0.0 0.0
608 0.0 0.0 0.0
609 0.0 0.0 0.0
610 0.0 0.0 0.0
611 0.0 0.0 2.0
612 1.0 0.0 0.0
613 0.0 0.0 0.0
614 0.0 0.0 0.0
615 1.0 0.0 0.0
616 0.0 1.0 0.0
617 0.0 0.0 0.0
618 0.0 0.0 0.0
619 0.0 0.0 0.0
620 0.0 0.0 0.0
621 0.0 0.0 0.0
622 1.0 0.0 0.0
623 1.0 0.0 0.0
624 0.0 0.0 0.0
625 0.0 0.0 0.0
626 0.0 0.0 0.0
627 0.0 0.0 0.0
628 0.0 0.0 0.0
629 0.0 0.0 0.0
630 0.0 0.0 0.0
631 0.0 0.0 0.0
632 0.0 0.0 0.0
633 0.0 0.0 0.0
2017-07-01 03:00:00 2017-07-01 03:30:00 2017-07-01 04:00:00 \
0 0.0 0.0 0.0
1 0.0 0.0 0.0
2 0.0 0.0 0.0
3 0.0 0.0 0.0
4 0.0 0.0 0.0
5 0.0 0.0 0.0
6 1.0 0.0 0.0
7 1.0 0.0 0.0
8 0.0 0.0 1.0
9 0.0 0.0 0.0
10 0.0 0.0 0.0
11 0.0 0.0 0.0
12 0.0 1.0 0.0
13 1.0 0.0 0.0
14 0.0 0.0 1.0
15 0.0 0.0 0.0
16 0.0 0.0 0.0
17 0.0 0.0 0.0
18 0.0 0.0 0.0
19 0.0 0.0 0.0
20 0.0 0.0 0.0
21 0.0 0.0 0.0
22 2.0 0.0 0.0
23 1.0 0.0 0.0
24 0.0 0.0 0.0
25 0.0 0.0 0.0
26 0.0 0.0 0.0
27 0.0 0.0 0.0
28 0.0 0.0 0.0
29 2.0 0.0 0.0
.. ... ... ...
604 0.0 0.0 0.0
605 0.0 0.0 0.0
606 0.0 0.0 0.0
607 1.0 0.0 0.0
608 0.0 0.0 0.0
609 0.0 0.0 0.0
610 0.0 0.0 0.0
611 0.0 0.0 0.0
612 0.0 0.0 0.0
613 0.0 0.0 0.0
614 0.0 0.0 0.0
615 0.0 1.0 0.0
616 0.0 0.0 0.0
617 0.0 0.0 0.0
618 0.0 0.0 1.0
619 0.0 0.0 0.0
620 0.0 0.0 0.0
621 0.0 0.0 0.0
622 0.0 0.0 0.0
623 0.0 0.0 0.0
624 0.0 0.0 0.0
625 0.0 0.0 0.0
626 0.0 0.0 0.0
627 0.0 0.0 0.0
628 0.0 0.0 0.0
629 0.0 0.0 0.0
630 0.0 0.0 0.0
631 0.0 0.0 0.0
632 0.0 0.0 0.0
633 0.0 0.0 0.0
... 2017-07-31 19:00:00 2017-07-31 19:30:00 \
0 ... 5.0 7.0
1 ... 2.0 6.0
2 ... 1.0 3.0
3 ... 1.0 3.0
4 ... 2.0 5.0
5 ... 0.0 1.0
6 ... 4.0 6.0
7 ... 22.0 16.0
8 ... 13.0 15.0
9 ... 9.0 1.0
10 ... 1.0 2.0
11 ... 4.0 7.0
12 ... 2.0 4.0
13 ... 13.0 16.0
14 ... 3.0 7.0
15 ... 6.0 3.0
16 ... 2.0 6.0
17 ... 6.0 7.0
18 ... 2.0 4.0
19 ... 4.0 5.0
20 ... 13.0 11.0
21 ... 4.0 6.0
22 ... 6.0 8.0
23 ... 2.0 3.0
24 ... 8.0 9.0
25 ... 4.0 0.0
26 ... 11.0 6.0
27 ... 9.0 4.0
28 ... 2.0 2.0
29 ... 28.0 13.0
.. ... ... ...
604 ... 10.0 5.0
605 ... 2.0 1.0
606 ... 2.0 1.0
607 ... 1.0 4.0
608 ... 2.0 0.0
609 ... 1.0 2.0
610 ... 0.0 0.0
611 ... 6.0 1.0
612 ... 2.0 1.0
613 ... 0.0 0.0
614 ... 4.0 1.0
615 ... 3.0 2.0
616 ... 2.0 1.0
617 ... 0.0 1.0
618 ... 0.0 0.0
619 ... 6.0 1.0
620 ... 2.0 2.0
621 ... 6.0 10.0
622 ... 2.0 0.0
623 ... 5.0 4.0
624 ... 0.0 0.0
625 ... 1.0 3.0
626 ... 1.0 0.0
627 ... 3.0 1.0
628 ... 0.0 0.0
629 ... 11.0 4.0
630 ... 3.0 1.0
631 ... 3.0 4.0
632 ... 2.0 0.0
633 ... 1.0 0.0
2017-07-31 20:00:00 2017-07-31 20:30:00 2017-07-31 21:00:00 \
0 10.0 6.0 2.0
1 1.0 0.0 4.0
2 1.0 4.0 3.0
3 0.0 1.0 1.0
4 7.0 8.0 2.0
5 1.0 0.0 0.0
6 0.0 0.0 0.0
7 5.0 6.0 6.0
8 11.0 12.0 3.0
9 6.0 4.0 2.0
10 0.0 0.0 0.0
11 4.0 4.0 0.0
12 4.0 5.0 1.0
13 12.0 11.0 6.0
14 5.0 4.0 2.0
15 1.0 3.0 3.0
16 5.0 0.0 0.0
17 9.0 6.0 6.0
18 6.0 1.0 1.0
19 5.0 4.0 1.0
20 8.0 6.0 1.0
21 1.0 2.0 5.0
22 6.0 3.0 1.0
23 4.0 5.0 2.0
24 2.0 5.0 6.0
25 1.0 3.0 4.0
26 2.0 3.0 2.0
27 2.0 6.0 7.0
28 2.0 4.0 0.0
29 18.0 6.0 2.0
.. ... ... ...
604 5.0 3.0 2.0
605 2.0 6.0 2.0
606 0.0 2.0 5.0
607 0.0 2.0 5.0
608 0.0 0.0 0.0
609 0.0 1.0 1.0
610 0.0 0.0 0.0
611 3.0 1.0 2.0
612 2.0 0.0 0.0
613 0.0 0.0 0.0
614 4.0 2.0 0.0
615 7.0 3.0 4.0
616 0.0 1.0 1.0
617 3.0 1.0 1.0
618 0.0 0.0 0.0
619 3.0 3.0 1.0
620 0.0 0.0 3.0
621 4.0 8.0 2.0
622 2.0 3.0 2.0
623 8.0 3.0 2.0
624 0.0 0.0 0.0
625 4.0 1.0 1.0
626 0.0 0.0 0.0
627 0.0 0.0 3.0
628 0.0 0.0 0.0
629 2.0 1.0 1.0
630 3.0 3.0 2.0
631 2.0 1.0 2.0
632 0.0 0.0 0.0
633 1.0 2.0 0.0
2017-07-31 21:30:00 2017-07-31 22:00:00 2017-07-31 22:30:00 \
0 0.0 1.0 3.0
1 1.0 3.0 1.0
2 0.0 2.0 1.0
3 0.0 0.0 3.0
4 1.0 1.0 2.0
5 0.0 0.0 0.0
6 1.0 0.0 2.0
7 9.0 3.0 1.0
8 6.0 3.0 3.0
9 0.0 3.0 1.0
10 0.0 1.0 0.0
11 3.0 1.0 0.0
12 1.0 1.0 0.0
13 5.0 2.0 7.0
14 1.0 2.0 2.0
15 0.0 2.0 0.0
16 1.0 1.0 2.0
17 6.0 3.0 0.0
18 0.0 1.0 2.0
19 4.0 1.0 3.0
20 3.0 1.0 2.0
21 5.0 3.0 1.0
22 3.0 2.0 2.0
23 2.0 2.0 3.0
24 5.0 2.0 1.0
25 0.0 0.0 0.0
26 3.0 2.0 0.0
27 2.0 4.0 0.0
28 1.0 1.0 1.0
29 3.0 2.0 4.0
.. ... ... ...
604 3.0 4.0 0.0
605 0.0 2.0 0.0
606 2.0 0.0 0.0
607 2.0 1.0 0.0
608 0.0 0.0 0.0
609 2.0 1.0 0.0
610 0.0 0.0 0.0
611 1.0 1.0 0.0
612 0.0 1.0 2.0
613 0.0 0.0 0.0
614 0.0 0.0 0.0
615 1.0 1.0 1.0
616 0.0 3.0 0.0
617 1.0 2.0 0.0
618 0.0 0.0 0.0
619 0.0 0.0 0.0
620 3.0 1.0 0.0
621 1.0 1.0 0.0
622 1.0 3.0 1.0
623 2.0 2.0 8.0
624 0.0 0.0 0.0
625 1.0 0.0 0.0
626 0.0 0.0 0.0
627 1.0 0.0 0.0
628 0.0 0.0 0.0
629 0.0 1.0 0.0
630 0.0 2.0 1.0
631 2.0 1.0 1.0
632 0.0 0.0 0.0
633 0.0 0.0 0.0
2017-07-31 23:00:00 2017-07-31 23:30:00
0 0.0 3.0
1 0.0 1.0
2 1.0 0.0
3 0.0 1.0
4 1.0 3.0
5 0.0 1.0
6 2.0 1.0
7 3.0 6.0
8 2.0 1.0
9 1.0 0.0
10 0.0 0.0
11 0.0 1.0
12 1.0 1.0
13 2.0 6.0
14 2.0 0.0
15 0.0 0.0
16 0.0 0.0
17 1.0 2.0
18 0.0 0.0
19 2.0 1.0
20 0.0 0.0
21 2.0 0.0
22 7.0 2.0
23 1.0 3.0
24 1.0 2.0
25 0.0 0.0
26 0.0 0.0
27 1.0 2.0
28 0.0 0.0
29 4.0 2.0
.. ... ...
604 0.0 0.0
605 1.0 1.0
606 0.0 1.0
607 2.0 0.0
608 0.0 0.0
609 0.0 2.0
610 0.0 0.0
611 1.0 1.0
612 0.0 0.0
613 0.0 0.0
614 1.0 2.0
615 1.0 0.0
616 0.0 0.0
617 0.0 0.0
618 0.0 0.0
619 1.0 0.0
620 3.0 0.0
621 0.0 0.0
622 1.0 0.0
623 2.0 1.0
624 0.0 0.0
625 1.0 0.0
626 0.0 0.0
627 3.0 2.0
628 0.0 0.0
629 0.0 0.0
630 1.0 1.0
631 2.0 0.0
632 0.0 0.0
633 0.0 0.0
[634 rows x 1489 columns]
out-flow
id 2017-07-01 00:00:00 2017-07-01 00:30:00 2017-07-01 01:00:00 \
0 72 0.0 0.0 1.0
1 79 0.0 0.0 1.0
2 82 0.0 0.0 0.0
3 83 0.0 1.0 2.0
4 116 1.0 1.0 0.0
5 119 0.0 0.0 0.0
6 120 0.0 1.0 2.0
7 127 1.0 1.0 0.0
8 128 3.0 4.0 2.0
9 143 0.0 0.0 0.0
10 144 0.0 0.0 0.0
11 146 0.0 1.0 0.0
12 150 2.0 0.0 4.0
13 151 1.0 0.0 0.0
14 152 0.0 0.0 0.0
15 153 1.0 0.0 0.0
16 157 0.0 1.0 0.0
17 161 2.0 4.0 3.0
18 164 0.0 3.0 1.0
19 167 0.0 1.0 0.0
20 168 1.0 0.0 0.0
21 173 0.0 3.0 0.0
22 174 0.0 0.0 0.0
23 195 0.0 0.0 2.0
24 212 3.0 0.0 0.0
25 216 0.0 0.0 0.0
26 217 2.0 1.0 0.0
27 223 2.0 1.0 0.0
28 228 0.0 0.0 1.0
29 229 2.0 1.0 2.0
.. ... ... ... ...
604 3436 0.0 0.0 0.0
605 3437 0.0 0.0 0.0
606 3438 0.0 0.0 0.0
607 3440 1.0 0.0 0.0
608 3441 0.0 0.0 0.0
609 3443 0.0 0.0 0.0
610 3445 0.0 0.0 0.0
611 3447 1.0 0.0 0.0
612 3449 1.0 0.0 0.0
613 3452 0.0 0.0 0.0
614 3453 0.0 1.0 3.0
615 3454 0.0 1.0 0.0
616 3455 0.0 1.0 0.0
617 3456 0.0 0.0 0.0
618 3457 0.0 0.0 1.0
619 3458 0.0 3.0 0.0
620 3459 2.0 0.0 0.0
621 3461 0.0 0.0 0.0
622 3462 1.0 0.0 0.0
623 3463 0.0 0.0 1.0
624 3464 0.0 0.0 0.0
625 3466 1.0 0.0 1.0
626 3468 0.0 0.0 0.0
627 3469 0.0 0.0 0.0
628 3470 0.0 0.0 0.0
629 3472 0.0 3.0 3.0
630 3474 0.0 0.0 0.0
631 3476 0.0 0.0 0.0
632 3477 0.0 0.0 0.0
633 3478 0.0 0.0 0.0
2017-07-01 01:30:00 2017-07-01 02:00:00 2017-07-01 02:30:00 \
0 1.0 0.0 0.0
1 2.0 2.0 0.0
2 0.0 0.0 0.0
3 0.0 0.0 0.0
4 0.0 1.0 0.0
5 0.0 0.0 0.0
6 0.0 2.0 0.0
7 0.0 0.0 0.0
8 1.0 0.0 0.0
9 0.0 0.0 0.0
10 0.0 0.0 0.0
11 0.0 0.0 0.0
12 0.0 1.0 0.0
13 1.0 1.0 0.0
14 0.0 0.0 1.0
15 0.0 0.0 0.0
16 4.0 0.0 0.0
17 0.0 0.0 1.0
18 0.0 0.0 0.0
19 0.0 0.0 0.0
20 6.0 0.0 0.0
21 0.0 0.0 0.0
22 0.0 0.0 0.0
23 0.0 0.0 0.0
24 0.0 0.0 0.0
25 0.0 0.0 0.0
26 0.0 0.0 0.0
27 0.0 0.0 1.0
28 0.0 1.0 0.0
29 0.0 0.0 0.0
.. ... ... ...
604 0.0 0.0 0.0
605 0.0 0.0 0.0
606 0.0 0.0 0.0
607 0.0 0.0 0.0
608 0.0 0.0 0.0
609 0.0 0.0 0.0
610 0.0 0.0 0.0
611 0.0 0.0 1.0
612 0.0 0.0 0.0
613 0.0 0.0 0.0
614 0.0 0.0 0.0
615 2.0 0.0 1.0
616 0.0 0.0 0.0
617 0.0 0.0 0.0
618 0.0 0.0 0.0
619 0.0 0.0 1.0
620 0.0 0.0 0.0
621 0.0 0.0 0.0
622 0.0 0.0 1.0
623 0.0 0.0 0.0
624 0.0 0.0 0.0
625 0.0 0.0 0.0
626 0.0 0.0 0.0
627 0.0 0.0 0.0
628 0.0 0.0 0.0
629 2.0 0.0 0.0
630 0.0 0.0 0.0
631 0.0 0.0 0.0
632 0.0 0.0 0.0
633 0.0 0.0 0.0
2017-07-01 03:00:00 2017-07-01 03:30:00 2017-07-01 04:00:00 \
0 0.0 0.0 0.0
1 0.0 0.0 0.0
2 0.0 0.0 0.0
3 0.0 0.0 0.0
4 0.0 0.0 0.0
5 0.0 0.0 0.0
6 0.0 0.0 0.0
7 0.0 0.0 0.0
8 0.0 0.0 0.0
9 0.0 0.0 0.0
10 0.0 0.0 0.0
11 0.0 0.0 0.0
12 1.0 1.0 0.0
13 0.0 0.0 0.0
14 0.0 0.0 1.0
15 0.0 0.0 0.0
16 0.0 0.0 0.0
17 1.0 0.0 0.0
18 0.0 0.0 0.0
19 0.0 0.0 0.0
20 0.0 0.0 0.0
21 0.0 0.0 0.0
22 0.0 1.0 0.0
23 0.0 0.0 0.0
24 0.0 0.0 0.0
25 0.0 0.0 0.0
26 0.0 0.0 0.0
27 0.0 0.0 0.0
28 0.0 0.0 0.0
29 0.0 1.0 1.0
.. ... ... ...
604 0.0 0.0 0.0
605 0.0 0.0 0.0
606 0.0 0.0 0.0
607 1.0 0.0 0.0
608 0.0 0.0 0.0
609 0.0 0.0 0.0
610 0.0 0.0 0.0
611 0.0 0.0 0.0
612 0.0 0.0 0.0
613 0.0 0.0 0.0
614 0.0 0.0 0.0
615 0.0 0.0 1.0
616 0.0 0.0 0.0
617 0.0 0.0 0.0
618 0.0 0.0 0.0
619 0.0 0.0 0.0
620 0.0 0.0 0.0
621 0.0 0.0 0.0
622 0.0 0.0 0.0
623 0.0 0.0 0.0
624 0.0 0.0 0.0
625 0.0 0.0 1.0
626 0.0 0.0 0.0
627 0.0 0.0 0.0
628 0.0 0.0 0.0
629 0.0 2.0 0.0
630 0.0 0.0 1.0
631 0.0 0.0 0.0
632 0.0 0.0 0.0
633 0.0 0.0 0.0
... 2017-07-31 19:00:00 2017-07-31 19:30:00 \
0 ... 12.0 9.0
1 ... 2.0 6.0
2 ... 4.0 5.0
3 ... 6.0 1.0
4 ... 2.0 1.0
5 ... 1.0 0.0
6 ... 2.0 1.0
7 ... 15.0 14.0
8 ... 17.0 13.0
9 ... 8.0 0.0
10 ... 1.0 2.0
11 ... 3.0 5.0
12 ... 0.0 4.0
13 ... 19.0 16.0
14 ... 12.0 8.0
15 ... 4.0 4.0
16 ... 1.0 3.0
17 ... 9.0 5.0
18 ... 3.0 3.0
19 ... 4.0 4.0
20 ... 13.0 10.0
21 ... 4.0 6.0
22 ... 2.0 10.0
23 ... 1.0 3.0
24 ... 9.0 9.0
25 ... 1.0 1.0
26 ... 8.0 1.0
27 ... 1.0 4.0
28 ... 3.0 2.0
29 ... 13.0 19.0
.. ... ... ...
604 ... 6.0 7.0
605 ... 3.0 1.0
606 ... 2.0 1.0
607 ... 6.0 2.0
608 ... 0.0 1.0
609 ... 0.0 3.0
610 ... 0.0 0.0
611 ... 9.0 2.0
612 ... 0.0 0.0
613 ... 0.0 0.0
614 ... 2.0 1.0
615 ... 1.0 2.0
616 ... 0.0 0.0
617 ... 1.0 0.0
618 ... 0.0 0.0
619 ... 6.0 1.0
620 ... 2.0 2.0
621 ... 7.0 5.0
622 ... 2.0 1.0
623 ... 13.0 2.0
624 ... 0.0 0.0
625 ... 0.0 4.0
626 ... 0.0 0.0
627 ... 2.0 4.0
628 ... 0.0 0.0
629 ... 3.0 4.0
630 ... 4.0 1.0
631 ... 4.0 1.0
632 ... 0.0 0.0
633 ... 5.0 0.0
2017-07-31 20:00:00 2017-07-31 20:30:00 2017-07-31 21:00:00 \
0 7.0 4.0 2.0
1 3.0 0.0 2.0
2 3.0 3.0 1.0
3 5.0 0.0 0.0
4 7.0 9.0 5.0
5 0.0 0.0 0.0
6 2.0 0.0 1.0
7 7.0 5.0 10.0
8 5.0 17.0 3.0
9 5.0 1.0 4.0
10 0.0 0.0 0.0
11 3.0 4.0 4.0
12 4.0 5.0 2.0
13 10.0 9.0 6.0
14 9.0 3.0 4.0
15 0.0 4.0 2.0
16 3.0 2.0 2.0
17 11.0 7.0 2.0
18 5.0 3.0 1.0
19 6.0 4.0 1.0
20 9.0 7.0 1.0
21 1.0 2.0 1.0
22 8.0 3.0 0.0
23 4.0 5.0 3.0
24 1.0 3.0 7.0
25 0.0 3.0 2.0
26 3.0 5.0 7.0
27 4.0 8.0 4.0
28 2.0 4.0 0.0
29 16.0 16.0 5.0
.. ... ... ...
604 7.0 2.0 2.0
605 2.0 1.0 0.0
606 4.0 1.0 4.0
607 1.0 1.0 3.0
608 0.0 0.0 0.0
609 0.0 0.0 2.0
610 0.0 0.0 0.0
611 1.0 3.0 1.0
612 0.0 1.0 0.0
613 0.0 0.0 0.0
614 2.0 3.0 0.0
615 2.0 0.0 1.0
616 0.0 0.0 0.0
617 2.0 3.0 0.0
618 0.0 0.0 0.0
619 2.0 2.0 2.0
620 0.0 0.0 3.0
621 9.0 3.0 7.0
622 1.0 3.0 3.0
623 10.0 0.0 3.0
624 0.0 0.0 0.0
625 4.0 0.0 2.0
626 1.0 0.0 0.0
627 1.0 0.0 2.0
628 0.0 0.0 0.0
629 9.0 1.0 0.0
630 1.0 2.0 5.0
631 2.0 0.0 0.0
632 0.0 0.0 0.0
633 1.0 0.0 0.0
2017-07-31 21:30:00 2017-07-31 22:00:00 2017-07-31 22:30:00 \
0 2.0 3.0 3.0
1 3.0 0.0 3.0
2 1.0 1.0 0.0
3 2.0 0.0 0.0
4 0.0 1.0 1.0
5 0.0 0.0 0.0
6 0.0 1.0 1.0
7 4.0 9.0 3.0
8 3.0 5.0 4.0
9 2.0 1.0 0.0
10 0.0 0.0 0.0
11 3.0 1.0 4.0
12 0.0 1.0 0.0
13 9.0 1.0 6.0
14 7.0 0.0 1.0
15 1.0 1.0 1.0
16 5.0 4.0 1.0
17 9.0 9.0 3.0
18 0.0 1.0 0.0
19 1.0 4.0 1.0
20 1.0 3.0 0.0
21 7.0 3.0 2.0
22 0.0 2.0 3.0
23 0.0 2.0 2.0
24 4.0 2.0 4.0
25 1.0 1.0 2.0
26 5.0 3.0 1.0
27 2.0 3.0 1.0
28 1.0 1.0 1.0
29 7.0 2.0 2.0
.. ... ... ...
604 3.0 3.0 2.0
605 2.0 0.0 1.0
606 1.0 1.0 2.0
607 2.0 1.0 3.0
608 0.0 1.0 0.0
609 0.0 2.0 1.0
610 0.0 0.0 0.0
611 2.0 0.0 1.0
612 0.0 0.0 1.0
613 0.0 0.0 0.0
614 1.0 1.0 0.0
615 2.0 1.0 1.0
616 0.0 0.0 0.0
617 0.0 0.0 0.0
618 0.0 0.0 0.0
619 0.0 0.0 0.0
620 2.0 2.0 0.0
621 1.0 0.0 1.0
622 1.0 2.0 0.0
623 2.0 2.0 0.0
624 0.0 0.0 0.0
625 1.0 0.0 0.0
626 0.0 0.0 0.0
627 0.0 0.0 2.0
628 0.0 0.0 0.0
629 0.0 0.0 2.0
630 0.0 0.0 1.0
631 0.0 0.0 0.0
632 0.0 0.0 0.0
633 0.0 0.0 1.0
2017-07-31 23:00:00 2017-07-31 23:30:00
0 1.0 1.0
1 0.0 1.0
2 1.0 0.0
3 3.0 0.0
4 3.0 0.0
5 0.0 0.0
6 2.0 0.0
7 3.0 1.0
8 5.0 0.0
9 2.0 0.0
10 0.0 1.0
11 9.0 1.0
12 1.0 0.0
13 2.0 1.0
14 1.0 1.0
15 0.0 0.0
16 1.0 0.0
17 3.0 3.0
18 0.0 1.0
19 4.0 0.0
20 0.0 1.0
21 0.0 2.0
22 0.0 0.0
23 2.0 2.0
24 1.0 0.0
25 0.0 0.0
26 1.0 1.0
27 2.0 3.0
28 0.0 0.0
29 2.0 0.0
.. ... ...
604 0.0 0.0
605 0.0 0.0
606 0.0 0.0
607 1.0 0.0
608 0.0 0.0
609 0.0 1.0
610 0.0 0.0
611 0.0 0.0
612 1.0 1.0
613 0.0 0.0
614 1.0 2.0
615 0.0 0.0
616 0.0 1.0
617 0.0 0.0
618 0.0 0.0
619 1.0 0.0
620 2.0 1.0
621 0.0 0.0
622 2.0 0.0
623 2.0 1.0
624 0.0 0.0
625 1.0 0.0
626 0.0 0.0
627 0.0 0.0
628 0.0 0.0
629 0.0 0.0
630 0.0 0.0
631 0.0 0.0
632 0.0 0.0
633 0.0 0.0
[634 rows x 1489 columns]
In [10]:
from pyproj import Geod
wgs84_geod = Geod(ellps='WGS84')
def Distance(lat1,lon1,lat2,lon2):
az12,az21,dist = wgs84_geod.inv(lon1,lat1,lon2,lat2)
return dist
try:
dist = pd.read_csv('dist.csv')
except:
dist = pd.DataFrame(columns=['begin', 'end', 'dist'])
for idx1, row1 in station.iterrows():
for idx2, row2 in station.iterrows():
dist = dist.append(pd.Series([row1.iloc[0], row2.iloc[0], Distance(row1.iloc[2], row1.iloc[3], row2.iloc[2], row2.iloc[3])], index=['begin','end', 'dist']), ignore_index=True)
#print(row1.iloc[0], row2.iloc[0], Distance(row1.iloc[2], row1.iloc[3], row2.iloc[2], row2.iloc[3]))
#print(dist)
dist.to_csv('dist.csv', index=False)
dist
Out[10]:
begin
end
dist
0
72.0
72.0
0.000000
1
72.0
79.0
5454.887660
2
72.0
82.0
6251.852068
3
72.0
83.0
9385.185658
4
72.0
116.0
2902.567147
5
72.0
119.0
8017.980523
6
72.0
120.0
9406.865190
7
72.0
127.0
4093.238958
8
72.0
128.0
4525.685810
9
72.0
143.0
8315.137982
10
72.0
144.0
7729.624333
11
72.0
146.0
5809.148046
12
72.0
150.0
5269.452069
13
72.0
151.0
5023.733805
14
72.0
152.0
5972.838300
15
72.0
153.0
1982.665966
16
72.0
157.0
8483.861600
17
72.0
161.0
4245.835806
18
72.0
164.0
2530.547873
19
72.0
167.0
2538.067353
20
72.0
168.0
3060.890092
21
72.0
173.0
1079.593568
22
72.0
174.0
3520.114705
23
72.0
195.0
6613.424594
24
72.0
212.0
2870.916185
25
72.0
216.0
7429.595912
26
72.0
217.0
7162.714520
27
72.0
223.0
3310.421420
28
72.0
228.0
2333.778707
29
72.0
229.0
4423.978882
...
...
...
...
400659
3478.0
3436.0
7133.574376
400660
3478.0
3437.0
15343.248062
400661
3478.0
3438.0
13475.585153
400662
3478.0
3440.0
4246.080050
400663
3478.0
3441.0
10658.325912
400664
3478.0
3443.0
11830.298091
400665
3478.0
3445.0
15175.385265
400666
3478.0
3447.0
12989.537331
400667
3478.0
3449.0
8799.005948
400668
3478.0
3452.0
8549.782723
400669
3478.0
3453.0
8026.181423
400670
3478.0
3454.0
7884.224721
400671
3478.0
3455.0
4078.431514
400672
3478.0
3456.0
8336.518248
400673
3478.0
3457.0
12164.058313
400674
3478.0
3458.0
12047.870104
400675
3478.0
3459.0
11650.978189
400676
3478.0
3461.0
6417.978497
400677
3478.0
3462.0
10914.589079
400678
3478.0
3463.0
8867.450597
400679
3478.0
3464.0
10717.721983
400680
3478.0
3466.0
11278.430101
400681
3478.0
3468.0
8629.975167
400682
3478.0
3469.0
9264.882495
400683
3478.0
3470.0
1831.487926
400684
3478.0
3472.0
9513.521058
400685
3478.0
3474.0
7579.706374
400686
3478.0
3476.0
9064.965484
400687
3478.0
3477.0
248.394878
400688
3478.0
3478.0
0.000000
400689 rows × 3 columns
In [11]:
sz = station.shape[0]
n = sz * (sz-1) / 2
ans = 0
for idx, row in dist.iterrows():
if row.iloc[0] == row.iloc[1]:
continue
ans += row.iloc[2] / n
print('average distance:', ans, 'm')
average distance: 10786.9349309 m
In [12]:
from collections import defaultdict
import datetime
weekday = dict()
weekday = defaultdict(lambda: 0, weekday)
weekend = dict()
weekend = defaultdict(lambda: 0, weekend)
for idx, row in path.iterrows():
if datetime.datetime.strptime(row.iloc[1], "%Y-%m-%d %H:%M:%S").isoweekday() > 5: # weekend
weekend[(row.iloc[3], row.iloc[4])] += 1
else:
weekday[(row.iloc[3], row.iloc[4])] += 1
top_weekday = sorted(weekday, key=weekday.get)[::-1][:3]
top_weekend = sorted(weekend, key=weekend.get)[::-1][:3]
print(top_weekday)
print(top_weekend)
[(432.0, 3263.0), (3263.0, 432.0), (519.0, 498.0)]
[(3254.0, 3182.0), (3182.0, 3182.0), (3182.0, 3254.0)]
In [13]:
from collections import defaultdict
import datetime
weekday_in = dict()
weekday_in = defaultdict(lambda: 0, weekday_in)
weekend_in = dict()
weekend_in = defaultdict(lambda: 0, weekend_in)
weekday_out = dict()
weekday_out = defaultdict(lambda: 0, weekday_out)
weekend_out = dict()
weekend_out = defaultdict(lambda: 0, weekend_out)
allDay_in = dict()
allDay_in = defaultdict(lambda: 0, allDay_in)
allDay_out = dict()
allDay_out = defaultdict(lambda: 0, allDay_out)
allDay = dict()
allDay = defaultdict(lambda: 0, allDay)
for idx, row in path.iterrows():
if 'freq' not in station.columns:
allDay_in[row.iloc[4]] += 1
allDay_out[row.iloc[3]] += 1
allDay[row.iloc[4]] += 1
allDay[row.iloc[3]] += 1
if datetime.datetime.strptime(row.iloc[1], "%Y-%m-%d %H:%M:%S").isoweekday() > 5: # weekend
weekend_out[row.iloc[3]] += 1
weekend_in[row.iloc[4]] += 1
else:
weekday_out[row.iloc[3]] += 1
weekday_in[row.iloc[4]] += 1
if 'freq' not in station.columns:
station['in'] = station['id'].map(allDay_in)
station['out'] = station['id'].map(allDay_out)
station['freq'] = station['id'].map(allDay)
station.to_csv('station.csv', index=False)
top_weekday_in = sorted(weekday_in, key=weekday_in.get)[::-1][:3]
top_weekday_out = sorted(weekday_out, key=weekday_out.get)[::-1][:3]
top_weekend_in = sorted(weekend_in, key=weekend_in.get)[::-1][:3]
top_weekend_out = sorted(weekend_out, key=weekend_out.get)[::-1][:3]
print('weekday in_flow:', top_weekday_in)
print('weekday out_flow:', top_weekday_in)
print('weekend in_flow:', top_weekend_in)
print('weekend out_flow:', top_weekend_in)
weekday in_flow: [519.0, 402.0, 426.0]
weekday out_flow: [519.0, 402.0, 426.0]
weekend in_flow: [426.0, 435.0, 497.0]
weekend out_flow: [426.0, 435.0, 497.0]
drow line chart
In [14]:
import matplotlib.pyplot as plt
most = station.nlargest(1, 'freq').index[0]
in_most = in_flow.iloc[most][1:]
out_most = out_flow.iloc[most][1:]
plt.figure()
axes = plt.gca()
axes.set_ylim([-10,130])
out_most.plot(figsize=(15, 5))
in_most.plot(figsize=(15, 5))
plt.show()
In [15]:
from sklearn.metrics import pairwise
pairwise.pairwise_distances(np.array([in_most.tolist(), out_most.tolist()]), metric='minkowski', n_jobs=4, p=2)[0][1]
Out[15]:
171.40303381212365
In [16]:
plt.figure()
axes = plt.gca()
axes.set_ylim([-50,120])
in_most.sub(in_most.mean()).plot(figsize=(15, 5))
out_most.sub(out_most.mean()).plot(figsize=(15, 5))
plt.show()
# print(in_most.mean())
# print(out_most.mean())
pairwise.pairwise_distances(np.array([in_most.sub(in_most.mean()).tolist(), out_most.sub(out_most.mean()).tolist()]), metric='minkowski', n_jobs=4, p=2)[0][1]
Out[16]:
171.28531933087413
In [17]:
plt.figure()
axes = plt.gca()
axes.set_ylim([-5,10])
in_most.sub(in_most.mean()).divide(in_most.std()).plot(figsize=(15, 5))
out_most.sub(out_most.mean()).divide(out_most.std()).plot(figsize=(15, 5))
plt.show()
#print(in_most.mean(), in_most.std())
#print(out_most.mean(), out_most.std())
pairwise.pairwise_distances(np.array([in_most.sub(in_most.mean()).divide(in_most.std()).tolist(), out_most.sub(out_most.mean()).divide(out_most.std()).tolist()]), metric='minkowski', n_jobs=4, p=2)[0][1]
Out[17]:
11.990369115699348
In [18]:
from sklearn import linear_model
import matplotlib.pyplot as plt
reg_in = linear_model.LinearRegression(normalize=True)
reg_out = linear_model.LinearRegression(normalize=True)
y = [i for i in range(len(in_most))]
in_X = [[i] for i in in_most.tolist()]
out_X = [[i] for i in out_most.tolist()]
reg_in.fit(X=in_X, y=y)
reg_out.fit(X=out_X, y=y)
in_df = pd.DataFrame(reg_in.predict(in_X)).T
in_df.columns = in_most.keys()
out_df = pd.DataFrame(reg_out.predict(out_X)).T
out_df.columns = out_most.keys()
in_s = in_df.iloc[0][:]
out_s = out_df.iloc[0][:]
plt.figure()
in_s.plot(figsize=(15, 5))
out_s.plot(figsize=(15, 5))
plt.show()
pairwise.pairwise_distances(np.array([in_s.tolist(), out_s.tolist()]), metric='minkowski', n_jobs=4, p=2)[0][1]
Out[18]:
530.12990552891745
In [19]:
from sklearn.neighbors.kde import KernelDensity
in_tmp = [[i, j] for i,j in enumerate(in_most)]
out_tmp = [[j, i] for i,j in enumerate(out_most)]
kde_in = KernelDensity(kernel='gaussian', bandwidth=30).fit(in_tmp)
kde_out = KernelDensity(kernel='cosine', bandwidth=30).fit(out_tmp)
in_arr = kde_in.score_samples(in_tmp)
out_arr = kde_out.score_samples(out_tmp)
in_ss = pd.Series(in_arr.tolist(), index=in_most.keys())
out_ss = pd.Series(out_arr.tolist(), index=in_most.keys())
#print(in_ss)
plt.figure()
# axes = plt.gca()
# axes.set_ylim([0,1])
in_ss.plot(figsize=(15, 5))
out_ss.plot(figsize=(15, 5))
plt.show()
根據上面的資料,我們可以發現尖峰時刻是有大週期的,而有趣的是,除了大週期之外,每個大週期內,也都有小週期,大週期與小週期的成因,也許是可以研究的方向。
Content source: calee0219/Course
Similar notebooks: