In [34]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
import dask.dataframe as dd
import dask.distributed
import numpy as np
import pandas as pd
import geopandas as gpd
from matplotlib.colors import SymLogNorm as symlog
from matplotlib import rcParams
import sklearn, sklearn.cluster
import matplotlib.pyplot as plt
import palettable
import seaborn as sns
# Raise pandas' display limits: up to 300 rows and 100 columns are rendered
# before a frame's output is truncated.
pd.set_option('display.max_rows', 300)
pd.set_option('display.max_columns', 100)
In [2]:
# Create a dask.distributed client with default arguments.
# NOTE(review): with no address given this presumably starts a local cluster — confirm.
client = dask.distributed.Client()
In [3]:
# Lazily open the trips parquet dataset with the fastparquet engine, indexed by
# trip_id, loading only the columns this notebook uses.
trips = dd.read_parquet(
    '/data/all_trips.parquet',
    engine='fastparquet',
    index='trip_id',
    columns=[
        'pickup_datetime',
        'dropoff_datetime',
        'pickup_taxizone_id',
        'dropoff_taxizone_id',
        'trip_distance',
        'fare_amount',
        'trip_type',
    ],
)
/home/shekhar/anaconda3/lib/python3.5/site-packages/fastparquet/api.py:436: UserWarning: Regression warning: found category spec from fastparquet <= 0.0.6
warnings.warn('Regression warning: found category spec from '
In [4]:
# Preview the first rows of the loaded trips frame.
trips.head()
Out[4]:
pickup_datetime
dropoff_datetime
pickup_taxizone_id
dropoff_taxizone_id
trip_distance
fare_amount
trip_type
trip_id
0
2009-01-01 00:00:00
2009-01-01 00:04:12
237.0
263.0
1.3
5.800000
yellow
1
2009-01-01 00:00:00
2009-01-01 00:05:03
114.0
249.0
0.9
5.400000
yellow
2
2009-01-01 00:00:02
2009-01-01 00:05:40
237.0
43.0
1.0
5.800000
yellow
3
2009-01-01 00:00:04
2009-01-01 00:03:08
261.0
261.0
0.8
4.600000
yellow
4
2009-01-01 00:00:07
2009-01-01 00:19:01
144.0
80.0
5.5
27.799999
yellow
In [7]:
# Trips with a missing pickup or dropoff zone get the placeholder zone id 266,
# so they still take part in the pair counts below.
trips = trips.assign(
    pickup_taxizone_id=trips.pickup_taxizone_id.fillna(266),
    dropoff_taxizone_id=trips.dropoff_taxizone_id.fillna(266),
)
# Exclude every row whose trip_type is 'uber'.
trips = trips[trips.trip_type != 'uber']
In [8]:
# Per (pickup zone, dropoff zone) pair, count the non-null values of every
# remaining column, then materialise the result with compute().
mm = (
    trips
    .groupby(['pickup_taxizone_id', 'dropoff_taxizone_id'])
    .count()
    .compute()
)
In [65]:
# Turn the (pickup, dropoff) group index back into ordinary columns, then order
# the pairs from most to fewest trips using the 'pickup_datetime' count column.
mm2 = mm.reset_index()
mm2 = mm2.sort_values(by='pickup_datetime', ascending=False)
# Optional filter, currently disabled: keep only pairs with more than 500 trips.
# mm2 = mm2[mm2.pickup_datetime > 500]
In [66]:
# The zone ids are floats at this point (see the trips preview); cast both id
# columns to integers so they can be used as array indices and merge keys.
mm2 = mm2.astype({'pickup_taxizone_id': int, 'dropoff_taxizone_id': int})
mm2.head()
Out[66]:
pickup_taxizone_id
dropoff_taxizone_id
pickup_datetime
dropoff_datetime
trip_distance
fare_amount
trip_type
18445
266
266
25478914
25478914
25478914
25478914
25478914
16053
237
236
5694001
5694001
5694001
5694001
5694001
15857
236
237
5081429
5081429
5081429
5081429
5081429
16054
237
237
4815975
4815975
4815975
4815975
4815975
15856
236
236
4815481
4815481
4815481
4815481
4815481
In [67]:
# Dense zone-by-zone matrix, initialised to zero; indexed as
# [pickup zone id, dropoff zone id] by the fill step that follows.
k2 = np.zeros(shape=(268, 268), dtype=np.float64)
In [69]:
# Scatter the per-pair trip counts into the dense matrix:
# row = pickup zone id, column = dropoff zone id, value = trip count.
# A single fancy-index assignment replaces the Python-level loop over every
# (pickup, dropoff) row; the result is the same because NumPy keeps the last
# value written for any repeated index pair, exactly as the loop did.
k2[mm2.pickup_taxizone_id.values.astype(int),
   mm2.dropoff_taxizone_id.values.astype(int)] = mm2.pickup_datetime.values
In [70]:
# Log-scale the counts; the +1 offset maps empty pairs (count 0) to log10(1) = 0.
# NOTE(review): k2 is overwritten, so re-running this cell applies the transform again.
k2 = np.log10(k2 + 1.)
In [71]:
# Heatmap of the log-scaled zone-to-zone trip counts (k2 is indexed [pickup, dropoff]).
sns.heatmap(k2)
/home/shekhar/anaconda3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
(prop.get_family(), self.defaultFamily[fontext]))
Out[71]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4535254860>
In [125]:
# Number of bytes occupied by the k2 array's elements.
k2.nbytes
Out[125]:
574592
In [ ]:
In [72]:
# Load the taxi-zone shapefile (path is relative to the notebook's working directory).
gdf = gpd.read_file('../shapefiles/taxi_zones.shp')
In [127]:
# Group the zones spatially: k-means with 45 clusters and a fixed seed, fitted
# on the (x, y) coordinates of each zone polygon's centroid.
X1 = np.array([[pt.x, pt.y] for pt in gdf.geometry.centroid.values])
km = sklearn.cluster.KMeans(n_clusters=45, random_state=43).fit(X1)
# Store each zone's cluster label alongside its geometry.
gdf['labels'] = km.labels_
# Map of the zones coloured by cluster label, drawn on a 7x7 inch figure.
gdf.plot(column='labels', cmap=plt.cm.gist_ncar_r)
plt.gcf().set_size_inches(7, 7)
/home/shekhar/anaconda3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
(prop.get_family(), self.defaultFamily[fontext]))
In [74]:
# Preview the zone table, now including the 'labels' cluster column.
gdf.head()
Out[74]:
LocationID
OBJECTID
Shape_Area
Shape_Leng
borough
geometry
zone
labels
0
1
1
0.000782
0.116357
EWR
POLYGON ((933100.9183527103 192536.0856972019,...
Newark Airport
30
1
2
2
0.004866
0.433470
Queens
(POLYGON ((1033269.243591294 172126.0078125, 1...
Jamaica Bay
41
2
3
3
0.000314
0.084341
Bronx
POLYGON ((1026308.769506663 256767.6975403726,...
Allerton/Pelham Gardens
26
3
4
4
0.000112
0.043567
Manhattan
POLYGON ((992073.4667968601 203714.0759887695,...
Alphabet City
15
4
5
5
0.000498
0.092146
Staten Island
POLYGON ((935843.3104932606 144283.335850656, ...
Arden Heights
2
In [75]:
# Keep only the zone id and its cluster label: the lookup table used by the merges below.
gdf2 = gdf[['LocationID', 'labels']]
In [76]:
# Attach the cluster label of each pair's pickup zone. The left join keeps
# pairs whose pickup zone id has no match in the shapefile (label becomes NaN);
# the join key copied from gdf2 is dropped again afterwards.
mm3 = (
    mm2
    .merge(gdf2, left_on='pickup_taxizone_id', right_on='LocationID', how='left')
    .drop('LocationID', axis=1)
)
In [77]:
# Name the merged label column after the side of the trip it describes.
mm3 = mm3.rename(columns={'labels': 'pickup_cluster_id'})
In [78]:
# Display mm3 to inspect the new pickup_cluster_id column.
# NOTE(review): this renders the frame up to the display.max_rows limit set at the top
# of the notebook; mm3.head() would keep the output short.
mm3
Out[78]:
pickup_taxizone_id
dropoff_taxizone_id
pickup_datetime
dropoff_datetime
trip_distance
fare_amount
trip_type
pickup_cluster_id
0
266
266
25478914
25478914
25478914
25478914
25478914
NaN
1
237
236
5694001
5694001
5694001
5694001
5694001
5.0
2
236
237
5081429
5081429
5081429
5081429
5081429
31.0
3
237
237
4815975
4815975
4815975
4815975
4815975
5.0
4
236
236
4815481
4815481
4815481
4815481
4815481
31.0
5
237
161
3059531
3059531
3059531
3059531
3059531
5.0
6
237
162
2924670
2924670
2924670
2924670
2924670
5.0
7
239
142
2873235
2873235
2873235
2873235
2873235
31.0
8
79
79
2816956
2816956
2816956
2816956
2816956
15.0
9
239
238
2718197
2718197
2718197
2718197
2718197
31.0
10
142
239
2647112
2647112
2647112
2647112
2647112
18.0
11
161
161
2586617
2586617
2586617
2586617
2586617
18.0
12
186
230
2550821
2550821
2550821
2550821
2550821
44.0
13
161
237
2527754
2527754
2527754
2527754
2527754
18.0
14
186
161
2525270
2525270
2525270
2525270
2525270
44.0
15
107
170
2503489
2503489
2503489
2503489
2503489
44.0
16
48
48
2491840
2491840
2491840
2491840
2491840
18.0
17
230
186
2460674
2460674
2460674
2460674
2460674
18.0
18
141
236
2381177
2381177
2381177
2381177
2381177
5.0
19
238
239
2363896
2363896
2363896
2363896
2363896
31.0
20
48
68
2341972
2341972
2341972
2341972
2341972
18.0
21
234
170
2326896
2326896
2326896
2326896
2326896
44.0
22
170
161
2219762
2219762
2219762
2219762
2219762
44.0
23
236
161
2174157
2174157
2174157
2174157
2174157
31.0
24
141
237
2167450
2167450
2167450
2167450
2167450
5.0
25
237
141
2145887
2145887
2145887
2145887
2145887
5.0
26
68
68
2130762
2130762
2130762
2130762
2130762
44.0
27
162
170
2126148
2126148
2126148
2126148
2126148
18.0
28
170
170
2125852
2125852
2125852
2125852
2125852
44.0
29
79
107
2116225
2116225
2116225
2116225
2116225
15.0
30
170
234
2101772
2101772
2101772
2101772
2101772
44.0
31
239
239
2086732
2086732
2086732
2086732
2086732
31.0
32
230
161
2077512
2077512
2077512
2077512
2077512
18.0
33
48
161
2065388
2065388
2065388
2065388
2065388
18.0
34
186
170
2046542
2046542
2046542
2046542
2046542
44.0
35
236
162
2034974
2034974
2034974
2034974
2034974
31.0
36
164
161
2030221
2030221
2030221
2030221
2030221
44.0
37
141
141
2013400
2013400
2013400
2013400
2013400
5.0
38
163
161
2009192
2009192
2009192
2009192
2009192
18.0
39
186
234
1986150
1986150
1986150
1986150
1986150
44.0
40
237
142
1974961
1974961
1974961
1974961
1974961
5.0
41
161
230
1972389
1972389
1972389
1972389
1972389
18.0
42
142
237
1967034
1967034
1967034
1967034
1967034
18.0
43
161
234
1947178
1947178
1947178
1947178
1947178
18.0
44
161
164
1941897
1941897
1941897
1941897
1941897
18.0
45
162
162
1939644
1939644
1939644
1939644
1939644
18.0
46
230
230
1938713
1938713
1938713
1938713
1938713
18.0
47
236
239
1937838
1937838
1937838
1937838
1937838
31.0
48
161
236
1929287
1929287
1929287
1929287
1929287
18.0
49
236
141
1911745
1911745
1911745
1911745
1911745
31.0
50
234
161
1911051
1911051
1911051
1911051
1911051
44.0
51
170
162
1904241
1904241
1904241
1904241
1904241
44.0
52
142
238
1897997
1897997
1897997
1897997
1897997
18.0
53
237
140
1891457
1891457
1891457
1891457
1891457
5.0
54
239
236
1882957
1882957
1882957
1882957
1882957
31.0
55
142
48
1878419
1878419
1878419
1878419
1878419
18.0
56
162
237
1874894
1874894
1874894
1874894
1874894
18.0
57
263
141
1869891
1869891
1869891
1869891
1869891
31.0
58
48
230
1843792
1843792
1843792
1843792
1843792
18.0
59
170
107
1836602
1836602
1836602
1836602
1836602
44.0
60
263
236
1832637
1832637
1832637
1832637
1832637
31.0
61
142
161
1811615
1811615
1811615
1811615
1811615
18.0
62
162
230
1805658
1805658
1805658
1805658
1805658
18.0
63
107
79
1803263
1803263
1803263
1803263
1803263
44.0
64
142
163
1799617
1799617
1799617
1799617
1799617
18.0
65
142
142
1795936
1795936
1795936
1795936
1795936
18.0
66
234
186
1788592
1788592
1788592
1788592
1788592
44.0
67
262
236
1786877
1786877
1786877
1786877
1786877
5.0
68
234
68
1786075
1786075
1786075
1786075
1786075
44.0
69
142
230
1762959
1762959
1762959
1762959
1762959
18.0
70
138
230
1759989
1759989
1759989
1759989
1759989
6.0
71
161
186
1756978
1756978
1756978
1756978
1756978
18.0
72
79
170
1754538
1754538
1754538
1754538
1754538
15.0
73
236
262
1754113
1754113
1754113
1754113
1754113
31.0
74
68
48
1753478
1753478
1753478
1753478
1753478
44.0
75
161
170
1747067
1747067
1747067
1747067
1747067
18.0
76
234
79
1744766
1744766
1744766
1744766
1744766
44.0
77
162
161
1742637
1742637
1742637
1742637
1742637
18.0
78
100
161
1741597
1741597
1741597
1741597
1741597
44.0
79
238
142
1735227
1735227
1735227
1735227
1735227
31.0
80
236
238
1733182
1733182
1733182
1733182
1733182
31.0
81
170
186
1715733
1715733
1715733
1715733
1715733
44.0
82
161
163
1713528
1713528
1713528
1713528
1713528
18.0
83
163
230
1683297
1683297
1683297
1683297
1683297
18.0
84
141
263
1683113
1683113
1683113
1683113
1683113
5.0
85
163
237
1653577
1653577
1653577
1653577
1653577
18.0
86
230
162
1640926
1640926
1640926
1640926
1640926
18.0
87
236
142
1631695
1631695
1631695
1631695
1631695
31.0
88
234
234
1630645
1630645
1630645
1630645
1630645
44.0
89
249
79
1623006
1623006
1623006
1623006
1623006
15.0
90
148
79
1621862
1621862
1621862
1621862
1621862
15.0
91
48
142
1621324
1621324
1621324
1621324
1621324
18.0
92
140
237
1612580
1612580
1612580
1612580
1612580
5.0
93
100
230
1603369
1603369
1603369
1603369
1603369
44.0
94
141
162
1602978
1602978
1602978
1602978
1602978
5.0
95
162
186
1597610
1597610
1597610
1597610
1597610
18.0
96
238
236
1594935
1594935
1594935
1594935
1594935
31.0
97
231
231
1594776
1594776
1594776
1594776
1594776
15.0
98
68
234
1592096
1592096
1592096
1592096
1592096
44.0
99
79
234
1591735
1591735
1591735
1591735
1591735
15.0
100
237
263
1582634
1582634
1582634
1582634
1582634
5.0
101
230
163
1567777
1567777
1567777
1567777
1567777
18.0
102
162
141
1564648
1564648
1564648
1564648
1564648
18.0
103
164
230
1563864
1563864
1563864
1563864
1563864
44.0
104
236
263
1562653
1562653
1562653
1562653
1562653
31.0
105
142
236
1561048
1561048
1561048
1561048
1561048
18.0
106
164
170
1544269
1544269
1544269
1544269
1544269
44.0
107
230
142
1541506
1541506
1541506
1541506
1541506
18.0
108
249
234
1539925
1539925
1539925
1539925
1539925
15.0
109
162
236
1526584
1526584
1526584
1526584
1526584
18.0
110
79
148
1524357
1524357
1524357
1524357
1524357
15.0
111
164
234
1515699
1515699
1515699
1515699
1515699
44.0
112
236
75
1510158
1510158
1510158
1510158
1510158
31.0
113
230
48
1507490
1507490
1507490
1507490
1507490
18.0
114
48
163
1506290
1506290
1506290
1506290
1506290
18.0
115
237
163
1505826
1505826
1505826
1505826
1505826
5.0
116
163
236
1493066
1493066
1493066
1493066
1493066
18.0
117
107
186
1491325
1491325
1491325
1491325
1491325
44.0
118
79
249
1486325
1486325
1486325
1486325
1486325
15.0
119
186
48
1482586
1482586
1482586
1482586
1482586
44.0
120
90
68
1476483
1476483
1476483
1476483
1476483
44.0
121
140
236
1474445
1474445
1474445
1474445
1474445
5.0
122
107
107
1474415
1474415
1474415
1474415
1474415
44.0
123
68
246
1470137
1470137
1470137
1470137
1470137
44.0
124
170
230
1458922
1458922
1458922
1458922
1458922
44.0
125
132
132
1456910
1456910
1456910
1456910
1456910
40.0
126
161
162
1453831
1453831
1453831
1453831
1453831
18.0
127
234
230
1442893
1442893
1442893
1442893
1442893
44.0
128
162
107
1437080
1437080
1437080
1437080
1437080
18.0
129
141
161
1432747
1432747
1432747
1432747
1432747
5.0
130
162
234
1425901
1425901
1425901
1425901
1425901
18.0
131
107
234
1425472
1425472
1425472
1425472
1425472
44.0
132
186
162
1422269
1422269
1422269
1422269
1422269
44.0
133
238
238
1421285
1421285
1421285
1421285
1421285
31.0
134
234
249
1419052
1419052
1419052
1419052
1419052
44.0
135
163
142
1418020
1418020
1418020
1418020
1418020
18.0
136
48
186
1411914
1411914
1411914
1411914
1411914
18.0
137
163
162
1409327
1409327
1409327
1409327
1409327
18.0
138
170
79
1407257
1407257
1407257
1407257
1407257
44.0
139
113
79
1392544
1392544
1392544
1392544
1392544
15.0
140
107
162
1392319
1392319
1392319
1392319
1392319
44.0
141
229
141
1386250
1386250
1386250
1386250
1386250
5.0
142
230
170
1382575
1382575
1382575
1382575
1382575
18.0
143
43
43
1376527
1376527
1376527
1376527
1376527
31.0
144
230
100
1368431
1368431
1368431
1368431
1368431
18.0
145
230
138
1365237
1365237
1365237
1365237
1365237
18.0
146
79
114
1358877
1358877
1358877
1358877
1358877
15.0
147
138
162
1353284
1353284
1353284
1353284
1353284
6.0
148
162
164
1343218
1343218
1343218
1343218
1343218
18.0
149
79
186
1336524
1336524
1336524
1336524
1336524
15.0
...
...
...
...
...
...
...
...
...
61099
214
87
1
1
1
1
1
35.0
61100
35
59
1
1
1
1
1
29.0
61101
121
153
1
1
1
1
1
11.0
61102
156
207
1
1
1
1
1
24.0
61103
167
64
1
1
1
1
1
28.0
61104
184
258
1
1
1
1
1
38.0
61105
165
176
1
1
1
1
1
17.0
61106
139
182
1
1
1
1
1
3.0
61107
126
11
1
1
1
1
1
28.0
61108
94
207
1
1
1
1
1
13.0
61109
81
134
1
1
1
1
1
26.0
61110
60
27
1
1
1
1
1
25.0
61111
58
196
1
1
1
1
1
38.0
61112
58
146
1
1
1
1
1
38.0
61113
8
206
1
1
1
1
1
21.0
61114
21
9
1
1
1
1
1
4.0
61115
9
182
1
1
1
1
1
27.0
61116
1
167
1
1
1
1
1
30.0
61117
259
135
1
1
1
1
1
26.0
61118
242
106
1
1
1
1
1
25.0
61119
30
186
1
1
1
1
1
41.0
61120
207
108
1
1
1
1
1
21.0
61121
200
92
1
1
1
1
1
13.0
61122
26
3
1
1
1
1
1
9.0
61123
21
34
1
1
1
1
1
4.0
61124
18
133
1
1
1
1
1
13.0
61125
98
3
1
1
1
1
1
32.0
61126
235
221
1
1
1
1
1
7.0
61127
222
18
1
1
1
1
1
29.0
61128
206
235
1
1
1
1
1
8.0
61129
190
84
1
1
1
1
1
9.0
61130
178
94
1
1
1
1
1
4.0
61131
58
97
1
1
1
1
1
38.0
61132
123
34
1
1
1
1
1
4.0
61133
122
91
1
1
1
1
1
32.0
61134
120
8
1
1
1
1
1
7.0
61135
171
34
1
1
1
1
1
27.0
61136
60
257
1
1
1
1
1
25.0
61137
59
82
1
1
1
1
1
7.0
61138
187
129
1
1
1
1
1
24.0
61139
51
22
1
1
1
1
1
26.0
61140
14
153
1
1
1
1
1
34.0
61141
206
169
1
1
1
1
1
8.0
61142
201
237
1
1
1
1
1
10.0
61143
99
232
1
1
1
1
1
2.0
61144
38
213
1
1
1
1
1
3.0
61145
241
19
1
1
1
1
1
13.0
61146
44
137
1
1
1
1
1
43.0
61147
122
222
1
1
1
1
1
32.0
61148
196
172
1
1
1
1
1
12.0
61149
171
29
1
1
1
1
1
27.0
61150
111
193
1
1
1
1
1
9.0
61151
96
245
1
1
1
1
1
12.0
61152
73
78
1
1
1
1
1
27.0
61153
206
177
1
1
1
1
1
8.0
61154
251
178
1
1
1
1
1
8.0
61155
34
215
1
1
1
1
1
0.0
61156
26
73
1
1
1
1
1
9.0
61157
18
22
1
1
1
1
1
13.0
61158
2
195
1
1
1
1
1
41.0
61159
6
243
1
1
1
1
1
35.0
61160
27
145
1
1
1
1
1
33.0
61161
41
103
1
1
1
1
1
42.0
61162
59
10
1
1
1
1
1
7.0
61163
64
52
1
1
1
1
1
14.0
61164
185
39
1
1
1
1
1
26.0
61165
192
60
1
1
1
1
1
11.0
61166
202
206
1
1
1
1
1
5.0
61167
69
22
1
1
1
1
1
7.0
61168
253
240
1
1
1
1
1
6.0
61169
242
19
1
1
1
1
1
25.0
61170
81
49
1
1
1
1
1
26.0
61171
31
210
1
1
1
1
1
13.0
61172
254
91
1
1
1
1
1
26.0
61173
180
12
1
1
1
1
1
29.0
61174
27
76
1
1
1
1
1
33.0
61175
254
64
1
1
1
1
1
26.0
61176
153
139
1
1
1
1
1
13.0
61177
149
169
1
1
1
1
1
4.0
61178
149
32
1
1
1
1
1
4.0
61179
98
150
1
1
1
1
1
32.0
61180
71
147
1
1
1
1
1
17.0
61181
47
118
1
1
1
1
1
7.0
61182
241
73
1
1
1
1
1
13.0
61183
222
247
1
1
1
1
1
29.0
61184
109
142
1
1
1
1
1
36.0
61185
51
222
1
1
1
1
1
26.0
61186
69
111
1
1
1
1
1
7.0
61187
119
214
1
1
1
1
1
7.0
61188
86
83
1
1
1
1
1
22.0
61189
108
185
1
1
1
1
1
4.0
61190
98
217
1
1
1
1
1
32.0
61191
94
217
1
1
1
1
1
13.0
61192
63
194
1
1
1
1
1
12.0
61193
59
97
1
1
1
1
1
7.0
61194
34
60
1
1
1
1
1
0.0
61195
29
172
1
1
1
1
1
4.0
61196
84
133
1
1
1
1
1
43.0
61197
122
21
1
1
1
1
1
32.0
61198
44
210
1
1
1
1
1
43.0
61199
22
212
1
1
1
1
1
34.0
61200
187
50
1
1
1
1
1
24.0
61201
221
91
1
1
1
1
1
35.0
61202
157
206
1
1
1
1
1
37.0
61203
123
254
1
1
1
1
1
4.0
61204
102
153
1
1
1
1
1
12.0
61205
98
147
1
1
1
1
1
32.0
61206
156
136
1
1
1
1
1
24.0
61207
118
250
1
1
1
1
1
19.0
61208
221
53
1
1
1
1
1
35.0
61209
167
98
1
1
1
1
1
28.0
61210
92
109
1
1
1
1
1
6.0
61211
78
96
1
1
1
1
1
25.0
61212
76
8
1
1
1
1
1
29.0
61213
243
199
1
1
1
1
1
7.0
61214
245
42
1
1
1
1
1
8.0
61215
15
31
1
1
1
1
1
27.0
61216
254
207
1
1
1
1
1
26.0
61217
10
44
1
1
1
1
1
23.0
61218
240
202
1
1
1
1
1
13.0
61219
226
44
1
1
1
1
1
37.0
61220
202
101
1
1
1
1
1
5.0
61221
39
101
1
1
1
1
1
29.0
61222
51
209
1
1
1
1
1
26.0
61223
18
190
1
1
1
1
1
13.0
61224
6
24
1
1
1
1
1
35.0
61225
139
169
1
1
1
1
1
3.0
61226
240
53
1
1
1
1
1
13.0
61227
227
120
1
1
1
1
1
9.0
61228
225
109
1
1
1
1
1
16.0
61229
250
96
1
1
1
1
1
25.0
61230
203
183
1
1
1
1
1
3.0
61231
69
149
1
1
1
1
1
7.0
61232
44
22
1
1
1
1
1
43.0
61233
156
134
1
1
1
1
1
24.0
61234
117
1
1
1
1
1
1
22.0
61235
175
178
1
1
1
1
1
14.0
61236
193
44
1
1
1
1
1
5.0
61237
187
225
1
1
1
1
1
24.0
61238
165
175
1
1
1
1
1
17.0
61239
175
190
1
1
1
1
1
14.0
61240
97
99
1
1
1
1
1
20.0
61241
32
91
1
1
1
1
1
26.0
61242
21
115
1
1
1
1
1
4.0
61243
31
8
1
1
1
1
1
13.0
61244
19
169
1
1
1
1
1
14.0
61245
11
93
1
1
1
1
1
34.0
61246
1
47
1
1
1
1
1
30.0
61247
259
196
1
1
1
1
1
26.0
61248
265
2
1
1
1
1
1
NaN
61249 rows × 8 columns
In [79]:
# Same lookup as for the pickup side, now keyed on the dropoff zone: left-join
# the cluster labels, drop the duplicated join key, and name the new column.
# NOTE(review): mm3 is overwritten, so re-running this cell merges a second time.
mm3 = (
    mm3
    .merge(gdf2, left_on='dropoff_taxizone_id', right_on='LocationID', how='left')
    .drop('LocationID', axis=1)
    .rename(columns={'labels': 'dropoff_cluster_id'})
)
In [81]:
# Zone ids with no match in the shapefile came out of the left joins with a NaN
# cluster label. Put them in an extra catch-all cluster, 45, one past the k-means
# labels 0..44. The fill is restricted to the two cluster-id columns so that a
# NaN in any other column is never silently turned into 45.
mm4 = mm3.fillna({'pickup_cluster_id': 45., 'dropoff_cluster_id': 45.})
In [93]:
# Collapse the zone-level counts to cluster level: total trips for every
# (pickup cluster, dropoff cluster) pair, with the pair returned as columns.
mm5 = (
    mm4[['pickup_cluster_id', 'dropoff_cluster_id', 'pickup_datetime']]
    .groupby(['pickup_cluster_id', 'dropoff_cluster_id'])
    .sum()
    .reset_index()
)
In [94]:
# The cluster ids became floats when NaN labels were introduced by the joins;
# cast them back to integers so they can index the cluster matrix.
mm5 = mm5.astype({'pickup_cluster_id': int, 'dropoff_cluster_id': int})
In [96]:
# Preview the cluster-to-cluster trip totals.
mm5.head()
Out[96]:
pickup_cluster_id
dropoff_cluster_id
pickup_datetime
0
0
0
4851108
1
0
1
1886
2
0
2
77
3
0
3
1882
4
0
4
16032
In [95]:
# Dense cluster-by-cluster matrix, initialised to zero: 45 k-means clusters
# plus the catch-all cluster 45, indexed [pickup cluster, dropoff cluster].
k3 = np.zeros(shape=(46, 46), dtype=np.float64)
In [98]:
# Scatter the cluster-level trip totals into the dense matrix:
# row = pickup cluster id, column = dropoff cluster id, value = trip total.
# A single fancy-index assignment replaces the Python-level loop over the rows;
# NumPy keeps the last value written for a repeated index pair, as the loop did.
k3[mm5.pickup_cluster_id.values.astype(int),
   mm5.dropoff_cluster_id.values.astype(int)] = mm5.pickup_datetime.values
In [99]:
# Log-scale the totals; the +1 offset maps empty pairs (total 0) to log10(1) = 0.
# NOTE(review): k3 is overwritten, so re-running this cell applies the transform again.
k3 = np.log10(k3 + 1.)
In [126]:
# Heatmap of the log-scaled cluster-to-cluster trip totals. k3 is indexed
# [pickup cluster, dropoff cluster], so rows (y axis) are pickups and columns
# (x axis) are dropoffs; label the axes so the orientation is explicit.
sns.heatmap(k3).set(
    title='log10(1 + trips) between zone clusters',
    xlabel='dropoff cluster id',
    ylabel='pickup cluster id',
)
plt.gcf().set_size_inches(8, 8)
/home/shekhar/anaconda3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
(prop.get_family(), self.defaultFamily[fontext]))
In [110]:
# Copy the cluster labels into a second column named 'l'; the filter in the
# next cell reads it as gdf.l.
gdf['l'] = gdf['labels']
In [123]:
# List the zones that k-means assigned to cluster 18.
gdf[gdf.l == 18]
Out[123]:
LocationID
OBJECTID
Shape_Area
Shape_Leng
borough
geometry
zone
labels
l
47
48
48
0.000094
0.043747
Manhattan
POLYGON ((986694.3125067502 214463.8461607993,...
Clinton East
18
18
49
50
50
0.000173
0.055748
Manhattan
POLYGON ((985170.3721923977 221087.3887939602,...
Clinton West
18
18
141
142
142
0.000076
0.038176
Manhattan
POLYGON ((989380.3045081049 218980.2473062277,...
Lincoln Square East
18
18
142
143
143
0.000151
0.054180
Manhattan
POLYGON ((989338.1001118571 223572.2528185844,...
Lincoln Square West
18
18
160
161
161
0.000072
0.035804
Manhattan
POLYGON ((991081.0260630846 214453.6983589679,...
Midtown Center
18
18
161
162
162
0.000048
0.035270
Manhattan
POLYGON ((992224.354090333 214415.2926926613, ...
Midtown East
18
18
162
163
163
0.000041
0.034177
Manhattan
POLYGON ((989412.6634775698 219020.9428979903,...
Midtown North
18
18
229
230
230
0.000056
0.031028
Manhattan
POLYGON ((988786.8773103654 214532.0940539986,...
Times Sq/Theatre District
18
18
In [ ]:
Content source: r-shekhar/NYC-transport
Similar notebooks: