In [34]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

import dask.dataframe as dd
import dask.distributed
import numpy as np
import pandas as pd
import geopandas as gpd

from matplotlib.colors import SymLogNorm as symlog
from matplotlib import rcParams

import sklearn, sklearn.cluster
import matplotlib.pyplot as plt
import palettable

import seaborn as sns

pd.options.display.max_rows = 300
pd.options.display.max_columns = 100

In [2]:
client = dask.distributed.Client()

In [3]:
trips = dd.read_parquet('/data/all_trips.parquet', engine='fastparquet', index='trip_id',
                       columns=['pickup_datetime', 'dropoff_datetime', 
                                'pickup_taxizone_id', 'dropoff_taxizone_id',
                                'trip_distance', 'fare_amount', 'trip_type'
                               ])


/home/shekhar/anaconda3/lib/python3.5/site-packages/fastparquet/api.py:436: UserWarning: Regression warning: found category spec from fastparquet <= 0.0.6
  warnings.warn('Regression warning: found category spec from '

In [4]:
trips.head()


Out[4]:
pickup_datetime dropoff_datetime pickup_taxizone_id dropoff_taxizone_id trip_distance fare_amount trip_type
trip_id
0 2009-01-01 00:00:00 2009-01-01 00:04:12 237.0 263.0 1.3 5.800000 yellow
1 2009-01-01 00:00:00 2009-01-01 00:05:03 114.0 249.0 0.9 5.400000 yellow
2 2009-01-01 00:00:02 2009-01-01 00:05:40 237.0 43.0 1.0 5.800000 yellow
3 2009-01-01 00:00:04 2009-01-01 00:03:08 261.0 261.0 0.8 4.600000 yellow
4 2009-01-01 00:00:07 2009-01-01 00:19:01 144.0 80.0 5.5 27.799999 yellow

In [7]:
trips.pickup_taxizone_id = trips.pickup_taxizone_id.fillna(266)
trips.dropoff_taxizone_id = trips.dropoff_taxizone_id.fillna(266)
trips = trips[trips.trip_type != 'uber']

In [8]:
mm = trips.groupby(['pickup_taxizone_id', 'dropoff_taxizone_id']).count().compute()

In [65]:
mm2 = mm.reset_index().sort_values('pickup_datetime', ascending=False)
# mm2 = mm2[mm2.pickup_datetime > 500]

In [66]:
mm2.pickup_taxizone_id = mm2.pickup_taxizone_id.astype(int)
mm2.dropoff_taxizone_id = mm2.dropoff_taxizone_id.astype(int)
mm2.head()


Out[66]:
pickup_taxizone_id dropoff_taxizone_id pickup_datetime dropoff_datetime trip_distance fare_amount trip_type
18445 266 266 25478914 25478914 25478914 25478914 25478914
16053 237 236 5694001 5694001 5694001 5694001 5694001
15857 236 237 5081429 5081429 5081429 5081429 5081429
16054 237 237 4815975 4815975 4815975 4815975 4815975
15856 236 236 4815481 4815481 4815481 4815481 4815481

In [67]:
k2 = np.zeros((268, 268))

In [69]:
for i, j, N in zip(mm2.pickup_taxizone_id.astype(int), mm2.dropoff_taxizone_id.astype(int), mm2.pickup_datetime):
    k2[i, j] = N

In [70]:
k2 += 1.
k2 = np.log10(k2)

In [71]:
sns.heatmap(k2)


/home/shekhar/anaconda3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))
Out[71]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4535254860>

In [125]:
k2.nbytes


Out[125]:
574592

In [ ]:


In [72]:
gdf = gpd.read_file('../shapefiles/taxi_zones.shp')

In [127]:
X1 = np.array([(p.x, p.y) for p in gdf.geometry.centroid.values])
km = sklearn.cluster.KMeans(45, random_state=43)
km.fit(X1)
gdf['labels'] = km.labels_
gdf.plot(column='labels', cmap=plt.cm.gist_ncar_r)
plt.gcf().set_size_inches(7, 7)


/home/shekhar/anaconda3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

In [74]:
gdf.head()


Out[74]:
LocationID OBJECTID Shape_Area Shape_Leng borough geometry zone labels
0 1 1 0.000782 0.116357 EWR POLYGON ((933100.9183527103 192536.0856972019,... Newark Airport 30
1 2 2 0.004866 0.433470 Queens (POLYGON ((1033269.243591294 172126.0078125, 1... Jamaica Bay 41
2 3 3 0.000314 0.084341 Bronx POLYGON ((1026308.769506663 256767.6975403726,... Allerton/Pelham Gardens 26
3 4 4 0.000112 0.043567 Manhattan POLYGON ((992073.4667968601 203714.0759887695,... Alphabet City 15
4 5 5 0.000498 0.092146 Staten Island POLYGON ((935843.3104932606 144283.335850656, ... Arden Heights 2

In [75]:
gdf2 = gdf[['LocationID', 'labels']]

In [76]:
mm3 = mm2.merge(gdf2, left_on='pickup_taxizone_id', right_on='LocationID', how='left').drop('LocationID', axis=1)

In [77]:
mm3 = mm3.rename(columns={'labels': 'pickup_cluster_id'})

In [78]:
mm3


Out[78]:
pickup_taxizone_id dropoff_taxizone_id pickup_datetime dropoff_datetime trip_distance fare_amount trip_type pickup_cluster_id
0 266 266 25478914 25478914 25478914 25478914 25478914 NaN
1 237 236 5694001 5694001 5694001 5694001 5694001 5.0
2 236 237 5081429 5081429 5081429 5081429 5081429 31.0
3 237 237 4815975 4815975 4815975 4815975 4815975 5.0
4 236 236 4815481 4815481 4815481 4815481 4815481 31.0
5 237 161 3059531 3059531 3059531 3059531 3059531 5.0
6 237 162 2924670 2924670 2924670 2924670 2924670 5.0
7 239 142 2873235 2873235 2873235 2873235 2873235 31.0
8 79 79 2816956 2816956 2816956 2816956 2816956 15.0
9 239 238 2718197 2718197 2718197 2718197 2718197 31.0
10 142 239 2647112 2647112 2647112 2647112 2647112 18.0
11 161 161 2586617 2586617 2586617 2586617 2586617 18.0
12 186 230 2550821 2550821 2550821 2550821 2550821 44.0
13 161 237 2527754 2527754 2527754 2527754 2527754 18.0
14 186 161 2525270 2525270 2525270 2525270 2525270 44.0
15 107 170 2503489 2503489 2503489 2503489 2503489 44.0
16 48 48 2491840 2491840 2491840 2491840 2491840 18.0
17 230 186 2460674 2460674 2460674 2460674 2460674 18.0
18 141 236 2381177 2381177 2381177 2381177 2381177 5.0
19 238 239 2363896 2363896 2363896 2363896 2363896 31.0
20 48 68 2341972 2341972 2341972 2341972 2341972 18.0
21 234 170 2326896 2326896 2326896 2326896 2326896 44.0
22 170 161 2219762 2219762 2219762 2219762 2219762 44.0
23 236 161 2174157 2174157 2174157 2174157 2174157 31.0
24 141 237 2167450 2167450 2167450 2167450 2167450 5.0
25 237 141 2145887 2145887 2145887 2145887 2145887 5.0
26 68 68 2130762 2130762 2130762 2130762 2130762 44.0
27 162 170 2126148 2126148 2126148 2126148 2126148 18.0
28 170 170 2125852 2125852 2125852 2125852 2125852 44.0
29 79 107 2116225 2116225 2116225 2116225 2116225 15.0
30 170 234 2101772 2101772 2101772 2101772 2101772 44.0
31 239 239 2086732 2086732 2086732 2086732 2086732 31.0
32 230 161 2077512 2077512 2077512 2077512 2077512 18.0
33 48 161 2065388 2065388 2065388 2065388 2065388 18.0
34 186 170 2046542 2046542 2046542 2046542 2046542 44.0
35 236 162 2034974 2034974 2034974 2034974 2034974 31.0
36 164 161 2030221 2030221 2030221 2030221 2030221 44.0
37 141 141 2013400 2013400 2013400 2013400 2013400 5.0
38 163 161 2009192 2009192 2009192 2009192 2009192 18.0
39 186 234 1986150 1986150 1986150 1986150 1986150 44.0
40 237 142 1974961 1974961 1974961 1974961 1974961 5.0
41 161 230 1972389 1972389 1972389 1972389 1972389 18.0
42 142 237 1967034 1967034 1967034 1967034 1967034 18.0
43 161 234 1947178 1947178 1947178 1947178 1947178 18.0
44 161 164 1941897 1941897 1941897 1941897 1941897 18.0
45 162 162 1939644 1939644 1939644 1939644 1939644 18.0
46 230 230 1938713 1938713 1938713 1938713 1938713 18.0
47 236 239 1937838 1937838 1937838 1937838 1937838 31.0
48 161 236 1929287 1929287 1929287 1929287 1929287 18.0
49 236 141 1911745 1911745 1911745 1911745 1911745 31.0
50 234 161 1911051 1911051 1911051 1911051 1911051 44.0
51 170 162 1904241 1904241 1904241 1904241 1904241 44.0
52 142 238 1897997 1897997 1897997 1897997 1897997 18.0
53 237 140 1891457 1891457 1891457 1891457 1891457 5.0
54 239 236 1882957 1882957 1882957 1882957 1882957 31.0
55 142 48 1878419 1878419 1878419 1878419 1878419 18.0
56 162 237 1874894 1874894 1874894 1874894 1874894 18.0
57 263 141 1869891 1869891 1869891 1869891 1869891 31.0
58 48 230 1843792 1843792 1843792 1843792 1843792 18.0
59 170 107 1836602 1836602 1836602 1836602 1836602 44.0
60 263 236 1832637 1832637 1832637 1832637 1832637 31.0
61 142 161 1811615 1811615 1811615 1811615 1811615 18.0
62 162 230 1805658 1805658 1805658 1805658 1805658 18.0
63 107 79 1803263 1803263 1803263 1803263 1803263 44.0
64 142 163 1799617 1799617 1799617 1799617 1799617 18.0
65 142 142 1795936 1795936 1795936 1795936 1795936 18.0
66 234 186 1788592 1788592 1788592 1788592 1788592 44.0
67 262 236 1786877 1786877 1786877 1786877 1786877 5.0
68 234 68 1786075 1786075 1786075 1786075 1786075 44.0
69 142 230 1762959 1762959 1762959 1762959 1762959 18.0
70 138 230 1759989 1759989 1759989 1759989 1759989 6.0
71 161 186 1756978 1756978 1756978 1756978 1756978 18.0
72 79 170 1754538 1754538 1754538 1754538 1754538 15.0
73 236 262 1754113 1754113 1754113 1754113 1754113 31.0
74 68 48 1753478 1753478 1753478 1753478 1753478 44.0
75 161 170 1747067 1747067 1747067 1747067 1747067 18.0
76 234 79 1744766 1744766 1744766 1744766 1744766 44.0
77 162 161 1742637 1742637 1742637 1742637 1742637 18.0
78 100 161 1741597 1741597 1741597 1741597 1741597 44.0
79 238 142 1735227 1735227 1735227 1735227 1735227 31.0
80 236 238 1733182 1733182 1733182 1733182 1733182 31.0
81 170 186 1715733 1715733 1715733 1715733 1715733 44.0
82 161 163 1713528 1713528 1713528 1713528 1713528 18.0
83 163 230 1683297 1683297 1683297 1683297 1683297 18.0
84 141 263 1683113 1683113 1683113 1683113 1683113 5.0
85 163 237 1653577 1653577 1653577 1653577 1653577 18.0
86 230 162 1640926 1640926 1640926 1640926 1640926 18.0
87 236 142 1631695 1631695 1631695 1631695 1631695 31.0
88 234 234 1630645 1630645 1630645 1630645 1630645 44.0
89 249 79 1623006 1623006 1623006 1623006 1623006 15.0
90 148 79 1621862 1621862 1621862 1621862 1621862 15.0
91 48 142 1621324 1621324 1621324 1621324 1621324 18.0
92 140 237 1612580 1612580 1612580 1612580 1612580 5.0
93 100 230 1603369 1603369 1603369 1603369 1603369 44.0
94 141 162 1602978 1602978 1602978 1602978 1602978 5.0
95 162 186 1597610 1597610 1597610 1597610 1597610 18.0
96 238 236 1594935 1594935 1594935 1594935 1594935 31.0
97 231 231 1594776 1594776 1594776 1594776 1594776 15.0
98 68 234 1592096 1592096 1592096 1592096 1592096 44.0
99 79 234 1591735 1591735 1591735 1591735 1591735 15.0
100 237 263 1582634 1582634 1582634 1582634 1582634 5.0
101 230 163 1567777 1567777 1567777 1567777 1567777 18.0
102 162 141 1564648 1564648 1564648 1564648 1564648 18.0
103 164 230 1563864 1563864 1563864 1563864 1563864 44.0
104 236 263 1562653 1562653 1562653 1562653 1562653 31.0
105 142 236 1561048 1561048 1561048 1561048 1561048 18.0
106 164 170 1544269 1544269 1544269 1544269 1544269 44.0
107 230 142 1541506 1541506 1541506 1541506 1541506 18.0
108 249 234 1539925 1539925 1539925 1539925 1539925 15.0
109 162 236 1526584 1526584 1526584 1526584 1526584 18.0
110 79 148 1524357 1524357 1524357 1524357 1524357 15.0
111 164 234 1515699 1515699 1515699 1515699 1515699 44.0
112 236 75 1510158 1510158 1510158 1510158 1510158 31.0
113 230 48 1507490 1507490 1507490 1507490 1507490 18.0
114 48 163 1506290 1506290 1506290 1506290 1506290 18.0
115 237 163 1505826 1505826 1505826 1505826 1505826 5.0
116 163 236 1493066 1493066 1493066 1493066 1493066 18.0
117 107 186 1491325 1491325 1491325 1491325 1491325 44.0
118 79 249 1486325 1486325 1486325 1486325 1486325 15.0
119 186 48 1482586 1482586 1482586 1482586 1482586 44.0
120 90 68 1476483 1476483 1476483 1476483 1476483 44.0
121 140 236 1474445 1474445 1474445 1474445 1474445 5.0
122 107 107 1474415 1474415 1474415 1474415 1474415 44.0
123 68 246 1470137 1470137 1470137 1470137 1470137 44.0
124 170 230 1458922 1458922 1458922 1458922 1458922 44.0
125 132 132 1456910 1456910 1456910 1456910 1456910 40.0
126 161 162 1453831 1453831 1453831 1453831 1453831 18.0
127 234 230 1442893 1442893 1442893 1442893 1442893 44.0
128 162 107 1437080 1437080 1437080 1437080 1437080 18.0
129 141 161 1432747 1432747 1432747 1432747 1432747 5.0
130 162 234 1425901 1425901 1425901 1425901 1425901 18.0
131 107 234 1425472 1425472 1425472 1425472 1425472 44.0
132 186 162 1422269 1422269 1422269 1422269 1422269 44.0
133 238 238 1421285 1421285 1421285 1421285 1421285 31.0
134 234 249 1419052 1419052 1419052 1419052 1419052 44.0
135 163 142 1418020 1418020 1418020 1418020 1418020 18.0
136 48 186 1411914 1411914 1411914 1411914 1411914 18.0
137 163 162 1409327 1409327 1409327 1409327 1409327 18.0
138 170 79 1407257 1407257 1407257 1407257 1407257 44.0
139 113 79 1392544 1392544 1392544 1392544 1392544 15.0
140 107 162 1392319 1392319 1392319 1392319 1392319 44.0
141 229 141 1386250 1386250 1386250 1386250 1386250 5.0
142 230 170 1382575 1382575 1382575 1382575 1382575 18.0
143 43 43 1376527 1376527 1376527 1376527 1376527 31.0
144 230 100 1368431 1368431 1368431 1368431 1368431 18.0
145 230 138 1365237 1365237 1365237 1365237 1365237 18.0
146 79 114 1358877 1358877 1358877 1358877 1358877 15.0
147 138 162 1353284 1353284 1353284 1353284 1353284 6.0
148 162 164 1343218 1343218 1343218 1343218 1343218 18.0
149 79 186 1336524 1336524 1336524 1336524 1336524 15.0
... ... ... ... ... ... ... ... ...
61099 214 87 1 1 1 1 1 35.0
61100 35 59 1 1 1 1 1 29.0
61101 121 153 1 1 1 1 1 11.0
61102 156 207 1 1 1 1 1 24.0
61103 167 64 1 1 1 1 1 28.0
61104 184 258 1 1 1 1 1 38.0
61105 165 176 1 1 1 1 1 17.0
61106 139 182 1 1 1 1 1 3.0
61107 126 11 1 1 1 1 1 28.0
61108 94 207 1 1 1 1 1 13.0
61109 81 134 1 1 1 1 1 26.0
61110 60 27 1 1 1 1 1 25.0
61111 58 196 1 1 1 1 1 38.0
61112 58 146 1 1 1 1 1 38.0
61113 8 206 1 1 1 1 1 21.0
61114 21 9 1 1 1 1 1 4.0
61115 9 182 1 1 1 1 1 27.0
61116 1 167 1 1 1 1 1 30.0
61117 259 135 1 1 1 1 1 26.0
61118 242 106 1 1 1 1 1 25.0
61119 30 186 1 1 1 1 1 41.0
61120 207 108 1 1 1 1 1 21.0
61121 200 92 1 1 1 1 1 13.0
61122 26 3 1 1 1 1 1 9.0
61123 21 34 1 1 1 1 1 4.0
61124 18 133 1 1 1 1 1 13.0
61125 98 3 1 1 1 1 1 32.0
61126 235 221 1 1 1 1 1 7.0
61127 222 18 1 1 1 1 1 29.0
61128 206 235 1 1 1 1 1 8.0
61129 190 84 1 1 1 1 1 9.0
61130 178 94 1 1 1 1 1 4.0
61131 58 97 1 1 1 1 1 38.0
61132 123 34 1 1 1 1 1 4.0
61133 122 91 1 1 1 1 1 32.0
61134 120 8 1 1 1 1 1 7.0
61135 171 34 1 1 1 1 1 27.0
61136 60 257 1 1 1 1 1 25.0
61137 59 82 1 1 1 1 1 7.0
61138 187 129 1 1 1 1 1 24.0
61139 51 22 1 1 1 1 1 26.0
61140 14 153 1 1 1 1 1 34.0
61141 206 169 1 1 1 1 1 8.0
61142 201 237 1 1 1 1 1 10.0
61143 99 232 1 1 1 1 1 2.0
61144 38 213 1 1 1 1 1 3.0
61145 241 19 1 1 1 1 1 13.0
61146 44 137 1 1 1 1 1 43.0
61147 122 222 1 1 1 1 1 32.0
61148 196 172 1 1 1 1 1 12.0
61149 171 29 1 1 1 1 1 27.0
61150 111 193 1 1 1 1 1 9.0
61151 96 245 1 1 1 1 1 12.0
61152 73 78 1 1 1 1 1 27.0
61153 206 177 1 1 1 1 1 8.0
61154 251 178 1 1 1 1 1 8.0
61155 34 215 1 1 1 1 1 0.0
61156 26 73 1 1 1 1 1 9.0
61157 18 22 1 1 1 1 1 13.0
61158 2 195 1 1 1 1 1 41.0
61159 6 243 1 1 1 1 1 35.0
61160 27 145 1 1 1 1 1 33.0
61161 41 103 1 1 1 1 1 42.0
61162 59 10 1 1 1 1 1 7.0
61163 64 52 1 1 1 1 1 14.0
61164 185 39 1 1 1 1 1 26.0
61165 192 60 1 1 1 1 1 11.0
61166 202 206 1 1 1 1 1 5.0
61167 69 22 1 1 1 1 1 7.0
61168 253 240 1 1 1 1 1 6.0
61169 242 19 1 1 1 1 1 25.0
61170 81 49 1 1 1 1 1 26.0
61171 31 210 1 1 1 1 1 13.0
61172 254 91 1 1 1 1 1 26.0
61173 180 12 1 1 1 1 1 29.0
61174 27 76 1 1 1 1 1 33.0
61175 254 64 1 1 1 1 1 26.0
61176 153 139 1 1 1 1 1 13.0
61177 149 169 1 1 1 1 1 4.0
61178 149 32 1 1 1 1 1 4.0
61179 98 150 1 1 1 1 1 32.0
61180 71 147 1 1 1 1 1 17.0
61181 47 118 1 1 1 1 1 7.0
61182 241 73 1 1 1 1 1 13.0
61183 222 247 1 1 1 1 1 29.0
61184 109 142 1 1 1 1 1 36.0
61185 51 222 1 1 1 1 1 26.0
61186 69 111 1 1 1 1 1 7.0
61187 119 214 1 1 1 1 1 7.0
61188 86 83 1 1 1 1 1 22.0
61189 108 185 1 1 1 1 1 4.0
61190 98 217 1 1 1 1 1 32.0
61191 94 217 1 1 1 1 1 13.0
61192 63 194 1 1 1 1 1 12.0
61193 59 97 1 1 1 1 1 7.0
61194 34 60 1 1 1 1 1 0.0
61195 29 172 1 1 1 1 1 4.0
61196 84 133 1 1 1 1 1 43.0
61197 122 21 1 1 1 1 1 32.0
61198 44 210 1 1 1 1 1 43.0
61199 22 212 1 1 1 1 1 34.0
61200 187 50 1 1 1 1 1 24.0
61201 221 91 1 1 1 1 1 35.0
61202 157 206 1 1 1 1 1 37.0
61203 123 254 1 1 1 1 1 4.0
61204 102 153 1 1 1 1 1 12.0
61205 98 147 1 1 1 1 1 32.0
61206 156 136 1 1 1 1 1 24.0
61207 118 250 1 1 1 1 1 19.0
61208 221 53 1 1 1 1 1 35.0
61209 167 98 1 1 1 1 1 28.0
61210 92 109 1 1 1 1 1 6.0
61211 78 96 1 1 1 1 1 25.0
61212 76 8 1 1 1 1 1 29.0
61213 243 199 1 1 1 1 1 7.0
61214 245 42 1 1 1 1 1 8.0
61215 15 31 1 1 1 1 1 27.0
61216 254 207 1 1 1 1 1 26.0
61217 10 44 1 1 1 1 1 23.0
61218 240 202 1 1 1 1 1 13.0
61219 226 44 1 1 1 1 1 37.0
61220 202 101 1 1 1 1 1 5.0
61221 39 101 1 1 1 1 1 29.0
61222 51 209 1 1 1 1 1 26.0
61223 18 190 1 1 1 1 1 13.0
61224 6 24 1 1 1 1 1 35.0
61225 139 169 1 1 1 1 1 3.0
61226 240 53 1 1 1 1 1 13.0
61227 227 120 1 1 1 1 1 9.0
61228 225 109 1 1 1 1 1 16.0
61229 250 96 1 1 1 1 1 25.0
61230 203 183 1 1 1 1 1 3.0
61231 69 149 1 1 1 1 1 7.0
61232 44 22 1 1 1 1 1 43.0
61233 156 134 1 1 1 1 1 24.0
61234 117 1 1 1 1 1 1 22.0
61235 175 178 1 1 1 1 1 14.0
61236 193 44 1 1 1 1 1 5.0
61237 187 225 1 1 1 1 1 24.0
61238 165 175 1 1 1 1 1 17.0
61239 175 190 1 1 1 1 1 14.0
61240 97 99 1 1 1 1 1 20.0
61241 32 91 1 1 1 1 1 26.0
61242 21 115 1 1 1 1 1 4.0
61243 31 8 1 1 1 1 1 13.0
61244 19 169 1 1 1 1 1 14.0
61245 11 93 1 1 1 1 1 34.0
61246 1 47 1 1 1 1 1 30.0
61247 259 196 1 1 1 1 1 26.0
61248 265 2 1 1 1 1 1 NaN

61249 rows × 8 columns


In [79]:
mm3 = mm3.merge(gdf2, left_on='dropoff_taxizone_id', right_on='LocationID', how='left').drop('LocationID', axis=1)
mm3 = mm3.rename(columns={'labels': 'dropoff_cluster_id'})

In [81]:
mm4 = mm3.fillna(45.)

In [93]:
mm5 = (mm4['pickup_cluster_id dropoff_cluster_id pickup_datetime'.split()] 
    .groupby(['pickup_cluster_id', 'dropoff_cluster_id']).sum().reset_index())

In [94]:
mm5.pickup_cluster_id = mm5.pickup_cluster_id.astype(int)
mm5.dropoff_cluster_id = mm5.dropoff_cluster_id.astype(int)

In [96]:
mm5.head()


Out[96]:
pickup_cluster_id dropoff_cluster_id pickup_datetime
0 0 0 4851108
1 0 1 1886
2 0 2 77
3 0 3 1882
4 0 4 16032

In [95]:
k3 = np.zeros((46, 46))

In [98]:
for i, j, N in zip(mm5.pickup_cluster_id.astype(int), mm5.dropoff_cluster_id.astype(int), mm5.pickup_datetime):
    k3[i, j] = N

In [99]:
k3 += 1.
k3 = np.log10(k3)

In [126]:
sns.heatmap(k3)
plt.gcf().set_size_inches(8, 8)


/home/shekhar/anaconda3/lib/python3.5/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))

In [110]:
gdf['l'] = gdf['labels']

In [123]:
gdf[gdf.l == 18]


Out[123]:
LocationID OBJECTID Shape_Area Shape_Leng borough geometry zone labels l
47 48 48 0.000094 0.043747 Manhattan POLYGON ((986694.3125067502 214463.8461607993,... Clinton East 18 18
49 50 50 0.000173 0.055748 Manhattan POLYGON ((985170.3721923977 221087.3887939602,... Clinton West 18 18
141 142 142 0.000076 0.038176 Manhattan POLYGON ((989380.3045081049 218980.2473062277,... Lincoln Square East 18 18
142 143 143 0.000151 0.054180 Manhattan POLYGON ((989338.1001118571 223572.2528185844,... Lincoln Square West 18 18
160 161 161 0.000072 0.035804 Manhattan POLYGON ((991081.0260630846 214453.6983589679,... Midtown Center 18 18
161 162 162 0.000048 0.035270 Manhattan POLYGON ((992224.354090333 214415.2926926613, ... Midtown East 18 18
162 163 163 0.000041 0.034177 Manhattan POLYGON ((989412.6634775698 219020.9428979903,... Midtown North 18 18
229 230 230 0.000056 0.031028 Manhattan POLYGON ((988786.8773103654 214532.0940539986,... Times Sq/Theatre District 18 18

In [ ]: