In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
In [3]:
import matplotlib
plt = matplotlib.pyplot
In [12]:
import sqlalchemy
import pandas as pd
import dask.dataframe as dd
import geopandas
import os
# from bokeh.io import push_notebook, show, output_notebook
# from bokeh.layouts import row
# from bokeh.plotting import figure
# output_notebook()
In [5]:
ct = geopandas.read_file('../shapefiles/nyct2010.shp')
tz = geopandas.read_file('../shapefiles/taxi_zones.shp')
In [10]:
ct.plot(cmap=plt.cm.viridis)
plt.gcf().set_size_inches(12, 8)
In [11]:
tz.plot(cmap=plt.cm.viridis)
tz[:4].plot(ax=plt.gca())
plt.gcf().set_size_inches(12, 8)
In [9]:
help(ct.plot)
Help on method plot in module geopandas.geodataframe:
plot(*args, **kwargs) method of geopandas.geodataframe.GeoDataFrame instance
Plot a GeoDataFrame
Generate a plot of a GeoDataFrame with matplotlib. If a
column is specified, the plot coloring will be based on values
in that column. Otherwise, a categorical plot of the
geometries in the `geometry` column will be generated.
Parameters
----------
GeoDataFrame
The GeoDataFrame to be plotted. Currently Polygon,
MultiPolygon, LineString, MultiLineString and Point
geometries can be plotted.
column : str (default None)
The name of the column to be plotted.
categorical : bool (default False)
If False, cmap will reflect numerical values of the
column being plotted. For non-numerical columns (or if
column=None), this will be set to True.
cmap : str (default 'Set1')
The name of a colormap recognized by matplotlib.
color : str (default None)
If specified, all objects will be colored uniformly.
linewidth : float (default 1.0)
Line width for geometries.
legend : bool (default False)
Plot a legend (Experimental; currently for categorical
plots only)
ax : matplotlib.pyplot.Artist (default None)
axes on which to draw the plot
scheme : pysal.esda.mapclassify.Map_Classifier
Choropleth classification schemes (requires PySAL)
k : int (default 5)
Number of classes (ignored if scheme is None)
vmin : None or float (default None)
Minimum value of cmap. If None, the minimum data value
in the column to be plotted is used.
vmax : None or float (default None)
Maximum value of cmap. If None, the maximum data value
in the column to be plotted is used.
figsize
Size of the resulting matplotlib.figure.Figure. If the argument
axes is given explicitly, figsize is ignored.
**color_kwds : dict
Color options to be passed on to the actual plot function
Returns
-------
matplotlib axes instance
In [18]:
engine = sqlalchemy.engine.create_engine(open(os.path.expanduser('~/.sqlconninfo')).read())
In [21]:
zz = pd.read_sql('SELECT distinct dropoff_location_id from taxiloc_col;', engine)
zz
Out[21]:
dropoff_location_id
0
NaN
1
128.0
2
167.0
3
1994.0
4
516.0
5
1814.0
6
690.0
7
1846.0
8
1627.0
9
1351.0
10
1192.0
11
1392.0
12
752.0
13
969.0
14
768.0
15
184.0
16
544.0
17
1631.0
18
1955.0
19
32.0
20
1034.0
21
1062.0
22
1804.0
23
457.0
24
372.0
25
1251.0
26
1255.0
27
1436.0
28
325.0
29
11.0
...
...
2137
632.0
2138
813.0
2139
533.0
2140
163.0
2141
1644.0
2142
1565.0
2143
1104.0
2144
742.0
2145
59.0
2146
1776.0
2147
1937.0
2148
196.0
2149
602.0
2150
1981.0
2151
2133.0
2152
614.0
2153
554.0
2154
1147.0
2155
172.0
2156
929.0
2157
547.0
2158
834.0
2159
1608.0
2160
810.0
2161
907.0
2162
666.0
2163
1991.0
2164
960.0
2165
1259.0
2166
2075.0
2167 rows × 1 columns
In [22]:
zz.sort_values('dropoff_location_id')
Out[22]:
dropoff_location_id
864
1.0
288
2.0
1934
3.0
1708
4.0
1635
5.0
1487
6.0
1028
7.0
486
8.0
1141
9.0
1016
10.0
29
11.0
1807
12.0
1668
13.0
794
14.0
1298
15.0
1481
16.0
440
17.0
298
18.0
1904
19.0
580
20.0
243
21.0
401
22.0
1109
23.0
1950
24.0
1408
25.0
1802
26.0
2012
27.0
1458
28.0
1196
29.0
616
30.0
...
...
1598
2138.0
268
2139.0
224
2140.0
714
2141.0
1060
2142.0
279
2143.0
1034
2144.0
830
2145.0
572
2146.0
1156
2147.0
1443
2148.0
968
2149.0
1145
2150.0
1510
2151.0
383
2152.0
961
2153.0
678
2154.0
1448
2155.0
1986
2156.0
52
2157.0
1240
2158.0
1768
2159.0
764
2160.0
1853
2161.0
311
2162.0
1048
2163.0
682
2164.0
804
2165.0
318
2166.0
0
NaN
2167 rows × 1 columns
In [27]:
zz2 = pd.read_sql('SELECT gid FROM nyct2010 order by gid;', engine)
In [28]:
zz2
Out[28]:
gid
0
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
10
10
11
11
12
12
13
13
14
14
15
15
16
16
17
17
18
18
19
19
20
20
21
21
22
22
23
23
24
24
25
25
26
26
27
27
28
28
29
29
30
...
...
2136
2137
2137
2138
2138
2139
2139
2140
2140
2141
2141
2142
2142
2143
2143
2144
2144
2145
2145
2146
2146
2147
2147
2148
2148
2149
2149
2150
2150
2151
2151
2152
2152
2153
2153
2154
2154
2155
2155
2156
2156
2157
2157
2158
2158
2159
2159
2160
2160
2161
2161
2162
2162
2163
2163
2164
2164
2165
2165
2166
2166 rows × 1 columns
In [29]:
mm = pd.read_sql("""
select v.trip_id, v.pickup_datetime, v.pickup_location_id,
t.dropoff_location_id, t.pickup_location_id, t. from taxi_ingest_col v
left join taxiloc_col t ON t.trip_id = v.trip_id where v.trip_type='uber';""", engine)
In [33]:
mm.sort_values('trip_id').reset_index()
Out[33]:
index
trip_id
pickup_datetime
pickup_location_id
dropoff_location_id
pickup_location_id
0
4223749
1
2014-04-01 00:11:00
NaN
1952.0
140.0
1
15239447
2
2014-04-01 00:17:00
NaN
NaN
NaN
2
3050454
3
2014-04-01 00:21:00
NaN
861.0
79.0
3
16308845
4
2014-04-01 00:28:00
NaN
17.0
161.0
4
6645867
5
2014-04-01 00:33:00
NaN
15.0
162.0
5
16344172
6
2014-04-01 00:33:00
NaN
NaN
NaN
6
15573173
7
2014-04-01 00:39:00
NaN
929.0
148.0
7
18364278
8
2014-04-01 00:45:00
NaN
1283.0
230.0
8
15793477
9
2014-04-01 00:55:00
NaN
1895.0
68.0
9
16382356
10
2014-04-01 01:01:00
NaN
901.0
230.0
10
18070994
11
2014-04-01 01:19:00
NaN
1470.0
79.0
11
14361065
12
2014-04-01 01:48:00
NaN
15.0
162.0
12
18511262
13
2014-04-01 01:49:00
NaN
2146.0
79.0
13
12155771
14
2014-04-01 02:11:00
NaN
2056.0
132.0
14
12890255
15
2014-04-01 02:25:00
NaN
1131.0
7.0
15
73145
16
2014-04-01 02:31:00
NaN
1952.0
140.0
16
12560321
17
2014-04-01 02:43:00
NaN
16.0
161.0
17
15279839
18
2014-04-01 03:22:00
NaN
915.0
4.0
18
14324347
19
2014-04-01 03:35:00
NaN
2098.0
246.0
19
18043551
20
2014-04-01 03:35:00
NaN
NaN
NaN
20
5361848
21
2014-04-01 03:41:00
NaN
19.0
163.0
21
6976075
22
2014-04-01 04:11:00
NaN
2098.0
246.0
22
15462742
23
2014-04-01 04:15:00
NaN
1274.0
188.0
23
15426228
24
2014-04-01 04:19:00
NaN
10.0
79.0
24
15206398
25
2014-04-01 04:20:00
NaN
NaN
1.0
25
5427883
26
2014-04-01 04:26:00
NaN
NaN
NaN
26
17960749
27
2014-04-01 04:27:00
NaN
NaN
1.0
27
15096377
28
2014-04-01 04:38:00
NaN
NaN
1.0
28
17409001
29
2014-04-01 04:47:00
NaN
1469.0
144.0
29
9179270
30
2014-04-01 04:49:00
NaN
969.0
113.0
...
...
...
...
...
...
...
18804776
10335756
18804777
2015-05-08 15:39:00
163.0
NaN
NaN
18804777
8750533
18804778
2015-05-08 15:39:00
239.0
NaN
NaN
18804778
15586114
18804779
2015-05-08 15:39:00
239.0
NaN
NaN
18804779
10417067
18804780
2015-05-08 15:39:00
238.0
NaN
NaN
18804780
7258994
18804781
2015-05-08 15:39:00
211.0
NaN
NaN
18804781
829039
18804782
2015-05-08 15:39:00
68.0
NaN
NaN
18804782
6453876
18804783
2015-05-08 15:40:00
170.0
NaN
NaN
18804783
11878920
18804784
2015-05-08 15:40:00
211.0
NaN
NaN
18804784
13034597
18804785
2015-05-08 15:40:00
236.0
NaN
NaN
18804785
5355562
18804786
2015-05-08 15:40:00
161.0
NaN
NaN
18804786
8981194
18804787
2015-05-08 15:40:00
186.0
NaN
NaN
18804787
6145197
18804788
2015-05-08 15:41:00
50.0
NaN
NaN
18804788
6092065
18804789
2015-05-08 15:41:00
13.0
NaN
NaN
18804789
7725382
18804790
2015-05-08 15:41:00
192.0
NaN
NaN
18804790
11338953
18804791
2015-05-08 15:41:00
262.0
NaN
NaN
18804791
10573322
18804792
2015-05-08 15:41:00
237.0
NaN
NaN
18804792
13838265
18804793
2015-05-08 15:41:00
233.0
NaN
NaN
18804793
5190055
18804794
2015-05-08 15:41:00
148.0
NaN
NaN
18804794
11067070
18804795
2015-05-08 15:41:00
33.0
NaN
NaN
18804795
860331
18804796
2015-05-08 15:42:00
232.0
NaN
NaN
18804796
4774047
18804797
2015-05-08 15:42:00
79.0
NaN
NaN
18804797
16272033
18804798
2015-05-08 15:42:00
37.0
NaN
NaN
18804798
7988832
18804799
2015-05-08 15:42:00
161.0
NaN
NaN
18804799
16084217
18804800
2015-05-08 15:42:00
7.0
NaN
NaN
18804800
14647480
18804801
2015-05-08 15:43:00
25.0
NaN
NaN
18804801
10317667
18804802
2015-05-08 15:43:00
186.0
NaN
NaN
18804802
16943149
18804803
2015-05-08 15:43:00
263.0
NaN
NaN
18804803
724204
18804804
2015-05-08 15:43:00
90.0
NaN
NaN
18804804
5167200
18804805
2015-05-08 15:44:00
45.0
NaN
NaN
18804805
10857339
18804806
2015-05-08 15:44:00
144.0
NaN
NaN
18804806 rows × 6 columns
In [ ]:
Content source: r-shekhar/NYC-transport
Similar notebooks: