In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [3]:
import matplotlib
plt = matplotlib.pyplot

In [12]:
import sqlalchemy
import pandas as pd
import dask.dataframe as dd
import geopandas
import os

# from bokeh.io import push_notebook, show, output_notebook
# from bokeh.layouts import row
# from bokeh.plotting import figure
# output_notebook()

In [5]:
ct = geopandas.read_file('../shapefiles/nyct2010.shp')
tz = geopandas.read_file('../shapefiles/taxi_zones.shp')

In [10]:
ct.plot(cmap=plt.cm.viridis)

plt.gcf().set_size_inches(12, 8)



In [11]:
tz.plot(cmap=plt.cm.viridis)
tz[:4].plot(ax=plt.gca())
plt.gcf().set_size_inches(12, 8)



In [9]:
help(ct.plot)


Help on method plot in module geopandas.geodataframe:

plot(*args, **kwargs) method of geopandas.geodataframe.GeoDataFrame instance
    Plot a GeoDataFrame
    
    Generate a plot of a GeoDataFrame with matplotlib.  If a
    column is specified, the plot coloring will be based on values
    in that column.  Otherwise, a categorical plot of the
    geometries in the `geometry` column will be generated.
    
    Parameters
    ----------
    
    GeoDataFrame
        The GeoDataFrame to be plotted.  Currently Polygon,
        MultiPolygon, LineString, MultiLineString and Point
        geometries can be plotted.
    
    column : str (default None)
        The name of the column to be plotted.
    
    categorical : bool (default False)
        If False, cmap will reflect numerical values of the
        column being plotted.  For non-numerical columns (or if
        column=None), this will be set to True.
    
    cmap : str (default 'Set1')
        The name of a colormap recognized by matplotlib.
    
    color : str (default None)
        If specified, all objects will be colored uniformly.
    
    linewidth : float (default 1.0)
        Line width for geometries.
    
    legend : bool (default False)
        Plot a legend (Experimental; currently for categorical
        plots only)
    
    ax : matplotlib.pyplot.Artist (default None)
        axes on which to draw the plot
    
    scheme : pysal.esda.mapclassify.Map_Classifier
        Choropleth classification schemes (requires PySAL)
    
    k   : int (default 5)
        Number of classes (ignored if scheme is None)
    
    vmin : None or float (default None)
    
        Minimum value of cmap. If None, the minimum data value
        in the column to be plotted is used.
    
    vmax : None or float (default None)
    
        Maximum value of cmap. If None, the maximum data value
        in the column to be plotted is used.
    
    figsize
        Size of the resulting matplotlib.figure.Figure. If the argument
        axes is given explicitly, figsize is ignored.
    
    **color_kwds : dict
        Color options to be passed on to the actual plot function
    
    Returns
    -------
    
    matplotlib axes instance


In [18]:
engine = sqlalchemy.engine.create_engine(open(os.path.expanduser('~/.sqlconninfo')).read())

In [21]:
zz = pd.read_sql('SELECT distinct dropoff_location_id from taxiloc_col;', engine)
zz


Out[21]:
dropoff_location_id
0 NaN
1 128.0
2 167.0
3 1994.0
4 516.0
5 1814.0
6 690.0
7 1846.0
8 1627.0
9 1351.0
10 1192.0
11 1392.0
12 752.0
13 969.0
14 768.0
15 184.0
16 544.0
17 1631.0
18 1955.0
19 32.0
20 1034.0
21 1062.0
22 1804.0
23 457.0
24 372.0
25 1251.0
26 1255.0
27 1436.0
28 325.0
29 11.0
... ...
2137 632.0
2138 813.0
2139 533.0
2140 163.0
2141 1644.0
2142 1565.0
2143 1104.0
2144 742.0
2145 59.0
2146 1776.0
2147 1937.0
2148 196.0
2149 602.0
2150 1981.0
2151 2133.0
2152 614.0
2153 554.0
2154 1147.0
2155 172.0
2156 929.0
2157 547.0
2158 834.0
2159 1608.0
2160 810.0
2161 907.0
2162 666.0
2163 1991.0
2164 960.0
2165 1259.0
2166 2075.0

2167 rows × 1 columns


In [22]:
zz.sort_values('dropoff_location_id')


Out[22]:
dropoff_location_id
864 1.0
288 2.0
1934 3.0
1708 4.0
1635 5.0
1487 6.0
1028 7.0
486 8.0
1141 9.0
1016 10.0
29 11.0
1807 12.0
1668 13.0
794 14.0
1298 15.0
1481 16.0
440 17.0
298 18.0
1904 19.0
580 20.0
243 21.0
401 22.0
1109 23.0
1950 24.0
1408 25.0
1802 26.0
2012 27.0
1458 28.0
1196 29.0
616 30.0
... ...
1598 2138.0
268 2139.0
224 2140.0
714 2141.0
1060 2142.0
279 2143.0
1034 2144.0
830 2145.0
572 2146.0
1156 2147.0
1443 2148.0
968 2149.0
1145 2150.0
1510 2151.0
383 2152.0
961 2153.0
678 2154.0
1448 2155.0
1986 2156.0
52 2157.0
1240 2158.0
1768 2159.0
764 2160.0
1853 2161.0
311 2162.0
1048 2163.0
682 2164.0
804 2165.0
318 2166.0
0 NaN

2167 rows × 1 columns


In [27]:
zz2 = pd.read_sql('SELECT gid FROM nyct2010 order by gid;', engine)

In [28]:
zz2


Out[28]:
gid
0 1
1 2
2 3
3 4
4 5
5 6
6 7
7 8
8 9
9 10
10 11
11 12
12 13
13 14
14 15
15 16
16 17
17 18
18 19
19 20
20 21
21 22
22 23
23 24
24 25
25 26
26 27
27 28
28 29
29 30
... ...
2136 2137
2137 2138
2138 2139
2139 2140
2140 2141
2141 2142
2142 2143
2143 2144
2144 2145
2145 2146
2146 2147
2147 2148
2148 2149
2149 2150
2150 2151
2151 2152
2152 2153
2153 2154
2154 2155
2155 2156
2156 2157
2157 2158
2158 2159
2159 2160
2160 2161
2161 2162
2162 2163
2163 2164
2164 2165
2165 2166

2166 rows × 1 columns


In [29]:
mm = pd.read_sql("""
    select v.trip_id, v.pickup_datetime, v.pickup_location_id, 
           t.dropoff_location_id, t.pickup_location_id, t. from taxi_ingest_col v 
           left join taxiloc_col t ON t.trip_id = v.trip_id where v.trip_type='uber';""", engine)

In [33]:
mm.sort_values('trip_id').reset_index()


Out[33]:
index trip_id pickup_datetime pickup_location_id dropoff_location_id pickup_location_id
0 4223749 1 2014-04-01 00:11:00 NaN 1952.0 140.0
1 15239447 2 2014-04-01 00:17:00 NaN NaN NaN
2 3050454 3 2014-04-01 00:21:00 NaN 861.0 79.0
3 16308845 4 2014-04-01 00:28:00 NaN 17.0 161.0
4 6645867 5 2014-04-01 00:33:00 NaN 15.0 162.0
5 16344172 6 2014-04-01 00:33:00 NaN NaN NaN
6 15573173 7 2014-04-01 00:39:00 NaN 929.0 148.0
7 18364278 8 2014-04-01 00:45:00 NaN 1283.0 230.0
8 15793477 9 2014-04-01 00:55:00 NaN 1895.0 68.0
9 16382356 10 2014-04-01 01:01:00 NaN 901.0 230.0
10 18070994 11 2014-04-01 01:19:00 NaN 1470.0 79.0
11 14361065 12 2014-04-01 01:48:00 NaN 15.0 162.0
12 18511262 13 2014-04-01 01:49:00 NaN 2146.0 79.0
13 12155771 14 2014-04-01 02:11:00 NaN 2056.0 132.0
14 12890255 15 2014-04-01 02:25:00 NaN 1131.0 7.0
15 73145 16 2014-04-01 02:31:00 NaN 1952.0 140.0
16 12560321 17 2014-04-01 02:43:00 NaN 16.0 161.0
17 15279839 18 2014-04-01 03:22:00 NaN 915.0 4.0
18 14324347 19 2014-04-01 03:35:00 NaN 2098.0 246.0
19 18043551 20 2014-04-01 03:35:00 NaN NaN NaN
20 5361848 21 2014-04-01 03:41:00 NaN 19.0 163.0
21 6976075 22 2014-04-01 04:11:00 NaN 2098.0 246.0
22 15462742 23 2014-04-01 04:15:00 NaN 1274.0 188.0
23 15426228 24 2014-04-01 04:19:00 NaN 10.0 79.0
24 15206398 25 2014-04-01 04:20:00 NaN NaN 1.0
25 5427883 26 2014-04-01 04:26:00 NaN NaN NaN
26 17960749 27 2014-04-01 04:27:00 NaN NaN 1.0
27 15096377 28 2014-04-01 04:38:00 NaN NaN 1.0
28 17409001 29 2014-04-01 04:47:00 NaN 1469.0 144.0
29 9179270 30 2014-04-01 04:49:00 NaN 969.0 113.0
... ... ... ... ... ... ...
18804776 10335756 18804777 2015-05-08 15:39:00 163.0 NaN NaN
18804777 8750533 18804778 2015-05-08 15:39:00 239.0 NaN NaN
18804778 15586114 18804779 2015-05-08 15:39:00 239.0 NaN NaN
18804779 10417067 18804780 2015-05-08 15:39:00 238.0 NaN NaN
18804780 7258994 18804781 2015-05-08 15:39:00 211.0 NaN NaN
18804781 829039 18804782 2015-05-08 15:39:00 68.0 NaN NaN
18804782 6453876 18804783 2015-05-08 15:40:00 170.0 NaN NaN
18804783 11878920 18804784 2015-05-08 15:40:00 211.0 NaN NaN
18804784 13034597 18804785 2015-05-08 15:40:00 236.0 NaN NaN
18804785 5355562 18804786 2015-05-08 15:40:00 161.0 NaN NaN
18804786 8981194 18804787 2015-05-08 15:40:00 186.0 NaN NaN
18804787 6145197 18804788 2015-05-08 15:41:00 50.0 NaN NaN
18804788 6092065 18804789 2015-05-08 15:41:00 13.0 NaN NaN
18804789 7725382 18804790 2015-05-08 15:41:00 192.0 NaN NaN
18804790 11338953 18804791 2015-05-08 15:41:00 262.0 NaN NaN
18804791 10573322 18804792 2015-05-08 15:41:00 237.0 NaN NaN
18804792 13838265 18804793 2015-05-08 15:41:00 233.0 NaN NaN
18804793 5190055 18804794 2015-05-08 15:41:00 148.0 NaN NaN
18804794 11067070 18804795 2015-05-08 15:41:00 33.0 NaN NaN
18804795 860331 18804796 2015-05-08 15:42:00 232.0 NaN NaN
18804796 4774047 18804797 2015-05-08 15:42:00 79.0 NaN NaN
18804797 16272033 18804798 2015-05-08 15:42:00 37.0 NaN NaN
18804798 7988832 18804799 2015-05-08 15:42:00 161.0 NaN NaN
18804799 16084217 18804800 2015-05-08 15:42:00 7.0 NaN NaN
18804800 14647480 18804801 2015-05-08 15:43:00 25.0 NaN NaN
18804801 10317667 18804802 2015-05-08 15:43:00 186.0 NaN NaN
18804802 16943149 18804803 2015-05-08 15:43:00 263.0 NaN NaN
18804803 724204 18804804 2015-05-08 15:43:00 90.0 NaN NaN
18804804 5167200 18804805 2015-05-08 15:44:00 45.0 NaN NaN
18804805 10857339 18804806 2015-05-08 15:44:00 144.0 NaN NaN

18804806 rows × 6 columns


In [ ]: