In [1]:
# Import the AADT traffic-volume shapefile into a geopandas dataframe and remove streets with zero recorded volume
import geopandas as gpd
import pandas as pd
from shapely.ops import cascaded_union
import matplotlib.pyplot as plt
%matplotlib inline

# Read the AADT shapefile, keeping only the traffic-volume and geometry columns
traf_vol = gpd.read_file('../data/external/nyc-traffic/AADT_2015_tdv.shp')[['AADT', 'geometry']]
# Discard segments whose AADT is exactly zero
traf_vol = traf_vol.loc[traf_vol['AADT'] != 0]
traf_vol.head()


Out[1]:
AADT geometry
0 6052.0 LINESTRING Z (596607.9900000002 4527724.560000...
1 10157.0 LINESTRING Z (595417.6699999999 4523667.830000...
2 24026.0 LINESTRING Z (595429.790000001 4522723.2400000...
3 4125.0 LINESTRING Z (596005.1500000013 4524587.100000...
5 7710.0 LINESTRING Z (590222.8699999992 4519238.850000...

In [3]:
# Check the CRS of the traffic-volume layer; the other layers in this notebook
# are reprojected to this same CRS before any spatial comparison.
traf_vol.crs


Out[3]:
{'init': 'epsg:26918'}

In [2]:
# Load the borough boundary polygons and reproject them to EPSG:26918,
# the CRS reported for the traffic-volume layer.
nyc_bdry = (
    gpd.read_file('../data/external/nyc-boroughs/nybb_16c/nybb.shp')
    .to_crs({'init': 'epsg:26918'})
)
nyc_bdry


Out[2]:
BoroCode BoroName Shape_Area Shape_Leng geometry
0 5 Staten Island 1.623819e+09 330476.026530 (POLYGON ((580375.2841427148 4491060.736972133...
1 2 Bronx 1.186816e+09 463934.696838 (POLYGON ((593066.6583069805 4516675.69796374,...
2 1 Manhattan 6.364539e+08 358410.672987 (POLYGON ((583578.4583573499 4504203.877188512...
3 3 Brooklyn 1.937474e+09 741074.709979 (POLYGON ((595882.1112212804 4492983.277099772...
4 4 Queens 3.044835e+09 895762.746967 (POLYGON ((598434.3019410267 4494444.181005036...

In [37]:
# Plot borough boundaries as a quick visual check of the loaded shapes
nyc_bdry.plot()


Out[37]:
<matplotlib.axes._subplots.AxesSubplot at 0x11d44cf60>

In [3]:
# Merge the Manhattan (row label 2) and Brooklyn (row label 3) boundaries
# into a single geometry.
# Rows are selected with the label-based .loc indexer: the .ix indexer used
# previously has been removed from pandas, and on this integer index .loc
# picks the same rows.
nyc_geom = cascaded_union(nyc_bdry.loc[[2, 3]].geometry)
nyc_geom


Out[3]:

In [11]:
# Extract the Manhattan boundary geometry (row label 2) and display it.
# .loc replaces the removed .ix indexer; it is label-based, so it returns
# the same row on this integer index.
nyc_bdry_manh = nyc_bdry.loc[2].geometry
nyc_bdry_manh


Out[11]:

In [4]:
# Extract the Brooklyn boundary geometry (row label 3) and display it.
# .loc replaces the removed .ix indexer; it is label-based, so it returns
# the same row on this integer index.
nyc_bdry_bk = nyc_bdry.loc[3].geometry
nyc_bdry_bk


Out[4]:

In [4]:
# Traffic volume for the study area: keep only the segments that lie within
# the merged Manhattan + Brooklyn boundary.
nyc_traf_vol = traf_vol.loc[traf_vol['geometry'].within(nyc_geom)]
nyc_traf_vol.head()


Out[4]:
AADT geometry
1061 27531.0 LINESTRING Z (585366.2699799668 4499643.321042...
1062 10661.0 LINESTRING Z (588441.9530327898 4499205.228442...
1063 9072.0 LINESTRING Z (588338.2676470634 4502859.053529...
1064 18257.0 LINESTRING Z (584096.2300000014 4498501.480000...
1065 6773.0 LINESTRING Z (587306.5999999996 4492230.980000...

In [5]:
# Manhattan traffic volume: keep only the segments that lie within the
# Manhattan boundary.
nyc_traf_vol_manh = traf_vol.loc[traf_vol['geometry'].within(nyc_bdry_manh)]
nyc_traf_vol_manh.head()


Out[5]:
AADT geometry
6339 5462.0 LINESTRING Z (586364.7400000012 4509091.450000...
6340 22299.0 LINESTRING Z (583187.7600000007 4506520.760000...
6343 8817.0 LINESTRING Z (586525.8490566043 4516669.790754...
6345 17886.0 LINESTRING Z (585331.7200000016 4510544.540000...
6346 21107.0 LINESTRING Z (586168.7817910435 4514535.440373...

In [6]:
# Brooklyn traffic volume: keep only the segments that lie within the
# Brooklyn boundary.
nyc_traf_vol_bk = traf_vol.loc[traf_vol['geometry'].within(nyc_bdry_bk)]
nyc_traf_vol_bk.head()


Out[6]:
AADT geometry
1061 27531.0 LINESTRING Z (585366.2699799668 4499643.321042...
1062 10661.0 LINESTRING Z (588441.9530327898 4499205.228442...
1063 9072.0 LINESTRING Z (588338.2676470634 4502859.053529...
1064 18257.0 LINESTRING Z (584096.2300000014 4498501.480000...
1065 6773.0 LINESTRING Z (587306.5999999996 4492230.980000...

In [5]:
# Build a GeoDataFrame of Citi Bike stations from the processed station CSV
from shapely.geometry import Point

stations = pd.read_csv('../data/processed/stations.csv')

# One point per station (x = longitude, y = latitude), expanded into a small
# polygon so that street segments passing close to a station can intersect it
# in the spatial joins further down.
# NOTE(review): the .0005 buffer is applied while the coordinates are still
# lon/lat (EPSG:4326), so the distance is in degrees, not metres -- confirm
# this is intended.
geometry = gpd.GeoSeries(
    [Point(lon, lat) for lon, lat in zip(stations['Longitude'], stations['Latitude'])]
).buffer(.0005)

geo_stations = gpd.GeoDataFrame(stations, geometry=geometry)
geo_stations.crs = {'init': 'epsg:4326'}
geo_stations.head()


Out[5]:
Station_id Station_Name Location Latitude Longitude Zip geometry
0 72 W 52 St & 11 Ave W 52 St & 11 Ave 40.767272 -73.993929 10019 POLYGON ((-73.99342888 40.76727216, -73.993431...
1 79 Franklin St & W Broadway Franklin St & W Broadway 40.719116 -74.006667 10013 POLYGON ((-74.00616660999999 40.71911552, -74....
2 82 St James Pl & Pearl St St James Pl & Pearl St 40.711174 -74.000165 10038 POLYGON ((-73.99966544999999 40.71117416, -73....
3 83 Atlantic Ave & Fort Greene Pl Atlantic Ave & Fort Greene Pl 40.683826 -73.976323 11217 POLYGON ((-73.97582328 40.68382604, -73.975825...
4 116 W 17 St & 8 Ave W 17 St & 8 Ave 40.741776 -74.001497 10011 POLYGON ((-74.00099745999999 40.74177603, -74....

In [6]:
# Reproject the stations from lon/lat (EPSG:4326) to EPSG:26918, the CRS used
# by the traffic-volume and borough layers, so the spatial joins compare
# geometries in the same coordinate system.
geo_stations = geo_stations.to_crs({'init': 'epsg:26918'})

In [44]:
# Confirm the stations now carry the projected CRS
geo_stations.crs


Out[44]:
{'init': 'epsg:26918'}

In [8]:
# Total number of stations loaded from the CSV
len(geo_stations)


Out[8]:
664

In [7]:
# Merge for the map: find every traffic segment that intersects a buffered
# station and export those segments (in lon/lat) for the map layer.
#
# 'intersects' is sjoin's default predicate. It is left implicit because the
# keyword that names it was renamed (`op` -> `predicate`) across geopandas
# releases, so passing it explicitly ties the cell to one version range.
stations_traf_vol = gpd.sjoin(geo_stations, nyc_traf_vol, how="inner")
stations_traf_vol['AADT'] = pd.to_numeric(stations_traf_vol['AADT'])

# Reproject the study-area segments to EPSG:4326 and write out only the
# segments matched to at least one station (index_right holds the segment's
# row label in nyc_traf_vol).
nyc_t = nyc_traf_vol.to_crs({'init': 'epsg:4326'})
nyc_t.loc[stations_traf_vol['index_right'].unique(), :].to_csv("../data/map/traffic.csv")

In [46]:
# Merge Manhattan traffic-volume data with Citi Bike stations using a
# geopandas spatial join: one row per (station, intersecting segment) pair.
#
# 'intersects' is sjoin's default predicate. It is left implicit because the
# keyword that names it was renamed (`op` -> `predicate`) across geopandas
# releases.
#
# Only Station_id and AADT are kept, selected explicitly rather than by
# dropping a list of unwanted columns, so that any other station column
# (e.g. Zip) cannot leak into the per-station average computed next.
stations_traf_vol_manh = (
    gpd.sjoin(geo_stations, nyc_traf_vol_manh, how="inner")
    .loc[:, ['Station_id', 'AADT']]
    .sort_index()
    .assign(AADT=lambda joined: pd.to_numeric(joined['AADT']))
)
stations_traf_vol_manh.head(10)


Out[46]:
Station_id AADT
0 72 5908.0
0 72 23833.0
1 79 1776.0
1 79 22156.0
1 79 4522.0
2 82 21202.0
2 82 12423.0
4 116 23984.0
4 116 7912.0
7 127 11741.0

In [48]:
# Average the traffic volume (AADT) of all matched segments for each
# Manhattan station; Station_id stays a regular column.
stations_traf_vol_manh = stations_traf_vol_manh.groupby('Station_id', as_index=False).mean()
stations_traf_vol_manh.head()


Out[48]:
Station_id AADT
0 72 14870.500000
1 79 9484.666667
2 82 16812.500000
3 116 15948.000000
4 127 8063.500000

In [49]:
# Number of stations matched to at least one Manhattan traffic segment
len(stations_traf_vol_manh)


Out[49]:
328

In [50]:
# Merge Brooklyn traffic-volume data with Citi Bike stations using a
# geopandas spatial join: one row per (station, intersecting segment) pair.
#
# 'intersects' is sjoin's default predicate. It is left implicit because the
# keyword that names it was renamed (`op` -> `predicate`) across geopandas
# releases.
#
# Only Station_id and AADT are kept, selected explicitly rather than by
# dropping a list of unwanted columns, so that any other station column
# (e.g. Zip) cannot leak into the per-station average computed next.
stations_traf_vol_bk = (
    gpd.sjoin(geo_stations, nyc_traf_vol_bk, how="inner")
    .loc[:, ['Station_id', 'AADT']]
    .sort_index()
    .assign(AADT=lambda joined: pd.to_numeric(joined['AADT']))
)
stations_traf_vol_bk.head(10)


Out[50]:
Station_id AADT
3 83 57885.0
3 83 26067.0
5 119 123631.0
5 119 10282.0
6 120 6907.0
10 143 3011.0
10 143 2180.0
10 143 6163.0
10 143 14412.0
11 144 12995.0

In [51]:
# Average the traffic volume (AADT) of all matched segments for each
# Brooklyn station; Station_id stays a regular column.
stations_traf_vol_bk = stations_traf_vol_bk.groupby('Station_id', as_index=False).mean()
stations_traf_vol_bk.head()


Out[51]:
Station_id AADT
0 83 41976.0
1 119 66956.5
2 120 6907.0
3 143 6441.5
4 144 8644.5

In [52]:
# Number of stations matched to at least one Brooklyn traffic segment
len(stations_traf_vol_bk)


Out[52]:
207

In [56]:
# Combine the per-station averages for Manhattan and Brooklyn into one table,
# ordered by Station_id with a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append has been removed from pandas;
# reset_index(drop=True) replaces the reset-then-drop-'index' sequence.
stations_traf_vol = (
    pd.concat([stations_traf_vol_manh, stations_traf_vol_bk])
    .sort_values('Station_id')
    .reset_index(drop=True)
)
stations_traf_vol.head()


Out[56]:
Station_id AADT
0 72 14870.500000
1 79 9484.666667
2 82 16812.500000
3 83 41976.000000
4 116 15948.000000

In [58]:
# Save the combined per-station average AADT table to CSV.
# NOTE: index= is not passed, so the DataFrame index is written as an
# unnamed first column (to_csv default).
stations_traf_vol.to_csv("../data/processed/traffic-volume.csv")

In [ ]:


In [ ]: