In [1]:
# Load the AADT shapefile into a geopandas dataframe and remove segments whose AADT is 0
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from shapely.ops import cascaded_union, unary_union
%matplotlib inline
# Read the AADT shapefile and keep only the traffic count and the geometry.
aadt_shapefile = '../data/external/nyc-traffic/AADT_2015_tdv.shp'
traf_vol = gpd.read_file(aadt_shapefile)[['AADT', 'geometry']]
# Drop segments whose AADT equals 0.
# NOTE(review): this comparison only removes rows if AADT is numeric; later
# cells run pd.to_numeric on it, so the dtype is worth confirming.
traf_vol = traf_vol[traf_vol['AADT'].ne(0)]
traf_vol.head()
Out[1]:
In [3]:
# Display the coordinate reference system that was read with the traffic
# volume layer, so it can be compared with the CRS used for the other layers
traf_vol.crs
Out[3]:
In [2]:
# Load the borough boundary shapefile and reproject it to EPSG:26918.
# The target CRS is passed with `epsg=` instead of the legacy
# {'init': 'epsg:...'} dict, which newer pyproj releases deprecate.
nyc_bdry = gpd.read_file('../data/external/nyc-boroughs/nybb_16c/nybb.shp')
nyc_bdry = nyc_bdry.to_crs(epsg=26918)
nyc_bdry
Out[2]:
In [37]:
# Plot the borough boundary polygons as a quick visual sanity check
nyc_bdry.plot()
Out[37]:
In [3]:
# Merge the Manhattan and Brooklyn boundaries into a single geometry.
# NOTE(review): assumes index labels 2 and 3 of nyc_bdry are Manhattan and
# Brooklyn -- confirm against the displayed table, or select by borough name.
# `.loc` replaces `.ix` (removed in pandas 1.0) and `unary_union` replaces the
# deprecated `cascaded_union`.
nyc_geom = unary_union(list(nyc_bdry.loc[[2, 3]].geometry))
nyc_geom
Out[3]:
In [11]:
# Extract the Manhattan boundary geometry and display it.
# NOTE(review): assumes index label 2 of nyc_bdry is Manhattan -- confirm.
# `.loc` replaces `.ix`, which was removed in pandas 1.0.
nyc_bdry_manh = nyc_bdry.loc[2, 'geometry']
nyc_bdry_manh
Out[11]:
In [4]:
# Extract the Brooklyn boundary geometry and display it.
# NOTE(review): assumes index label 3 of nyc_bdry is Brooklyn -- confirm.
# `.loc` replaces `.ix`, which was removed in pandas 1.0.
nyc_bdry_bk = nyc_bdry.loc[3, 'geometry']
nyc_bdry_bk
Out[4]:
In [4]:
# Keep only the traffic segments that lie entirely within the merged
# borough geometry.
inside_nyc_geom = traf_vol['geometry'].within(nyc_geom)
nyc_traf_vol = traf_vol[inside_nyc_geom]
nyc_traf_vol.head()
Out[4]:
In [5]:
# Keep only the traffic segments that lie entirely within the Manhattan
# boundary geometry.
inside_manh = traf_vol['geometry'].within(nyc_bdry_manh)
nyc_traf_vol_manh = traf_vol[inside_manh]
nyc_traf_vol_manh.head()
Out[5]:
In [6]:
# Keep only the traffic segments that lie entirely within the Brooklyn
# boundary geometry.
inside_bk = traf_vol['geometry'].within(nyc_bdry_bk)
nyc_traf_vol_bk = traf_vol[inside_bk]
nyc_traf_vol_bk.head()
Out[6]:
In [5]:
# Read the citibike station csv and turn it into a geopandas dataframe with
# one buffered point per station.
from shapely.geometry import Point
stations = pd.read_csv('../data/processed/stations.csv')
station_points = [Point(lon, lat) for lon, lat in zip(stations.Longitude, stations.Latitude)]
# NOTE(review): the buffer is applied while coordinates are still lon/lat
# (the CRS is set to epsg:4326 below), so the .0005 radius is in degrees.
geometry = gpd.GeoSeries(station_points).buffer(.0005)
geo_stations = gpd.GeoDataFrame(stations, geometry=geometry)
geo_stations.crs = {'init' :'epsg:4326'}
geo_stations.head()
Out[5]:
In [6]:
# Reproject the station buffers to EPSG:26918. The CRS is passed with `epsg=`
# instead of the legacy {'init': 'epsg:...'} dict, which newer pyproj
# releases deprecate.
geo_stations = geo_stations.to_crs(epsg=26918)
In [44]:
# Display the coordinate reference system currently set on the station layer
geo_stations.crs
Out[44]:
In [8]:
# Number of rows in the station layer
geo_stations.shape[0]
Out[8]:
In [7]:
# Merge for the map: spatially join the station buffers to the traffic
# segments, then export every matched segment in lon/lat coordinates.
# 'intersects' is sjoin's default test, so the keyword is omitted; it was
# renamed from `op` to `predicate` in newer geopandas releases, and leaving it
# out keeps this cell working on both old and new versions.
stations_traf_vol = gpd.sjoin(geo_stations, nyc_traf_vol, how="inner")
stations_traf_vol.AADT = pd.to_numeric(stations_traf_vol.AADT)
# `epsg=` replaces the deprecated {'init': 'epsg:...'} CRS dict.
nyc_t = nyc_traf_vol.to_crs(epsg=4326)
# index_right holds the index label of the matched traffic segment; a segment
# touched by several stations is written once.
matched_segments = stations_traf_vol.index_right.unique()
nyc_t.loc[matched_segments, :].to_csv("../data/map/traffic.csv")
In [46]:
# Merge Manhattan traffic volume data with citibike stations using a
# geopandas spatial join (one row per station/segment match), then drop the
# join index and the station location/name columns.
# 'intersects' is sjoin's default test, so the keyword is omitted; it was
# renamed from `op` to `predicate` in newer geopandas releases.
stations_traf_vol_manh = (
    gpd.sjoin(geo_stations, nyc_traf_vol_manh, how="inner")
    .drop(['index_right', 'Location', 'geometry', 'Latitude', 'Longitude', 'Station_Name'], axis=1)
    .sort_index()
)
# Make sure AADT is numeric before it is averaged per station.
stations_traf_vol_manh['AADT'] = pd.to_numeric(stations_traf_vol_manh['AADT'])
stations_traf_vol_manh.head(10)
Out[46]:
In [48]:
# Average the remaining columns (including AADT) over all matched segments of
# each station, giving one row per Station_id with Station_id as a column.
stations_traf_vol_manh = (
    stations_traf_vol_manh
    .groupby(['Station_id'])
    .mean()
    .reset_index()
)
stations_traf_vol_manh.head()
Out[48]:
In [49]:
# Number of rows in the Manhattan per-station table
stations_traf_vol_manh.shape[0]
Out[49]:
In [50]:
# Merge Brooklyn traffic volume data with citibike stations using a
# geopandas spatial join (one row per station/segment match), then drop the
# join index and the station location/name columns.
# 'intersects' is sjoin's default test, so the keyword is omitted; it was
# renamed from `op` to `predicate` in newer geopandas releases.
stations_traf_vol_bk = (
    gpd.sjoin(geo_stations, nyc_traf_vol_bk, how="inner")
    .drop(['index_right', 'Location', 'geometry', 'Latitude', 'Longitude', 'Station_Name'], axis=1)
    .sort_index()
)
# Make sure AADT is numeric before it is averaged per station.
stations_traf_vol_bk['AADT'] = pd.to_numeric(stations_traf_vol_bk['AADT'])
stations_traf_vol_bk.head(10)
Out[50]:
In [51]:
# Average the remaining columns (including AADT) over all matched segments of
# each station, giving one row per Station_id with Station_id as a column.
stations_traf_vol_bk = (
    stations_traf_vol_bk
    .groupby(['Station_id'])
    .mean()
    .reset_index()
)
stations_traf_vol_bk.head()
Out[51]:
In [52]:
# Number of rows in the Brooklyn per-station table
stations_traf_vol_bk.shape[0]
Out[52]:
In [56]:
# Stack the Manhattan and Brooklyn per-station tables into one table ordered
# by Station_id with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append (removed in pandas 2.0), and
# reset_index(drop=True) replaces resetting the index and then dropping the
# resulting 'index' column.
stations_traf_vol = (
    pd.concat([stations_traf_vol_manh, stations_traf_vol_bk])
    .sort_values('Station_id')
    .reset_index(drop=True)
)
stations_traf_vol.head()
Out[56]:
In [58]:
# Write the combined per-station traffic volume table to disk
# (the dataframe index is written as the first column).
traffic_volume_csv = "../data/processed/traffic-volume.csv"
stations_traf_vol.to_csv(traffic_volume_csv)
In [ ]:
In [ ]: