Clean Street Quality Data and Merge with Citi Bike Station Data


In [2]:
# Import the street assessment shapefile into a geopandas dataframe and remove streets with no rating
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Read the street assessment layer and keep only the rating and geometry columns.
street_segments = gpd.read_file(
    '../data/external/street-assessment/StreetAssessmentRating/StreetAssessmentRating.shp'
)[['Rating_B', 'geometry']]

# A Rating_B of 0 is treated as "no rating", so those segments are discarded.
has_rating = street_segments['Rating_B'] != 0

# Reproject to EPSG:4326, the same CRS that is assigned to the station layer.
# NOTE(review): the {'init': ...} dict form is reported as deprecated by newer
# pyproj/geopandas releases -- confirm the target version before changing it, and
# change it for the station layer at the same time so both layers still compare equal.
st_quality = street_segments.loc[has_rating].to_crs({'init': 'epsg:4326'})
st_quality.head()


Out[2]:
Rating_B geometry
5 8 LINESTRING (-74.25505718173595 40.505211202870...
6 8 LINESTRING (-74.25422295126857 40.506165510159...
7 8 LINESTRING (-74.25317481600673 40.504767963667...
8 7 LINESTRING (-74.2530943012904 40.5062970118639...
9 6 LINESTRING (-74.25493722035228 40.507701030695...

In [12]:
# Confirm the street layer's CRS after the to_crs reprojection to epsg:4326
st_quality.crs


Out[12]:
{'init': 'epsg:4326'}

In [3]:
# Import citibike station csv into geopandas dataframe
from shapely.geometry import Point

stations = pd.read_csv('../data/processed/stations.csv')

# One point per station, built in (longitude, latitude) = (x, y) order.
station_points = gpd.GeoSeries(
    [Point(lon, lat) for lon, lat in zip(stations.Longitude, stations.Latitude)]
)

# Grow each point into a small polygon so it can intersect nearby street lines.
# The radius is expressed in the units of the coordinates (degrees of lon/lat here),
# not in metres.
geometry = station_points.buffer(.0005)

# Attach the polygons to the station table and tag the layer as EPSG:4326.
geo_stations = gpd.GeoDataFrame(stations, geometry=geometry, crs={'init': 'epsg:4326'})
geo_stations.head()


Out[3]:
Station_id Station_Name Location Latitude Longitude Zip geometry
0 72 W 52 St & 11 Ave W 52 St & 11 Ave 40.767272 -73.993929 10019 POLYGON ((-73.99342888 40.76727216, -73.993431...
1 79 Franklin St & W Broadway Franklin St & W Broadway 40.719116 -74.006667 10013 POLYGON ((-74.00616660999999 40.71911552, -74....
2 82 St James Pl & Pearl St St James Pl & Pearl St 40.711174 -74.000165 10038 POLYGON ((-73.99966544999999 40.71117416, -73....
3 83 Atlantic Ave & Fort Greene Pl Atlantic Ave & Fort Greene Pl 40.683826 -73.976323 11217 POLYGON ((-73.97582328 40.68382604, -73.975825...
4 116 W 17 St & 8 Ave W 17 St & 8 Ave 40.741776 -74.001497 10011 POLYGON ((-74.00099745999999 40.74177603, -74....

In [3]:
# Station count before the spatial join (baseline for spotting stations lost by the inner join)
geo_stations.shape[0]


Out[3]:
664

In [15]:
# Confirm the station layer's CRS; it should match the street layer's before the spatial join
geo_stations.crs


Out[15]:
{'init': 'epsg:4326'}

In [4]:
# Spatially join each buffered station polygon to every street segment it intersects.
# The inner join keeps only stations that touch at least one rated segment and yields
# one row per (station, segment) pair, with the segment's index in `index_right`.
#
# The spatial predicate is deliberately left at its default, 'intersects': the keyword
# that selects it was renamed from `op` to `predicate` in newer geopandas releases, so
# passing either name explicitly ties this cell to one range of geopandas versions.
stations_st_quality = gpd.sjoin(geo_stations, st_quality, how="inner")

In [5]:
# Export only the street segments that were matched to at least one station.
# `index_right` holds the st_quality index label of each matched segment.
matched_segment_ids = stations_st_quality['index_right'].unique()
matched_streets = st_quality.loc[matched_segment_ids]
matched_streets.to_csv("../data/map/street-assessment.csv")

In [6]:
# Remove the join bookkeeping column (`index_right`) and the `Location` column,
# neither of which is used by the aggregation that follows.
stations_st_quality = stations_st_quality.drop(labels=['index_right', 'Location'], axis=1)
stations_st_quality.head(10)


Out[6]:
Station_id Station_Name Latitude Longitude Zip geometry Rating_B
0 72 W 52 St & 11 Ave 40.767272 -73.993929 10019 POLYGON ((-73.99342888 40.76727216, -73.993431... 8
0 72 W 52 St & 11 Ave 40.767272 -73.993929 10019 POLYGON ((-73.99342888 40.76727216, -73.993431... 8
0 72 W 52 St & 11 Ave 40.767272 -73.993929 10019 POLYGON ((-73.99342888 40.76727216, -73.993431... 8
1 79 Franklin St & W Broadway 40.719116 -74.006667 10013 POLYGON ((-74.00616660999999 40.71911552, -74.... 9
1 79 Franklin St & W Broadway 40.719116 -74.006667 10013 POLYGON ((-74.00616660999999 40.71911552, -74.... 8
1 79 Franklin St & W Broadway 40.719116 -74.006667 10013 POLYGON ((-74.00616660999999 40.71911552, -74.... 9
1 79 Franklin St & W Broadway 40.719116 -74.006667 10013 POLYGON ((-74.00616660999999 40.71911552, -74.... 9
1 79 Franklin St & W Broadway 40.719116 -74.006667 10013 POLYGON ((-74.00616660999999 40.71911552, -74.... 8
1 79 Franklin St & W Broadway 40.719116 -74.006667 10013 POLYGON ((-74.00616660999999 40.71911552, -74.... 8
1 79 Franklin St & W Broadway 40.719116 -74.006667 10013 POLYGON ((-74.00616660999999 40.71911552, -74.... 9

In [17]:
# Average the street quality ratings of all segments matched to each station.
# Only `Rating_B` is aggregated: asking for the mean of every remaining column would
# also hit the non-numeric `Zip` and `geometry` columns, which older pandas silently
# dropped but newer pandas rejects with a TypeError.
station_keys = ['Station_id', 'Station_Name', 'Latitude', 'Longitude']
stations_st_quality = (
    stations_st_quality
    .groupby(station_keys)['Rating_B']
    .mean()
    .reset_index()  # turn the group keys back into ordinary columns
)
stations_st_quality.head()


Out[17]:
Station_id Station_Name Latitude Longitude Rating_B
0 72 W 52 St & 11 Ave 40.767272 -73.993929 8.000000
1 79 Franklin St & W Broadway 40.719116 -74.006667 8.571429
2 82 St James Pl & Pearl St 40.711174 -74.000165 7.333333
3 83 Atlantic Ave & Fort Greene Pl 40.683826 -73.976323 7.500000
4 116 W 17 St & 8 Ave 40.741776 -74.001497 8.500000

In [18]:
# Station count after the join and aggregation; compare with the pre-join count
stations_st_quality.shape[0]


Out[18]:
604

In [19]:
# Drop the descriptive station columns so only the remaining columns are exported,
# then write the merged result to CSV. The DataFrame index is written as well,
# because to_csv is called with its default arguments.
stations_st_quality = stations_st_quality.drop(
    labels=['Station_Name', 'Latitude', 'Longitude'], axis=1
)
stations_st_quality.to_csv("../data/processed/street-assessment.csv")

In [ ]: