In [2]:
import numpy as np
import pandas as pd
import geopandas as gp
import os
from shapely.geometry import Point
In [11]:
# Read the NYC parks-properties layer and reproject it to EPSG:4326.
# Source export:
# https://data.cityofnewyork.us/api/geospatial/rjaj-zgq7?method=export&format=GeoJSON
parks = (
    gp.read_file('../data/external/parks.geojson')
    .to_crs({'init': 'epsg:4326'})
)
parks.head()
Out[11]:
In [4]:
# Discard the administrative attribute columns listed below; every other
# column of `parks` is kept as-is.
parks = parks.drop(
    [
        u'acres', u'address', u'borough', u'communityb',
        u'councildis', u'gispropnum',
        u'nys_assemb', u'nys_senate', u'typecatego',
        u'us_congres', u'waterfront', u'zipcode',
    ],
    axis=1,
)
In [5]:
# Load the processed CitiBike station table, keep only rows with no missing
# values, and discard the Station_Name column.
stations = (
    pd.read_csv('../data/processed/stations.csv')
    .dropna()
    .drop(['Station_Name'], axis=1)
)
stations.head()
Out[5]:
In [6]:
# Build a buffered footprint around each CitiBike station so it can be
# spatially joined against the park polygons.
# Points are created from (Longitude, Latitude), so the 0.0005 buffer distance
# is expressed in those same coordinate units.
#
# The GeoSeries must carry the same index as `stations`: `stations` was filtered
# with dropna(), so its index may contain gaps, and GeoDataFrame aligns the
# geometry column on index labels. With a default 0..n-1 index, footprints would
# be attached to the wrong rows (or be missing) whenever rows had been dropped.
geometry = gp.GeoSeries(
    [Point(xy) for xy in zip(stations.Longitude, stations.Latitude)],
    index=stations.index,
)
geometry = geometry.buffer(.0005)  # Using buffer of 0.0005
geo_stations = gp.GeoDataFrame(stations, geometry=geometry)
geo_stations.crs = {'init' :'epsg:4326'}
# Write the buffered stations to disk.
geo_stations.to_file('../data/interim/geo_stations')
geo_stations.head()
Out[6]:
In [7]:
# Spatially join the buffered stations to the parks: an inner join that keeps
# only the (station, park) pairs whose geometries intersect.
bike_park = gp.sjoin(
    left_df=geo_stations,
    right_df=parks,
    how='inner',
    op='intersects',
)
bike_park.head()
Out[7]:
In [13]:
# Create map data: export the parks matched by at least one station buffer.
# `index_right` holds index *labels* of `parks` carried over by the spatial
# join, so rows are selected with label-based .loc rather than positional
# .iloc (the two only coincide while `parks` has a default 0..n-1 index).
matched_park_labels = bike_park.index_right.unique()
parks.loc[matched_park_labels, :].to_csv("../data/map/parks.csv")
In [8]:
# Collapse the join result to one row per distinct
# (Station_id, Location, location, signname) combination; the remaining
# columns then hold per-group counts of non-null values.
bike_park = bike_park.groupby(
    by=['Station_id', 'Location', 'location', 'signname']
).count()
bike_park.head()
Out[8]:
In [9]:
# Remove the Latitude / Longitude / geometry / index_right columns from the
# grouped table.
bike_park = bike_park.drop(
    ['Latitude', 'Longitude', 'geometry', 'index_right'],
    axis=1,
)
bike_park.head()
Out[9]:
In [10]:
# Flag every CitiBike station that intersects a park with park = 1.
# The group keys are moved back into ordinary columns first, and the two
# similarly named location columns are renamed to say which side they come from.
bike_park = bike_park.reset_index().rename(
    columns={'Location': 'citibike_location', 'location': 'park_location'}
)
bike_park['park'] = 1
bike_park.head()
Out[10]:
In [11]:
# Write the station/park table to the processed-data folder as CSV.
# `index` is left at its default, so the row index is written as well.
bike_park.to_csv(path_or_buf='../data/processed/parks.csv')