In [5]:
import numpy as np
import pandas as pd
import geopandas as gp
import os
from shapely.geometry import Point
In [6]:
# Importing CitiBike Data
# Load the processed station table and drop incomplete rows. dropna() keeps the
# original row labels, so the index may contain gaps from here on.
stations = pd.read_csv('../data/processed/stations.csv')
stations = stations.dropna()
# Build one point per station (x = Longitude, y = Latitude). The GeoSeries must
# carry the same index as `stations`: GeoDataFrame aligns the geometry on index
# labels, so a default 0..n-1 index would pair stations with the wrong geometry
# (or none at all) whenever dropna() removed a row.
geometry = gp.GeoSeries(
    [Point(xy) for xy in zip(stations.Longitude, stations.Latitude)],
    index=stations.index,
)
# Using buffer of 0.0005; the distance is in coordinate units (degrees of lon/lat here).
geometry = geometry.buffer(.0005)
geo_stations = gp.GeoDataFrame(stations, geometry=geometry)
geo_stations.crs = {'init' :'epsg:4326'}
# Persist the buffered station polygons for later steps.
geo_stations.to_file('../data/interim/geo_stations')
geo_stations.head()
Out[6]:
In [7]:
# Importing Subway Entrance Data
# Imported geojson file from https://data.cityofnewyork.us/api/geospatial/drex-xx56?method=export&format=GeoJSON
entrances_raw = gp.read_file('../data/external/subway-entrances.geojson')
# Keep only the geometry plus the line/name attributes, discard incomplete
# rows, and reproject to EPSG:4326.
subway_data = (
    entrances_raw[['geometry', 'line', 'name']]
    .dropna()
    .to_crs({'init' :'epsg:4326'})
)
subway_data.head()
Out[7]:
In [8]:
# Spatial inner join: keep every (station buffer, subway entrance) pair whose
# geometries intersect. The result gains an `index_right` column referring to
# the matching row of subway_data.
aaron = gp.sjoin(
    geo_stations,
    subway_data,
    how='inner',
    op='intersects',
)
aaron.head()
Out[8]:
In [12]:
# Save for map
# `index_right` (added by the spatial join) holds index *labels* of subway_data,
# and subway_data went through dropna(), so labels need not equal row positions.
# Select the matched entrances by label with .loc rather than by position with .iloc.
subway_data.loc[aaron.index_right.unique(), :].to_csv("../data/map/subway-entrances.csv")
In [5]:
# Collapse the join result to one row per (CitiBike station id, street
# location, nearby subway line); the remaining columns become match counts.
group_keys = ['Station_id' , 'Location', 'line']
a = aaron.groupby(group_keys).count()
a.head()
Out[5]:
In [6]:
# Dropping extraneous columns: only the group keys held in the index are needed
extraneous_columns = [
    'Station_Name',
    'Latitude',
    'Longitude',
    'geometry',
    'index_right',
    'name',
]
a = a.drop(extraneous_columns, axis=1)
a.head()
Out[6]:
In [7]:
# Turn the group keys back into ordinary columns and flag every remaining
# CitiBike station as being in proximity to a subway entrance (flag value 1)
a = a.reset_index().assign(subway_entrance=1)
a.head()
Out[7]:
In [8]:
# Write the station / subway-line proximity table to the processed data folder
processed_csv_path = '../data/processed/subway-entrances.csv'
a.to_csv(processed_csv_path)