In [1]:
import geopandas as gp
import pandas as pd
import numpy as np
import os
from shapely.geometry import Point
Borough Code:
1 - Manhattan
2 - Bronx
3 - Brooklyn
4 - Queens
5 - Staten Island
In [3]:
# Load the census-tract geometries from the local GeoJSON export.
# Source: https://data.cityofnewyork.us/api/geospatial/fxpq-c8ku?method=export&format=GeoJSON
census_tracts_path = '../data/external/census-tracts.geojson'
census = gp.read_file(census_tracts_path)
census.head()
Out[3]:
In [4]:
# Cast the tract id and borough code to numbers; both are used as merge keys
# against the population table further down.
for source_col, target_col in (('ct_2010', 'census_tract'), ('boro_code', 'boro_code')):
    census[target_col] = pd.to_numeric(census[source_col])
In [5]:
# Show the available column names before deciding which ones to drop.
census.columns
Out[5]:
In [6]:
# Remove the tract attributes that the rest of the notebook does not use.
unused_census_cols = [u'boro_ct_2010', u'boro_name', u'cdeligibil', u'ct_2010',
                      u'ctlabel', u'ntacode', u'ntaname', u'puma']
census = census.drop(unused_census_cols, axis=1)
census.head()
Out[6]:
In [7]:
# Check the (rows, columns) size of the tract table after dropping columns.
census.shape
Out[7]:
In [9]:
# Load the per-tract population counts from the local CSV.
population_csv_path = '../data/external/nyc-population-census.csv'
pop_by_census = pd.read_csv(population_csv_path)
pop_by_census.head()
Out[9]:
In [10]:
# Keep only the 2010 census rows. The explicit .copy() makes the filtered frame
# independent of the frame it was sliced from, so adding a column below does
# not trigger pandas' SettingWithCopyWarning.
pop_by_census = pop_by_census[pop_by_census.Year == 2010].copy()
total_pop = pop_by_census['Population'].sum()
# 'density' is each tract's share of the summed 2010 population (a fraction),
# not a count of people per unit area.
pop_by_census['density'] = pop_by_census['Population']/total_pop
pop_by_census.head()
Out[10]:
In [11]:
# Discard the columns that are not needed once the 2010 rows are selected.
pop_by_census = pop_by_census.drop([u'Borough', u'Year', u'FIPS County Code'], axis=1)
pop_by_census.head()
Out[11]:
In [12]:
# Give the join-key columns the same names as in the census tract table.
column_aliases = {'DCP Borough Code': 'boro_code', 'Census Tract': 'census_tract'}
pop_by_census = pop_by_census.rename(columns=column_aliases)
pop_by_census.shape
Out[12]:
In [13]:
# Attach the population figures to the tract geometries, matching on the
# numeric tract id and borough code present in both tables.
merged_pop_geo = pd.merge(pop_by_census, census, on = ['census_tract', 'boro_code'], how = 'inner')
# pd.merge returns a plain DataFrame here because its left operand is one.
# Wrap the result in a GeoDataFrame (with the CRS set at construction) so the
# later spatial join receives a real GeoDataFrame; merely assigning `.crs` on a
# DataFrame does not make it one.
census_pop = gp.GeoDataFrame(merged_pop_geo, geometry='geometry', crs={'init' :'epsg:4326'})
census_pop.head()
Out[13]:
In [16]:
# Load the processed CitiBike station table.
stations_csv_path = '../data/processed/stations.csv'
stations = pd.read_csv(stations_csv_path)
stations.head()
Out[16]:
In [ ]:
# ### Point sjoin
# geometry = gp.GeoSeries([Point(xy) for xy in zip(stations.Longitude, stations.Latitude)])
# point_stations = gp.GeoDataFrame(stations, geometry=geometry)
# point_stations.crs = {'init' :'epsg:4326'}
# point_stations.to_file('geo_stations')
# point_citibike_popdensity = gp.sjoin(point_stations, census_pop, how = 'inner', op = 'intersects')
# point_citibike_popdensity.head()
In [17]:
# Turn every station into a small buffered shape around its coordinates so it
# can intersect more than one census tract.
# NOTE(review): the radius is applied to raw Longitude/Latitude values, so it is
# presumably in degrees rather than metres -- confirm this is intended.
BUFFER_RADIUS = .0005
station_points = [Point(lon, lat) for lon, lat in zip(stations.Longitude, stations.Latitude)]
geometry = gp.GeoSeries(station_points).buffer(BUFFER_RADIUS)
geo_stations = gp.GeoDataFrame(stations, geometry=geometry)
geo_stations.crs = {'init' :'epsg:4326'}
geo_stations.head()
Out[17]:
In [18]:
# Spatially join each buffered station with every census tract it intersects.
# The predicate keyword is left out on purpose: 'intersects' is sjoin's default,
# and the keyword's name differs between geopandas releases (`op` was replaced
# by `predicate`), so relying on the default runs on both.
citibike_popdensity = gp.sjoin(geo_stations, census_pop, how = 'inner')
citibike_popdensity.head()
Out[18]:
In [19]:
# List the distinct borough codes that appear among the joined stations.
citibike_popdensity['boro_code'].unique()
Out[19]:
In [20]:
# Show the columns produced by the spatial join before trimming them.
citibike_popdensity.columns
Out[20]:
In [21]:
# Drop the station location fields and the join's right-hand index column,
# which are not used in the aggregation that follows.
redundant_cols = [u'Location', u'Latitude', u'Longitude', u'index_right']
citibike_popdensity = citibike_popdensity.drop(redundant_cols, axis=1)
citibike_popdensity.head()
Out[21]:
In [22]:
# Check the (rows, columns) size of the joined station/tract table.
citibike_popdensity.shape
Out[22]:
In [23]:
# Number of joined rows with a non-null density for each station, i.e. how many
# tract matches each station's buffer produced.
rows_per_station = citibike_popdensity.groupby(['Station_id'])[['density']].count()
rows_per_station.head(10)
Out[23]:
In [24]:
# Population density around each citibike stand: a station's buffer can
# intersect several census tracts, so average the density of all tracts it
# touches.
# NOTE(review): grouping by boro_code as well means a station whose buffer
# crosses a borough boundary would yield one row per borough -- confirm that
# is acceptable downstream.
grouped_data = (
    citibike_popdensity
    .groupby(['Station_id', 'boro_code'])[['density']]
    .mean()
    .reset_index()
)
grouped_data.head()
Out[24]:
In [25]:
# Write the per-station average density to the processed-data folder.
pop_density_path = '../data/processed/pop-density.csv'
grouped_data.to_csv(pop_density_path)
In [ ]:
# #Merging with census tract data (Point)
# merged_citibike_avgdensity = pd.merge(grouped_data, point_citibike_popdensity, on = 'Station_id', how = 'inner' )
# merged_citibike_avgdensity.head()
In [ ]:
# #Merging with census tract data (0.0005 Buffer)
# merged_citibike_avgdensity = pd.merge(grouped_data, citibike_popdensity, on = 'Station_id', how = 'inner' )
# merged_citibike_avgdensity.head()
In [ ]:
# merged_citibike_avgdensity.columns
In [ ]:
# # For buffer = 0.0005
# merged_citibike_avgdensity.drop([ u'Station_Name', u'geometry', u'boro_code_y', u'Population', \
# u'density', u'shape_area', u'shape_leng', 'census_tract'], axis = 1, inplace = True)
In [ ]:
#Dropping unnecessary data
# # For point buffer
# merged_citibike_avgdensity.drop([ u'Station_Name', u'geometry', u'boro_code_y', u'Population', \
# u'density', u'shape_area', u'shape_leng', u'Latitude', u'Longitude', \
# u'index_right', u'Location', 'census_tract'], axis = 1, inplace = True)
In [ ]:
# merged_citibike_avgdensity.head()
In [ ]:
# #Average population density per citibike stand
# merged_citibike_avgdensity.rename(columns={'boro_code_x' : 'Borough_code'}, inplace = True)
# merged_citibike_avgdensity.head()
In [ ]:
# merged_citibike_avgdensity = merged_citibike_avgdensity.groupby(['Station_id']).mean()
# merged_citibike_avgdensity.reset_index(inplace=True)
In [ ]:
# merged_citibike_avgdensity.head()