Note: I did not make changes to the Pull data file



In [1]:

    
import geopandas as gp
import pandas as pd
import numpy as np
import os
from shapely.geometry import Point

Borough Code:

1 - Manhattan

2 - Bronx

3 - Brooklyn

4 - Queens

5 - Staten Island



In [3]:

    
# Load the census-tract polygons from the local GeoJSON export.
# Source: https://data.cityofnewyork.us/api/geospatial/fxpq-c8ku?method=export&format=GeoJSON
census = gp.read_file(
    '../data/external/census-tracts.geojson'
)
census.head(5)









    Out[3]:






  
    
      
      boro_code
      boro_ct_2010
      boro_name
      cdeligibil
      ct_2010
      ctlabel
      geometry
      ntacode
      ntaname
      puma
      shape_area
      shape_leng
    
  
  
    
      0
      5
      5000900
      Staten Island
      I
      000900
      9
      (POLYGON ((-74.07920577013245 40.6434307837456...
      SI22
      West New Brighton-New Brighton-St. George
      3903
      2497009.69813
      7729.01679376
    
    
      1
      5
      5007400
      Staten Island
      I
      007400
      74
      (POLYGON ((-74.05974734759452 40.5938486115672...
      SI14
      Grasmere-Arrochar-Ft. Wadsworth
      3902
      5788237.79601
      9902.94847281
    
    
      2
      1
      1003200
      Manhattan
      I
      003200
      32
      (POLYGON ((-73.97990650235904 40.7268657730023...
      MN22
      East Village
      3809
      2334190.23228
      6358.38668446
    
    
      3
      1
      1009800
      Manhattan
      I
      009800
      98
      (POLYGON ((-73.96432543478758 40.7563815309909...
      MN19
      Turtle Bay-East Midtown
      3808
      1906016.35002
      5534.19981063
    
    
      4
      1
      1010000
      Manhattan
      I
      010000
      100
      (POLYGON ((-73.96802436915851 40.7595781400528...
      MN19
      Turtle Bay-East Midtown
      3808
      1860938.37721
      5692.16873705



In [4]:

    
# Cast the borough code and the tract identifier to numbers; both are used
# later as the merge keys with the population table.
census['boro_code'] = pd.to_numeric(census['boro_code'])
census['census_tract'] = pd.to_numeric(census['ct_2010'])



In [5]:

    
# Inspect the column names before dropping the ones that are not needed.
census.columns









    Out[5]:





Index(['boro_code', 'boro_ct_2010', 'boro_name', 'cdeligibil', 'ct_2010',
       'ctlabel', 'geometry', 'ntacode', 'ntaname', 'puma', 'shape_area',
       'shape_leng', 'census_tract'],
      dtype='object')



In [6]:

    
# Remove the tract attributes that are not used in the rest of this notebook.
# Only the borough code, geometry, shape measurements and numeric tract id remain.
census.drop(
    [
        'boro_ct_2010',
        'boro_name',
        'cdeligibil',
        'ct_2010',
        'ctlabel',
        'ntacode',
        'ntaname',
        'puma',
    ],
    axis=1,
    inplace=True,
)
census.head()









    Out[6]:






  
    
      
      boro_code
      geometry
      shape_area
      shape_leng
      census_tract
    
  
  
    
      0
      5
      (POLYGON ((-74.07920577013245 40.6434307837456...
      2497009.69813
      7729.01679376
      900
    
    
      1
      5
      (POLYGON ((-74.05974734759452 40.5938486115672...
      5788237.79601
      9902.94847281
      7400
    
    
      2
      1
      (POLYGON ((-73.97990650235904 40.7268657730023...
      2334190.23228
      6358.38668446
      3200
    
    
      3
      1
      (POLYGON ((-73.96432543478758 40.7563815309909...
      1906016.35002
      5534.19981063
      9800
    
    
      4
      1
      (POLYGON ((-73.96802436915851 40.7595781400528...
      1860938.37721
      5692.16873705
      10000



In [7]:

    
# Checking size: (number of rows, number of columns) of the tract table after the drop
census.shape









    Out[7]:





(2166, 5)



In [9]:

    
# Load the population-by-census-tract table from the local CSV file.
pop_by_census = pd.read_csv(
    '../data/external/nyc-population-census.csv'
)
pop_by_census.head(5)









    Out[9]:






  
    
      
      Borough
      Year
      FIPS County Code
      DCP Borough Code
      Census Tract
      Population
    
  
  
    
      0
      Bronx
      2000
      5
      2
      100
      12780
    
    
      1
      Bronx
      2000
      5
      2
      200
      3545
    
    
      2
      Bronx
      2000
      5
      2
      400
      3314
    
    
      3
      Bronx
      2000
      5
      2
      1600
      5237
    
    
      4
      Bronx
      2000
      5
      2
      1900
      1584



In [10]:

    
# Keep only the 2010 census rows. The explicit .copy() makes the result an
# independent frame, so adding the 'density' column below does not write into
# a slice of the full table (which raises pandas' SettingWithCopyWarning).
pop_by_census = pop_by_census[pop_by_census['Year'] == 2010].copy()

# NOTE: 'density' is each tract's share of the total 2010 population
# (tract population / sum of all tract populations), not people per unit area.
total_pop = pop_by_census['Population'].sum()
pop_by_census['density'] = pop_by_census['Population'] / total_pop
pop_by_census.head()









    Out[10]:






  
    
      
      Borough
      Year
      FIPS County Code
      DCP Borough Code
      Census Tract
      Population
      density
    
  
  
    
      2168
      Bronx
      2010
      5
      2
      100
      11091
      0.001357
    
    
      2169
      Bronx
      2010
      5
      2
      200
      4334
      0.000530
    
    
      2170
      Bronx
      2010
      5
      2
      400
      5503
      0.000673
    
    
      2171
      Bronx
      2010
      5
      2
      1600
      5643
      0.000690
    
    
      2172
      Bronx
      2010
      5
      2
      1900
      1917
      0.000234



In [11]:

    
# Drop the columns that are not needed for the merge with the tract geometries.
# The result is reassigned instead of using inplace=True: an in-place drop on a
# frame that was produced by filtering another frame can raise
# SettingWithCopyWarning, and reassignment leaves the previous object untouched.
pop_by_census = pop_by_census.drop(['Borough', 'Year', 'FIPS County Code'], axis=1)
pop_by_census.head()









    Out[11]:






  
    
      
      DCP Borough Code
      Census Tract
      Population
      density
    
  
  
    
      2168
      2
      100
      11091
      0.001357
    
    
      2169
      2
      200
      4334
      0.000530
    
    
      2170
      2
      400
      5503
      0.000673
    
    
      2171
      2
      1600
      5643
      0.000690
    
    
      2172
      2
      1900
      1917
      0.000234



In [12]:

    
# Give the key columns the same names as in the tract table so the two
# frames can be merged on ['census_tract', 'boro_code'].
pop_by_census.rename(
    columns={
        'DCP Borough Code': 'boro_code',
        'Census Tract': 'census_tract',
    },
    inplace=True,
)
# pop_by_census.head()
pop_by_census.shape









    Out[12]:





(2168, 4)



In [13]:

    
# Merging population with geo_file
# pd.merge returns the type of its left operand, and pop_by_census is a plain
# pandas DataFrame, so the merged result is wrapped in a GeoDataFrame explicitly.
# Without this, census_pop is only a DataFrame that happens to hold a 'geometry'
# column, and spatial operations that require a GeoDataFrame can reject it.
census_pop = gp.GeoDataFrame(
    pd.merge(pop_by_census, census, on=['census_tract', 'boro_code'], how='inner'),
    geometry='geometry',
)
# Only assign the CRS when the geometry column did not carry one through the
# merge; this avoids overriding an existing (equivalent) CRS.
if census_pop.crs is None:
    census_pop.crs = {'init': 'epsg:4326'}
census_pop.head()









    Out[13]:






  
    
      
      boro_code
      census_tract
      Population
      density
      geometry
      shape_area
      shape_leng
    
  
  
    
      0
      2
      100
      11091
      0.001357
      (POLYGON ((-73.87287195903875 40.7859750278047...
      18154596.0081
      18903.3467294
    
    
      1
      2
      200
      4334
      0.000530
      (POLYGON ((-73.85651604030653 40.8052412204751...
      5004821.2311
      15591.2827425
    
    
      2
      2
      400
      5503
      0.000673
      (POLYGON ((-73.84610660457847 40.8130999892054...
      8562150.11049
      24707.0790039
    
    
      3
      2
      1600
      5643
      0.000690
      (POLYGON ((-73.85513639815333 40.8224361893100...
      5221330.06703
      9671.30620489
    
    
      4
      2
      1900
      1917
      0.000234
      (POLYGON ((-73.89680883223774 40.7958084451597...
      17964481.0319
      29989.8448165



In [16]:

    
# Load the processed CitiBike station list (id, name, coordinates, zip).
stations = pd.read_csv(
    '../data/processed/stations.csv'
)
stations.head(5)









    Out[16]:






  
    
      
      Station_id
      Station_Name
      Location
      Latitude
      Longitude
      Zip
    
  
  
    
      0
      72
      W 52 St & 11 Ave
      W 52 St & 11 Ave
      40.767272
      -73.993929
      10019
    
    
      1
      79
      Franklin St & W Broadway
      Franklin St & W Broadway
      40.719116
      -74.006667
      10013
    
    
      2
      82
      St James Pl & Pearl St
      St James Pl & Pearl St
      40.711174
      -74.000165
      10038
    
    
      3
      83
      Atlantic Ave & Fort Greene Pl
      Atlantic Ave & Fort Greene Pl
      40.683826
      -73.976323
      11217
    
    
      4
      116
      W 17 St & 8 Ave
      W 17 St & 8 Ave
      40.741776
      -74.001497
      10011



In [ ]:

    
# ### Point sjoin
# geometry = gp.GeoSeries([Point(xy) for xy in zip(stations.Longitude, stations.Latitude)])
# point_stations = gp.GeoDataFrame(stations, geometry=geometry)
# point_stations.crs = {'init' :'epsg:4326'}
# point_stations.to_file('geo_stations')
# point_citibike_popdensity = gp.sjoin(point_stations, census_pop, how = 'inner', op = 'intersects')
# point_citibike_popdensity.head()



In [17]:

    
# Build a circular buffer polygon around every station.
# The points are (Longitude, Latitude) pairs, so the 0.0005 buffer radius is
# expressed in the same units as those coordinates (degrees).
geometry = gp.GeoSeries(
    [Point(lon, lat) for lon, lat in zip(stations.Longitude, stations.Latitude)]
).buffer(.0005)
geo_stations = gp.GeoDataFrame(stations, geometry=geometry)
geo_stations.crs = {'init': 'epsg:4326'}
geo_stations.head()









    Out[17]:






  
    
      
      Station_id
      Station_Name
      Location
      Latitude
      Longitude
      Zip
      geometry
    
  
  
    
      0
      72
      W 52 St & 11 Ave
      W 52 St & 11 Ave
      40.767272
      -73.993929
      10019
      POLYGON ((-73.99342888 40.76727216, -73.993431...
    
    
      1
      79
      Franklin St & W Broadway
      Franklin St & W Broadway
      40.719116
      -74.006667
      10013
      POLYGON ((-74.00616660999999 40.71911552, -74....
    
    
      2
      82
      St James Pl & Pearl St
      St James Pl & Pearl St
      40.711174
      -74.000165
      10038
      POLYGON ((-73.99966544999999 40.71117416, -73....
    
    
      3
      83
      Atlantic Ave & Fort Greene Pl
      Atlantic Ave & Fort Greene Pl
      40.683826
      -73.976323
      11217
      POLYGON ((-73.97582328 40.68382604, -73.975825...
    
    
      4
      116
      W 17 St & 8 Ave
      W 17 St & 8 Ave
      40.741776
      -74.001497
      10011
      POLYGON ((-74.00099745999999 40.74177603, -74....



In [18]:

    
# Spatial join: pair every station buffer with each census tract it intersects
# (a buffer that touches several tracts produces one row per tract).
# 'intersects' is sjoin's default test, so it is not passed explicitly: the
# keyword is called `op` in older geopandas releases and `predicate` in newer
# ones, and relying on the default keeps this cell working with both.
citibike_popdensity = gp.sjoin(geo_stations, census_pop, how='inner')
citibike_popdensity.head()









    Out[18]:






  
    
      
      Station_id
      Station_Name
      Location
      Latitude
      Longitude
      Zip
      geometry
      index_right
      boro_code
      census_tract
      Population
      density
      shape_area
      shape_leng
    
  
  
    
      0
      72
      W 52 St & 11 Ave
      W 52 St & 11 Ave
      40.767272
      -73.993929
      10019
      POLYGON ((-73.99342888 40.76727216, -73.993431...
      1234
      1
      13500
      6596
      0.000807
      4513475.18656
      15450.2036171
    
    
      237
      480
      W 53 St & 10 Ave
      W 53 St & 10 Ave
      40.766697
      -73.990617
      10019
      POLYGON ((-73.99011727999999 40.76669671, -73....
      1234
      1
      13500
      6596
      0.000807
      4513475.18656
      15450.2036171
    
    
      267
      513
      W 56 St & 10 Ave
      W 56 St & 10 Ave
      40.768254
      -73.988639
      10019
      POLYGON ((-73.988139 40.768254, -73.9881414076...
      1234
      1
      13500
      6596
      0.000807
      4513475.18656
      15450.2036171
    
    
      1
      79
      Franklin St & W Broadway
      Franklin St & W Broadway
      40.719116
      -74.006667
      10013
      POLYGON ((-74.00616660999999 40.71911552, -74....
      1132
      1
      3300
      5156
      0.000631
      3677727.6405
      8170.75614772
    
    
      12
      146
      Hudson St & Reade St
      Hudson St & Reade St
      40.716250
      -74.009106
      10013
      POLYGON ((-74.00860589999999 40.71625008, -74....
      1132
      1
      3300
      5156
      0.000631
      3677727.6405
      8170.75614772



In [19]:

    
# List the borough codes that appear in the station/tract join,
# i.e. the boroughs in which CitiBike stations are located.
citibike_popdensity['boro_code'].unique()









    Out[19]:





array([1, 3, 4])



In [20]:

    
# Inspect the columns produced by the spatial join before dropping the unused ones.
citibike_popdensity.columns









    Out[20]:





Index(['Station_id', 'Station_Name', 'Location', 'Latitude', 'Longitude',
       'Zip', 'geometry', 'index_right', 'boro_code', 'census_tract',
       'Population', 'density', 'shape_area', 'shape_leng'],
      dtype='object')



In [21]:

    
# Drop the station coordinate/location columns and the join's helper index;
# the buffer geometry and the tract attributes are kept.
citibike_popdensity.drop(
    [
        'Location',
        'Latitude',
        'Longitude',
        'index_right',
    ],
    axis=1,
    inplace=True,
)
citibike_popdensity.head()









    Out[21]:






  
    
      
      Station_id
      Station_Name
      Zip
      geometry
      boro_code
      census_tract
      Population
      density
      shape_area
      shape_leng
    
  
  
    
      0
      72
      W 52 St & 11 Ave
      10019
      POLYGON ((-73.99342888 40.76727216, -73.993431...
      1
      13500
      6596
      0.000807
      4513475.18656
      15450.2036171
    
    
      237
      480
      W 53 St & 10 Ave
      10019
      POLYGON ((-73.99011727999999 40.76669671, -73....
      1
      13500
      6596
      0.000807
      4513475.18656
      15450.2036171
    
    
      267
      513
      W 56 St & 10 Ave
      10019
      POLYGON ((-73.988139 40.768254, -73.9881414076...
      1
      13500
      6596
      0.000807
      4513475.18656
      15450.2036171
    
    
      1
      79
      Franklin St & W Broadway
      10013
      POLYGON ((-74.00616660999999 40.71911552, -74....
      1
      3300
      5156
      0.000631
      3677727.6405
      8170.75614772
    
    
      12
      146
      Hudson St & Reade St
      10013
      POLYGON ((-74.00860589999999 40.71625008, -74....
      1
      3300
      5156
      0.000631
      3677727.6405
      8170.75614772



In [22]:

    
# Size of the join result: (station/tract pairs, columns).
citibike_popdensity.shape









    Out[22]:





(1099, 10)



In [23]:

    
# Number of joined census-tract rows (with a density value) per station,
# shown for the first 10 station ids.
citibike_popdensity.groupby('Station_id')[['density']].count().head(10)



In [24]:

    
# Population density around each CitiBike station, obtained by grouping the join result.
# A station buffer can intersect several census tracts, so the 'density' values
# of all tracts matched to a station (within a borough) are averaged.
grouped_data = (
    citibike_popdensity
    .groupby(['Station_id', 'boro_code'])[['density']]
    .mean()
    .reset_index()
)
grouped_data.head()



In [25]:

    
# Write the per-station average density to the processed-data folder.
# NOTE(review): no `index` argument is passed, so the row index is written as
# well (pandas' default) — confirm that readers of this file expect that column.
grouped_data.to_csv('../data/processed/pop-density.csv')



In [ ]:

    
# #Merging with census tract data (Point) -- uses point_citibike_popdensity, the point-based join
# merged_citibike_avgdensity = pd.merge(grouped_data, point_citibike_popdensity, on = 'Station_id', how = 'inner' )
# merged_citibike_avgdensity.head()



In [ ]:

    
# #Merging with census tract data (0.0005 Buffer) -- uses citibike_popdensity, the buffer-based join
# merged_citibike_avgdensity = pd.merge(grouped_data, citibike_popdensity, on = 'Station_id', how = 'inner' )
# merged_citibike_avgdensity.head()



In [ ]:

    
# merged_citibike_avgdensity.columns



In [ ]:

    
# # For buffer = 0.0005
# merged_citibike_avgdensity.drop([ u'Station_Name', u'geometry',  u'boro_code_y',    u'Population', \
#             u'density',   u'shape_area',   u'shape_leng', 'census_tract'], axis = 1, inplace  = True)



In [ ]:

    
#Dropping unnecessary data
# # For point buffer
# merged_citibike_avgdensity.drop([ u'Station_Name', u'geometry',  u'boro_code_y', u'Population', \
#             u'density',   u'shape_area',   u'shape_leng', u'Latitude',    u'Longitude', \
#                             u'index_right',  u'Location', 'census_tract'], axis = 1, inplace = True)



In [ ]:

    
# merged_citibike_avgdensity.head()



In [ ]:

    
# #Average population density per citibike stand 
# merged_citibike_avgdensity.rename(columns={'boro_code_x' : 'Borough_code'}, inplace = True)
# merged_citibike_avgdensity.head()



In [ ]:

    
# merged_citibike_avgdensity = merged_citibike_avgdensity.groupby(['Station_id']).mean()
# merged_citibike_avgdensity.reset_index(inplace=True)



In [ ]:

    
# merged_citibike_avgdensity.head()

Questions:

Is census tract data a good reference dataset to calculate population density around a citibike station? Do we need a larger or smaller dataset?
Should we capture the census tract information from where the population density is calculated?
Is the population density calculated by the formula density = pop_tract/total_population a good indicator of population density?

	boro_code	boro_ct_2010	boro_name	cdeligibil	ct_2010	ctlabel	geometry	ntacode	ntaname	puma	shape_area	shape_leng
0	5	5000900	Staten Island	I	000900	9	(POLYGON ((-74.07920577013245 40.6434307837456...	SI22	West New Brighton-New Brighton-St. George	3903	2497009.69813	7729.01679376
1	5	5007400	Staten Island	I	007400	74	(POLYGON ((-74.05974734759452 40.5938486115672...	SI14	Grasmere-Arrochar-Ft. Wadsworth	3902	5788237.79601	9902.94847281
2	1	1003200	Manhattan	I	003200	32	(POLYGON ((-73.97990650235904 40.7268657730023...	MN22	East Village	3809	2334190.23228	6358.38668446
3	1	1009800	Manhattan	I	009800	98	(POLYGON ((-73.96432543478758 40.7563815309909...	MN19	Turtle Bay-East Midtown	3808	1906016.35002	5534.19981063
4	1	1010000	Manhattan	I	010000	100	(POLYGON ((-73.96802436915851 40.7595781400528...	MN19	Turtle Bay-East Midtown	3808	1860938.37721	5692.16873705

	Borough	Year	FIPS County Code	DCP Borough Code	Census Tract	Population
0	Bronx	2000	5	2	100	12780
1	Bronx	2000	5	2	200	3545
2	Bronx	2000	5	2	400	3314
3	Bronx	2000	5	2	1600	5237
4	Bronx	2000	5	2	1900	1584

	Borough	Year	FIPS County Code	DCP Borough Code	Census Tract	Population	density
2168	Bronx	2010	5	2	100	11091	0.001357
2169	Bronx	2010	5	2	200	4334	0.000530
2170	Bronx	2010	5	2	400	5503	0.000673
2171	Bronx	2010	5	2	1600	5643	0.000690
2172	Bronx	2010	5	2	1900	1917	0.000234

	boro_code	census_tract	Population	density	geometry	shape_area	shape_leng
0	2	100	11091	0.001357	(POLYGON ((-73.87287195903875 40.7859750278047...	18154596.0081	18903.3467294
1	2	200	4334	0.000530	(POLYGON ((-73.85651604030653 40.8052412204751...	5004821.2311	15591.2827425
2	2	400	5503	0.000673	(POLYGON ((-73.84610660457847 40.8130999892054...	8562150.11049	24707.0790039
3	2	1600	5643	0.000690	(POLYGON ((-73.85513639815333 40.8224361893100...	5221330.06703	9671.30620489
4	2	1900	1917	0.000234	(POLYGON ((-73.89680883223774 40.7958084451597...	17964481.0319	29989.8448165

	Station_id	Station_Name	Location	Latitude	Longitude	Zip
0	72	W 52 St & 11 Ave	W 52 St & 11 Ave	40.767272	-73.993929	10019
1	79	Franklin St & W Broadway	Franklin St & W Broadway	40.719116	-74.006667	10013
2	82	St James Pl & Pearl St	St James Pl & Pearl St	40.711174	-74.000165	10038
3	83	Atlantic Ave & Fort Greene Pl	Atlantic Ave & Fort Greene Pl	40.683826	-73.976323	11217
4	116	W 17 St & 8 Ave	W 17 St & 8 Ave	40.741776	-74.001497	10011

	Station_id	Station_Name	Zip	geometry	boro_code	census_tract	Population	density	shape_area	shape_leng
0	72	W 52 St & 11 Ave	10019	POLYGON ((-73.99342888 40.76727216, -73.993431...	1	13500	6596	0.000807	4513475.18656	15450.2036171
237	480	W 53 St & 10 Ave	10019	POLYGON ((-73.99011727999999 40.76669671, -73....	1	13500	6596	0.000807	4513475.18656	15450.2036171
267	513	W 56 St & 10 Ave	10019	POLYGON ((-73.988139 40.768254, -73.9881414076...	1	13500	6596	0.000807	4513475.18656	15450.2036171
1	79	Franklin St & W Broadway	10013	POLYGON ((-74.00616660999999 40.71911552, -74....	1	3300	5156	0.000631	3677727.6405	8170.75614772
12	146	Hudson St & Reade St	10013	POLYGON ((-74.00860589999999 40.71625008, -74....	1	3300	5156	0.000631	3677727.6405	8170.75614772

	Station_id	boro_code	density
0	72	1	0.000807
1	79	1	0.000631
2	82	1	0.000511
3	83	3	0.000231
4	116	1	0.000742