In [1]:
import time, pandas as pd, requests
In [2]:
def census_geocode(lon, lat, benchmark='Public_AR_Current', vintage='Current_Current',
                   output_format='json', layers='Census Tracts', timeout=30):
    """
    Send a coordinates lookup to the US Census geocoder "geographies" endpoint.

    Parameters
    ----------
    lon : number or str
        Sent as the ``x`` query parameter.
    lat : number or str
        Sent as the ``y`` query parameter.
    benchmark : str
        Sent as the ``benchmark`` query parameter.
    vintage : str
        Sent as the ``vintage`` query parameter.
    output_format : str
        Sent as the ``format`` query parameter.
    layers : str
        Sent as the ``layers`` query parameter.
    timeout : float or tuple
        Passed to ``requests.get`` so a stalled connection raises instead of
        blocking forever.

    Returns
    -------
    requests.Response
        The raw response. The HTTP status is NOT checked here; callers decide
        how to handle failures.
    """
    # Local import keeps this cell self-contained without touching the import cell.
    from urllib.parse import quote, urlencode

    endpoint = 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates'

    # Percent-encode every value so characters such as '&', '#' or '=' cannot
    # break the query string. quote_via=quote encodes spaces as %20 (not '+').
    query = urlencode(
        {
            'benchmark': benchmark,
            'vintage': vintage,
            'x': lon,
            'y': lat,
            'format': output_format,
            'layers': layers,
        },
        quote_via=quote,
    )

    return requests.get(endpoint + '?' + query, timeout=timeout)
In [3]:
def geocode_row(row):
    """
    Reverse-geocode one row's coordinates to a census tract record.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``'longitude'`` and ``'latitude'`` entries.

    Returns
    -------
    pandas.Series
        The first entry of ``result -> geographies -> 'Census Tracts'`` from
        the geocoder's JSON reply. If either coordinate is missing, or the
        reply lists no tract, a Series holding only ``GEOID = None`` is
        returned so one unmatched row does not abort a whole ``apply`` run.

    Raises
    ------
    requests.HTTPError
        If the geocoder answers with an error status.
    """
    # Placeholder keeps a GEOID column present for rows that cannot be matched.
    no_match = pd.Series({'GEOID': None})

    lon, lat = row['longitude'], row['latitude']
    if pd.isna(lon) or pd.isna(lat):
        # Nothing to look up; avoid sending 'nan' to the service.
        return no_match

    response = census_geocode(lon=lon, lat=lat)
    # Surface HTTP failures directly instead of as a JSON/Key error further down.
    response.raise_for_status()

    result = response.json()['result']
    geographies = result.get('geographies') or {}
    matches = geographies.get('Census Tracts') or []
    if not matches:
        # NOTE(review): presumably happens for points outside the geocoder's
        # coverage -- confirm against the API documentation.
        return no_match

    return pd.Series(matches[0])
In [4]:
# read the rental listings CSV into a DataFrame
listings = pd.read_csv(filepath_or_buffer='data/listings.csv')
In [5]:
# look up the census tract for every listing, one row (one request) at a time
tracts = listings.apply(geocode_row, axis='columns')
In [6]:
# preview the identifier and land/water area fields of the first few tracts
tracts.loc[:, ['GEOID', 'AREALAND', 'AREAWATER']].head(5)
Out[6]:
In [7]:
# keep just the GEOID column of the tract results, as a one-column DataFrame
geoids = tracts[['GEOID']]
# attach it to the listings side by side (rows are matched on the shared index)
listings_geoids = pd.concat([listings, geoids], axis='columns')
listings_geoids.head(5)
Out[7]:
In [ ]: