In [15]:
import os
import os.path
from census import Census
from us import states
import requests
import geopandas as gpd
import pandas as pd
import zipfile

# Specify state and county to download (select one; the last assignment wins)
loc_name, state_codes, county_codes = "maryland", states.MD.fips, None
loc_name, state_codes, county_codes = "delmarva", [states.MD.fips, states.DE.fips, states.VA.fips], None

# County FIPS codes are 3-digit zero-padded strings in TIGER attributes.
if county_codes is not None:
    county_list = ["{:03d}".format(county_id) for county_id in county_codes]
else:
    county_list = None

# CENSUS API Stuff
# SECURITY/BUG FIX: the original line was `CENSUS_API = #YourAPIKeyHere`,
# which is a SyntaxError and encourages committing a secret. Read the key
# from the environment instead.
CENSUS_API = os.environ.get("CENSUS_API_KEY")
if not CENSUS_API:
    raise RuntimeError("Set the CENSUS_API_KEY environment variable to your Census API key.")
c = Census(CENSUS_API)  # Initialize census class with API key

# Generate codes for census variables of interest
var_ids = ["B19001_0{:02d}E".format(x) for x in range(2, 18)]  # Household income over 12 months

# TIGER Stuff
TIGER_BASE_URL = 'http://www2.census.gov/geo/tiger/TIGER2013/'
TIGER_TRACT_DIR = 'TRACT/'
TIGER_BLOCKGROUP_DIR = 'BG/'
TIGER_WATER_DIR = 'AREAWATER/'
# NOTE: per-state zip/shapefile names are built inside the download loop below.
# The original module-level versions referenced an undefined `state_code`
# (a NameError, since only the list `state_codes` exists) and were always
# overwritten before use, so they are removed here.

# Local Storage Parameters
LOCAL_DATA_DIR = './data/'
GEO_SUB_DIR = 'geo/'
ATTR_FILE_END = '_census_data.csv'
attr_outfile = LOCAL_DATA_DIR + loc_name + ATTR_FILE_END
GEO_FILE_END = '_geo_data.json'
geo_outfile = LOCAL_DATA_DIR + loc_name + GEO_FILE_END
In [4]:
# BUG FIX: the original referenced an undefined `state_code`; `state_codes`
# is a list, so fetch tract-level income estimates for each state and
# concatenate the results into one DataFrame.
state_frames = []
for state_id in state_codes:
    census_data = c.acs.get(var_ids, {'for': 'tract:*', 'in': 'state:{0}'.format(state_id)})
    state_frames.append(pd.DataFrame(census_data))
census_df = pd.concat(state_frames, ignore_index=True)
In [5]:
# Preview the first rows of the fetched census data (rich display).
census_df.head()
Out[5]:
In [16]:
def build_bg_fips(record):
    """Build a block-group FIPS code by concatenating the state, county,
    tract, and block-group components of a census API record.

    record: mapping with 'state', 'county', 'tract', and 'block group' keys
            (presumably string codes from the Census API — confirm upstream).
    Returns the combined code as a string.
    """
    code = record['state']
    for component in ('county', 'tract', 'block group'):
        code = code + record[component]
    return str(code)
def build_tract_fips(record):
    """Build a tract-level FIPS code by concatenating the state, county,
    and tract components of a census API record.

    record: mapping with 'state', 'county', and 'tract' keys
            (presumably string codes from the Census API — confirm upstream).
    Returns the combined code as a string.
    """
    code = record['state']
    for component in ('county', 'tract'):
        code = code + record[component]
    return str(code)
def census_bg_to_dataframe(var_list, state_code, county_codes):
    """Fetch block-group-level ACS estimates for the given counties and
    return them as a DataFrame indexed by block-group FIPS code.

    var_list:     list of ACS variable IDs (e.g. "B19001_002E").
    state_code:   two-digit state FIPS code.
    county_codes: iterable of county FIPS codes within that state.

    Uses the module-level Census client `c`.

    BUG FIX: the original queried `tract:*` for the whole state on every
    loop iteration — it ignored `county` entirely (fetching N identical
    copies of the state's tracts) and the tract records had no
    'block group' key, so build_bg_fips would raise KeyError. The query
    now requests block groups within each county.
    """
    all_records = []
    for county in county_codes:
        census_data = c.acs.get(
            var_list,
            {'for': 'block group:*',
             'in': 'state:{0} county:{1}'.format(state_code, county)})
        for record in census_data:
            # Replace the separate geo components with a single FIPS key.
            record["fips"] = build_bg_fips(record)
            for key in ('state', 'county', 'tract', 'block group'):
                record.pop(key, None)
        all_records.extend(census_data)
    census_df = pd.DataFrame(all_records)
    return census_df.set_index("fips")
def census_tracts_to_dataframe(var_list, state_codes):
    """Fetch tract-level ACS estimates for the given states and return them
    as a DataFrame indexed by tract FIPS code.

    var_list:    list of ACS variable IDs (e.g. "B19001_002E").
    state_codes: iterable of two-digit state FIPS codes.

    Uses the module-level Census client `c`.
    (Cleanup: removed the unused `fips_codes` accumulator and the redundant
    per-key membership check — the API always returns these geo keys.)
    """
    all_records = []
    for state_id in state_codes:
        census_data = c.acs.get(var_list, {'for': 'tract:*', 'in': 'state:{0}'.format(state_id)})
        for record in census_data:
            # Replace the separate geo components with a single FIPS key.
            record["fips"] = build_tract_fips(record)
            for key in ('state', 'county', 'tract'):
                record.pop(key, None)
        all_records.extend(census_data)
    census_df = pd.DataFrame(all_records)
    return census_df.set_index("fips")
In [17]:
# Fetch tract-level household-income estimates for every state in
# `state_codes` and write them to CSV.
# (NOTE: the original comment claimed "block group ... Baltimore city",
# but this call is tract-level and covers the whole state list.)
census_df = census_tracts_to_dataframe(var_ids, state_codes)
census_df.to_csv(attr_outfile)
In [19]:
# Download the 2013 TIGER tract shapefile archive for each state, skipping
# archives already present on disk.
for state_id in state_codes:
    tiger_zip_file = 'tl_2013_{0}_tract.zip'.format(state_id)
    FULL_TIGER_URL = TIGER_BASE_URL + TIGER_TRACT_DIR + tiger_zip_file
    local_zip_path = LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_zip_file
    # Check if file is in directory, else download it
    if os.path.isfile(local_zip_path):
        print("Already had the file. Great.")
    else:
        r = requests.get(FULL_TIGER_URL)
        if r.status_code == requests.codes.ok:
            print("Got the file! Copying to disk.")
            with open(local_zip_path, "wb") as f:
                f.write(r.content)
        else:
            # BUG FIX: the original format string had no placeholder, so the
            # status code was silently dropped from the message.
            print("Something went wrong. Status code: {0}".format(r.status_code))
In [23]:
# Extract each state's tract shapefile and load it into a GeoDataFrame,
# then combine all states into a single frame.
state_shapes = []
for state_id in state_codes:
    tiger_zip_file = 'tl_2013_{0}_tract.zip'.format(state_id)
    tiger_shape_file = 'tl_2013_{0}_tract.shp'.format(state_id)
    # Unzip file, extract contents (close the archive handle when done)
    with zipfile.ZipFile(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_zip_file) as zfile:
        zfile.extractall(LOCAL_DATA_DIR + GEO_SUB_DIR)
    # Load to GeoDataFrame
    state_shape = gpd.GeoDataFrame.from_file(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_shape_file)
    state_shapes.append(state_shape)
shapes = gpd.GeoDataFrame(pd.concat(state_shapes, ignore_index=True))
# Only keep counties that we are interested in.
# BUG FIX: the original ran this filter inside the loop, referencing `shapes`
# before it was defined (NameError on the first iteration when county_list is
# set). Filtering belongs after the concat.
if county_list is not None:
    shapes = shapes[shapes["COUNTYFP"].isin(county_list)]
In [27]:
# Build a slimmed-down copy of the tract geometries, keyed by FIPS code,
# and persist it as GeoJSON.
small_shapes = gpd.GeoDataFrame()
# Simplify geometry to reduce file size
small_shapes["geometry"] = shapes["geometry"].simplify(tolerance=0.001)
small_shapes["fips"] = shapes["GEOID"]
small_shapes = small_shapes.set_index("fips")
# Serialize and write to file
with open(geo_outfile, 'w') as f:
    f.write(small_shapes.to_json())