In [15]:
import os
import os.path
from census import Census
from us import states
import requests
import geopandas as gpd
import pandas as pd
import zipfile

# Specify state and county to download (select one; the last assignment wins)
loc_name, state_codes, county_codes = "maryland", states.MD.fips, None
loc_name, state_codes, county_codes = "delmarva", [states.MD.fips, states.DE.fips, states.VA.fips], None

# County FIPS codes are 3-digit zero-padded strings in TIGER attributes.
if county_codes is not None:
    county_list = ["{:03d}".format(county_id) for county_id in county_codes]
else:
    county_list = None

# CENSUS API Stuff
# SECURITY/BUG FIX: the original line was `CENSUS_API = #YourAPIKeyHere`,
# which is a SyntaxError and encourages committing a secret. Read the key
# from the environment instead.
CENSUS_API = os.environ.get("CENSUS_API_KEY")
if not CENSUS_API:
    raise RuntimeError("Set the CENSUS_API_KEY environment variable to your Census API key.")
c = Census(CENSUS_API)  # Initialize census class with API key

# Generate codes for census variables of interest
var_ids = ["B19001_0{:02d}E".format(x) for x in range(2, 18)]  # Household income over 12 months

# TIGER Stuff
TIGER_BASE_URL = 'http://www2.census.gov/geo/tiger/TIGER2013/'
TIGER_TRACT_DIR = 'TRACT/'
TIGER_BLOCKGROUP_DIR = 'BG/'
TIGER_WATER_DIR = 'AREAWATER/'
# NOTE: per-state zip/shapefile names are built inside the download loop below.
# The original module-level versions referenced an undefined `state_code`
# (a NameError, since only the list `state_codes` exists) and were always
# overwritten before use, so they are removed here.

# Local Storage Parameters
LOCAL_DATA_DIR = './data/'
GEO_SUB_DIR = 'geo/'
ATTR_FILE_END = '_census_data.csv'
attr_outfile = LOCAL_DATA_DIR + loc_name + ATTR_FILE_END
GEO_FILE_END = '_geo_data.json'
geo_outfile = LOCAL_DATA_DIR + loc_name + GEO_FILE_END
In [4]:
# BUG FIX: the original referenced an undefined `state_code`; `state_codes`
# is a list, so fetch tract-level income estimates for each state and
# concatenate the results into one DataFrame.
state_frames = []
for state_id in state_codes:
    census_data = c.acs.get(var_ids, {'for': 'tract:*', 'in': 'state:{0}'.format(state_id)})
    state_frames.append(pd.DataFrame(census_data))
census_df = pd.concat(state_frames, ignore_index=True)
In [5]:
# Preview the first rows of the fetched census data (rich display).
census_df.head()
Out[5]:
In [16]:
def build_bg_fips(record):
    """Build a block-group FIPS code by concatenating the state, county,
    tract, and block-group components of a census API record.

    record: mapping with 'state', 'county', 'tract', and 'block group' keys
            (presumably string codes from the Census API — confirm upstream).
    Returns the combined code as a string.
    """
    code = record['state']
    for component in ('county', 'tract', 'block group'):
        code = code + record[component]
    return str(code)
def build_tract_fips(record):
    """Build a tract-level FIPS code by concatenating the state, county,
    and tract components of a census API record.

    record: mapping with 'state', 'county', and 'tract' keys
            (presumably string codes from the Census API — confirm upstream).
    Returns the combined code as a string.
    """
    code = record['state']
    for component in ('county', 'tract'):
        code = code + record[component]
    return str(code)
def census_bg_to_dataframe(var_list, state_code, county_codes):
    """Fetch block-group-level ACS estimates for the given counties and
    return them as a DataFrame indexed by block-group FIPS code.

    var_list:     list of ACS variable IDs (e.g. "B19001_002E").
    state_code:   two-digit state FIPS code.
    county_codes: iterable of county FIPS codes within that state.

    Uses the module-level Census client `c`.

    BUG FIX: the original queried `tract:*` for the whole state on every
    loop iteration — it ignored `county` entirely (fetching N identical
    copies of the state's tracts) and the tract records had no
    'block group' key, so build_bg_fips would raise KeyError. The query
    now requests block groups within each county.
    """
    all_records = []
    for county in county_codes:
        census_data = c.acs.get(
            var_list,
            {'for': 'block group:*',
             'in': 'state:{0} county:{1}'.format(state_code, county)})
        for record in census_data:
            # Replace the separate geo components with a single FIPS key.
            record["fips"] = build_bg_fips(record)
            for key in ('state', 'county', 'tract', 'block group'):
                record.pop(key, None)
        all_records.extend(census_data)
    census_df = pd.DataFrame(all_records)
    return census_df.set_index("fips")
def census_tracts_to_dataframe(var_list, state_codes):
    """Fetch tract-level ACS estimates for the given states and return them
    as a DataFrame indexed by tract FIPS code.

    var_list:    list of ACS variable IDs (e.g. "B19001_002E").
    state_codes: iterable of two-digit state FIPS codes.

    Uses the module-level Census client `c`.
    (Cleanup: removed the unused `fips_codes` accumulator and the redundant
    per-key membership check — the API always returns these geo keys.)
    """
    all_records = []
    for state_id in state_codes:
        census_data = c.acs.get(var_list, {'for': 'tract:*', 'in': 'state:{0}'.format(state_id)})
        for record in census_data:
            # Replace the separate geo components with a single FIPS key.
            record["fips"] = build_tract_fips(record)
            for key in ('state', 'county', 'tract'):
                record.pop(key, None)
        all_records.extend(census_data)
    census_df = pd.DataFrame(all_records)
    return census_df.set_index("fips")
In [17]:
# Fetch tract-level household-income estimates for every state in
# `state_codes` and write them to CSV.
# (NOTE: the original comment claimed "block group ... Baltimore city",
# but this call is tract-level and covers the whole state list.)
census_df = census_tracts_to_dataframe(var_ids, state_codes)
census_df.to_csv(attr_outfile)
In [19]:
# Download the 2013 TIGER tract shapefile archive for each state, skipping
# archives already present on disk.
for state_id in state_codes:
    tiger_zip_file = 'tl_2013_{0}_tract.zip'.format(state_id)
    FULL_TIGER_URL = TIGER_BASE_URL + TIGER_TRACT_DIR + tiger_zip_file
    local_zip_path = LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_zip_file
    # Check if file is in directory, else download it
    if os.path.isfile(local_zip_path):
        print("Already had the file. Great.")
    else:
        r = requests.get(FULL_TIGER_URL)
        if r.status_code == requests.codes.ok:
            print("Got the file! Copying to disk.")
            with open(local_zip_path, "wb") as f:
                f.write(r.content)
        else:
            # BUG FIX: the original format string had no placeholder, so the
            # status code was silently dropped from the message.
            print("Something went wrong. Status code: {0}".format(r.status_code))
In [23]:
# Extract each state's tract shapefile and load it into a GeoDataFrame,
# then combine all states into a single frame.
state_shapes = []
for state_id in state_codes:
    tiger_zip_file = 'tl_2013_{0}_tract.zip'.format(state_id)
    tiger_shape_file = 'tl_2013_{0}_tract.shp'.format(state_id)
    # Unzip file, extract contents (close the archive handle when done)
    with zipfile.ZipFile(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_zip_file) as zfile:
        zfile.extractall(LOCAL_DATA_DIR + GEO_SUB_DIR)
    # Load to GeoDataFrame
    state_shape = gpd.GeoDataFrame.from_file(LOCAL_DATA_DIR + GEO_SUB_DIR + tiger_shape_file)
    state_shapes.append(state_shape)
shapes = gpd.GeoDataFrame(pd.concat(state_shapes, ignore_index=True))
# Only keep counties that we are interested in.
# BUG FIX: the original ran this filter inside the loop, referencing `shapes`
# before it was defined (NameError on the first iteration when county_list is
# set). Filtering belongs after the concat.
if county_list is not None:
    shapes = shapes[shapes["COUNTYFP"].isin(county_list)]
In [27]:
# Build a slimmed-down copy of the tract geometries, keyed by FIPS code,
# and persist it as GeoJSON.
small_shapes = gpd.GeoDataFrame()
# Simplify geometry to reduce file size
small_shapes["geometry"] = shapes["geometry"].simplify(tolerance=0.001)
small_shapes["fips"] = shapes["GEOID"]
small_shapes = small_shapes.set_index("fips")
# Serialize and write to file
with open(geo_outfile, 'w') as f:
    f.write(small_shapes.to_json())