In [2]:
import census_api_util as api
from sqlalchemy import create_engine
import dbconfig
import pandas as pd
#test query_census_api function

In [3]:
engine = create_engine('postgresql://{conf.user}:{conf.password}@{conf.host}:5432/{conf.database}'.format(conf=dbconfig))

Look up columns for all tracts/block groups from 2010 decimal census (bunched in groups of 20 due to api quatas)


In [4]:
census_20 = api.query_census_api('census',39, '061','*','*',['H0030001',	'H0030002',	'H0030003',	'H0040002',	'H0040004',	'H0050002',	'H0060002',	'H0060003',	'H0060004',	'H0060005',	'H0060006',	'H0060007',	'H0060008',	'H0100001',	'H0130002',	'H0130003',	'H0130004',	'H0130005',	'H0130006',	'H0130007'], 2010, 'a6e317918af5d4097d792cabd992f41f2691b75e', verbose=True)
census_40 = api.query_census_api('census',39, '061','*','*',['H0130008',	'H0140002',	'H0140003',	'H0140004',	'H0140005',	'H0140006',	'H0140007',	'H0140008',	'H0140009',	'H0140010',	'H0140011',	'H0140012',	'H0140013',	'H0140014',	'H0140015',	'H0140016',	'H0140017',	'H0150001',	'H0150002',	'H0150003'], 2010, 'a6e317918af5d4097d792cabd992f41f2691b75e', verbose=True)
census_60 = api.query_census_api('census',39, '061','*','*',['H0150004',	'H0150005',	'H0150006',	'H0150007',	'H0190003',	'H0190004',	'H0190005',	'H0190006',	'H0190007',	'H0200002',	'H0200003',	'P0010001',	'P0030002',	'P0030003',	'P0030004',	'P0030005',	'P0030006',	'P0030007',	'P0030008',	'P0110001'], 2010, 'a6e317918af5d4097d792cabd992f41f2691b75e', verbose=True)
census_80 = api.query_census_api('census',39, '061','*','*',['P0120003',	'P0120004',	'P0120005',	'P0120006',	'P0120007',	'P0120008',	'P0120009',	'P0120010',	'P0120011',	'P0120012',	'P0120013',	'P0120014',	'P0120015',	'P0120016',	'P0120017',	'P0120018',	'P0120019',	'P0120020',	'P0120021',	'P0120022'], 2010, 'a6e317918af5d4097d792cabd992f41f2691b75e', verbose=True)
census_100 = api.query_census_api('census',39, '061','*','*',['P0120023',	'P0120024',	'P0120025',	'P0120026',	'P0120027',	'P0120028',	'P0120029',	'P0120030',	'P0120031',	'P0120032',	'P0120033',	'P0120034',	'P0120035',	'P0120036',	'P0120037',	'P0120038',	'P0120039',	'P0120040',	'P0120041',	'P0120042'], 2010, 'a6e317918af5d4097d792cabd992f41f2691b75e', verbose=True)
census_111 = api.query_census_api('census',39, '061','*','*',['P0120043',	'P0120044',	'P0120045',	'P0120046',	'P0120047',	'P0120048',	'P0120049',	'P0180001',	'P0180002',	'P0180003',	'P0180004'], 2010, 'a6e317918af5d4097d792cabd992f41f2691b75e', verbose=True)


API URL:
http://api.census.gov/data/2010/sf1?get=H0030001,H0030002,H0030003,H0040002,H0040004,H0050002,H0060002,H0060003,H0060004,H0060005,H0060006,H0060007,H0060008,H0100001,H0130002,H0130003,H0130004,H0130005,H0130006,H0130007&for=block+group:*&in=state:39+county:061&key=a6e317918af5d4097d792cabd992f41f2691b75e

API URL:
http://api.census.gov/data/2010/sf1?get=H0130008,H0140002,H0140003,H0140004,H0140005,H0140006,H0140007,H0140008,H0140009,H0140010,H0140011,H0140012,H0140013,H0140014,H0140015,H0140016,H0140017,H0150001,H0150002,H0150003&for=block+group:*&in=state:39+county:061&key=a6e317918af5d4097d792cabd992f41f2691b75e

API URL:
http://api.census.gov/data/2010/sf1?get=H0150004,H0150005,H0150006,H0150007,H0190003,H0190004,H0190005,H0190006,H0190007,H0200002,H0200003,P0010001,P0030002,P0030003,P0030004,P0030005,P0030006,P0030007,P0030008,P0110001&for=block+group:*&in=state:39+county:061&key=a6e317918af5d4097d792cabd992f41f2691b75e

API URL:
http://api.census.gov/data/2010/sf1?get=P0120003,P0120004,P0120005,P0120006,P0120007,P0120008,P0120009,P0120010,P0120011,P0120012,P0120013,P0120014,P0120015,P0120016,P0120017,P0120018,P0120019,P0120020,P0120021,P0120022&for=block+group:*&in=state:39+county:061&key=a6e317918af5d4097d792cabd992f41f2691b75e

API URL:
http://api.census.gov/data/2010/sf1?get=P0120023,P0120024,P0120025,P0120026,P0120027,P0120028,P0120029,P0120030,P0120031,P0120032,P0120033,P0120034,P0120035,P0120036,P0120037,P0120038,P0120039,P0120040,P0120041,P0120042&for=block+group:*&in=state:39+county:061&key=a6e317918af5d4097d792cabd992f41f2691b75e

API URL:
http://api.census.gov/data/2010/sf1?get=P0120043,P0120044,P0120045,P0120046,P0120047,P0120048,P0120049,P0180001,P0180002,P0180003,P0180004&for=block+group:*&in=state:39+county:061&key=a6e317918af5d4097d792cabd992f41f2691b75e

In [5]:
#look up same values from 2000 census
census_00_20 = api.query_census_api('census',39, '061','*','*',['H003001'], 2000, 'a6e317918af5d4097d792cabd992f41f2691b75e', verbose=True)


API URL:
http://api.census.gov/data/2000/sf1?get=H003001&for=block+group:*&in=state:39+county:061&key=a6e317918af5d4097d792cabd992f41f2691b75e

merge all dataframes to one based on pairs of tracts and block groups and drop 'state' and 'county' columns


In [5]:
census_df = [census_20, census_40, census_60, census_80, census_100, census_111]

In [6]:
census_df_clean = [df.drop(['state', 'county'], axis=1) for df in census_df]

In [7]:
census_df_clean[0].head()


Out[7]:
H0030001 H0030002 H0030003 H0040002 H0040004 H0050002 H0060002 H0060003 H0060004 H0060005 ... H0060008 H0100001 H0130002 H0130003 H0130004 H0130005 H0130006 H0130007 tract block group
0 576 516 60 1 513 50 9 491 1 3 ... 11 1053 230 137 91 34 10 10 000200 1
1 142 94 48 0 94 16 77 11 0 3 ... 2 150 53 31 6 3 1 0 000700 1
2 1182 974 208 28 944 119 726 173 0 53 ... 13 1260 738 206 21 4 2 1 000700 2
3 607 512 95 39 468 27 307 178 1 13 ... 7 601 440 62 6 1 3 0 000700 3
4 684 148 536 11 136 43 42 104 1 1 ... 0 268 72 46 20 7 2 1 000900 1

5 rows × 22 columns


In [8]:
census_pop_housing = reduce(lambda left,right: pd.merge(left,right,on=['tract','block group']), census_df_clean)

In [21]:
census_pop_housing.rename(columns={'block group': 'block_group'}, inplace=True)

In [24]:
census_pop_housing.to_sql('census_pop_housing', engine, schema='shape_files', if_exists='replace')