In [1]:
import pandas as pd
https://factfinder.census.gov/faces/nav/jsf/pages/searchresults.xhtml?refresh=t
The US Census generates a 'Median Household Income in the Past 12 Months (In 2014 Inflation-Adjusted Dollars)' from data collected by the American Community Survey. This report is based on 5-year estimates, from 2010-2014. The report I pulled is the most recent income-related information available by the US Census.
In [74]:
income = pd.read_excel("../data/unique/ACS_14_5YR_B19013.xls")
In [75]:
income = income.loc[8:]
income.head()
Out[75]:
In [76]:
income = income.drop(['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3'], axis=1)
In [77]:
income = income.rename(columns={'B19013: MEDIAN HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2014 INFLATION-ADJUSTED DOLLARS) - Universe: Households': 'Zip_Code', 'Unnamed: 4': 'Median_Househould_Income', '$b': 'b'})
In [78]:
zips = []
for elem in income['Zip_Code']:
zips.append(str(elem))
In [79]:
zips2 = []
for elem in zips:
zips2.append(elem[6:])
In [80]:
income['Zip_Code'] = zips2
income['Zip_Code'] = pd.to_numeric(income['Zip_Code'])
In [81]:
income.head()
Out[81]:
In [82]:
income["Zip"] = income["Zip_Code"].dropna().astype('int')
income.drop('Zip_Code', axis=1, inplace=True)
In [83]:
stations = pd.read_csv('../data/processed/stations.csv')
In [84]:
# left join of stations and income_zips
stations_income = stations.merge(income, how='inner', on='Zip')
In [85]:
stations_income.head()
Out[85]:
In [86]:
print(type(stations_income))
In [87]:
print(len(stations_income))
In [89]:
stations_income.to_csv("../data/processed/stations-income.csv")