In [1]:
import pandas as pd
from geopy.geocoders import Nominatim
import folium
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the per-zip-code income table; the path is relative to the notebook's
# working directory.
# NOTE(review): zipcode is later averaged and dropped as a numeric column, so it
# is presumably parsed as an integer here (leading zeros would be lost) - confirm
# whether that matters for any downstream use.
df = pd.read_csv("data/zip_info.csv")

In [3]:
# Preview the first rows of the freshly loaded table.
df.head()


Out[3]:
state_fips state state_abbr zipcode county city Average_Household_Income Per_capita_Income High_Income_Households
0 1 Alabama AL 36093 Elmore Zcta 36093 $97,493 $35,689 5.70%
1 1 Alabama AL 35173 Jefferson Trussville $96,314 $35,053 6.70%
2 1 Alabama AL 35757 Madison Zcta 35757 $95,882 $35,317 5.30%
3 1 Alabama AL 36527 Baldwin Spanish fort $95,431 $37,820 9.10%
4 1 Alabama AL 35114 Shelby Maylene $95,145 $32,961 5.50%

In [4]:
# Convert the formatted text columns into floats.
# Each column has its decoration ("$" and "," or "%" and ",") stripped with a
# regex replace before the cast. The patterns are raw strings so that "\$" and
# "\%" reach the regex engine verbatim instead of being parsed as (invalid)
# Python string escapes, which newer interpreters warn about.
for _column, _strip_pattern in (
    ('Average_Household_Income', r'[\$,]'),
    ('Per_capita_Income', r'[\$,]'),
    ('High_Income_Households', r'[\%,]'),
):
    df[_column] = df[_column].replace(_strip_pattern, '', regex=True).astype(float)

In [5]:
# Average the three income metrics per state (an unweighted mean over the
# zip-code rows of each state).
# The metric columns are selected explicitly before aggregating: calling
# .mean() on the whole grouped frame depends on pandas silently discarding the
# text columns, which newer pandas versions refuse to do, and it also averages
# state_fips / zipcode only for them to be dropped again afterwards.
state_income = (
    df.groupby('state_abbr')[
        ['Average_Household_Income', 'Per_capita_Income', 'High_Income_Households']
    ]
    .mean()
    .reset_index()
)

In [6]:
# Preview the first rows of the per-state averages.
state_income.head()


Out[6]:
state_abbr Average_Household_Income Per_capita_Income High_Income_Households
0 AK 61664.676724 23540.732759 2.426724
1 AL 53461.360976 21633.505691 1.937724
2 AR 48642.797927 20237.065630 1.497582
3 AZ 57926.386431 23647.486726 2.643658
4 CA 80201.422101 30944.696860 6.808756

In [7]:
# Two-letter abbreviations of the 50 US states, used to detect states that are
# missing from the aggregated data.
stateslis = (
    "AL AK AZ AR CA CO CT DE FL GA HI ID IL "
    "IN IA KS KY LA ME MD MA MI MN MS MO MT "
    "NE NV NH NJ NM NY NC ND OH OK OR PA RI "
    "SC SD TN TX UT VT VA WA WV WI WY"
).split()

In [8]:
# States that are not in our dataframe get a placeholder row whose income
# values are missing (NaN).
# - The abbreviations already present are collected into a set once, instead
#   of rebuilding a list for every candidate state.
# - The frame is built from a dict so the 'state_abbr' column exists even when
#   no state is missing; building it from an empty list and then assigning
#   .columns fails with a length mismatch.
# - np.nan (rather than None) keeps the income columns numeric once these rows
#   are combined with the per-state averages.
_states_present = set(state_income['state_abbr'])
states_not_in_df = pd.DataFrame(
    {'state_abbr': [abbr for abbr in stateslis if abbr not in _states_present]}
)
for _column in ('Average_Household_Income', 'Per_capita_Income', 'High_Income_Households'):
    states_not_in_df[_column] = np.nan

In [9]:
# Add the placeholder rows for the missing states and renumber the index.
# pd.concat replaces DataFrame.append, which is deprecated and no longer
# available in current pandas releases.
state_income = pd.concat([state_income, states_not_in_df], ignore_index=True)

In [10]:
# Show the states with the largest share of high-income households first.
# sort_values(by=...) is the supported replacement for the deprecated
# DataFrame.sort(columns=...).
state_income.sort_values(by='High_Income_Households', ascending=False).head()


//anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':
Out[10]:
state_abbr Average_Household_Income Per_capita_Income High_Income_Households
27 NJ 177262.000000 90205.000000 28.000000
6 DC 108644.782609 53724.043478 14.282609
18 MD 98351.824444 37717.428889 9.681778
4 CA 80201.422101 30944.696860 6.808756
30 NY 80380.302109 31981.285980 5.960360

In [11]:
# Create the base map and shade each state by its average share of
# high-income households.
# NOTE(review): the name `map` shadows Python's built-in of the same name; it
# is kept as-is because cells outside this view may refer to it.
map = folium.Map(
    location=[48, -102],
    zoom_start=3,
)

# Rows of state_income are joined to the GeoJSON features via 'feature.id'
# (presumably the two-letter state abbreviation - confirm against
# us-states.json).
map.choropleth(
    geo_path='us-states.json',
    data=state_income,
    columns=['state_abbr', 'High_Income_Households'],
    key_on='feature.id',
    threshold_scale=[1, 2, 3, 4, 10],
    fill_color='Spectral',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='High Income Households (%)',
    reset=True,
)

# The map object is the cell's last expression so the notebook renders it.
map


Out[11]: