In [1]:
import pandas as pd
from geopy.geocoders import Nominatim
import folium
import matplotlib.pyplot as plt
import numpy as np
In [2]:
# Load the income table from the CSV file in the local data directory.
zip_info_path = "data/zip_info.csv"
df = pd.read_csv(zip_info_path)
In [3]:
# Preview the first five rows of the loaded table.
df.head(n=5)
Out[3]:
In [4]:
# Convert the formatted income columns into floats.
#
# Each column maps to the character class stripped before the cast: "$" and
# "," for the dollar amounts, "%" and "," for the percentage column. The
# patterns are raw strings so they contain no invalid escape sequences
# ("\$" / "\%" in a plain string literal trigger a warning on modern Python).
_format_chars_by_column = {
    'Average_Household_Income': r'[$,]',
    'Per_capita_Income': r'[$,]',
    'High_Income_Households': r'[%,]',
}
for _column, _pattern in _format_chars_by_column.items():
    # Series.replace(regex=True) only rewrites string cells, so re-running
    # this cell on already-converted float columns is a harmless no-op.
    df[_column] = df[_column].replace(_pattern, '', regex=True).astype(float)
In [5]:
# Average every numeric column per state, then discard the averaged
# identifier columns (state_fips, zipcode), whose means carry no meaning.
#
# numeric_only=True is passed explicitly: pandas >= 2.0 raises instead of
# silently skipping non-numeric columns when averaging. The deprecated
# `axis=0` groupby argument (the default anyway) is dropped, and the column
# drop is no longer done in place.
state_income = (
    df.groupby('state_abbr')
    .mean(numeric_only=True)
    .reset_index()
    .drop(['state_fips', 'zipcode'], axis=1)
)
In [6]:
# Preview the first five rows of the per-state averages.
state_income.head(n=5)
Out[6]:
In [7]:
# Two-letter abbreviations of the 50 US states; used to find states that
# are absent from the aggregated income frame.
stateslis = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
    'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
    'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
    'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
    'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY',
]
In [8]:
# States that are not in our dataframe get a placeholder row whose income
# columns are missing (NaN).
#
# The frame is built from a dict so that the 'state_abbr' column exists even
# when no state is missing; building from an empty list and then assigning
# `.columns` raises a length-mismatch error in that case. NaN is used rather
# than None so the placeholder columns stay float instead of object dtype.
_states_present = set(state_income['state_abbr'])
states_not_in_df = pd.DataFrame(
    {'state_abbr': [abbr for abbr in stateslis if abbr not in _states_present]}
)
for _income_column in ('Average_Household_Income',
                       'Per_capita_Income',
                       'High_Income_Households'):
    states_not_in_df[_income_column] = np.nan
In [9]:
# Add the placeholder rows for the missing states to the per-state frame.
#
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# Dropping repeated state_abbr rows (keeping the first) makes the cell safe
# to re-run: without it, every extra execution would add the placeholder
# rows again.
state_income = (
    pd.concat([state_income, states_not_in_df], ignore_index=True)
    .drop_duplicates(subset='state_abbr', keep='first')
    .reset_index(drop=True)
)
In [10]:
# Show the five states with the largest High_Income_Households value.
# DataFrame.sort was removed from pandas; sort_values is its replacement.
state_income.sort_values('High_Income_Households', ascending=False).head()
Out[10]:
In [11]:
# Draw a choropleth of High_Income_Households per state on a folium map.
#
# NOTE(review): `map` shadows the Python built-in of the same name. The name
# is kept here because later cells may reference it; consider renaming it
# throughout the notebook.
map = folium.Map(location=[48, -102], zoom_start=3)

# Bin edges for the colour scale (same values as the former threshold_scale).
_income_bins = [1, 2, 3, 4, 10]

# folium.Choropleth raises a ValueError when a value lies outside the bin
# edges, so the plotted copy is coerced to numeric (None -> NaN) and clipped
# to the outermost edges; out-of-range states then take the end colours.
# state_income itself is left untouched.
_choropleth_data = state_income[['state_abbr', 'High_Income_Households']].copy()
_choropleth_data['High_Income_Households'] = pd.to_numeric(
    _choropleth_data['High_Income_Households']
).clip(lower=_income_bins[0], upper=_income_bins[-1])

# Map.choropleth(geo_path=..., threshold_scale=..., reset=...) is no longer
# available in current folium; the Choropleth layer class replaces it.
folium.Choropleth(
    geo_data='us-states.json',
    data=_choropleth_data,
    columns=['state_abbr', 'High_Income_Households'],
    key_on='feature.id',
    bins=_income_bins,
    fill_color='Spectral',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='High Income Households (%)',
).add_to(map)
map
Out[11]: