notebook.community

Edit and run



In [1]:

    
import pandas as pd
from geopy.geocoders import Nominatim
import folium
import matplotlib.pyplot as plt
import numpy as np



In [2]:

    
# Read the ZIP-code level income table; the path is relative to the
# notebook's working directory.
zip_info_path = "data/zip_info.csv"
df = pd.read_csv(zip_info_path)



In [3]:

    
# Preview the first five rows of the raw table.
df.head(n=5)









    Out[3]:






  
    
      
      state_fips
      state
      state_abbr
      zipcode
      county
      city
      Average_Household_Income
      Per_capita_Income
      High_Income_Households
    
  
  
    
      0
      1
      Alabama
      AL
      36093
      Elmore
      Zcta 36093
      $97,493
      $35,689
      5.70%
    
    
      1
      1
      Alabama
      AL
      35173
      Jefferson
      Trussville
      $96,314
      $35,053
      6.70%
    
    
      2
      1
      Alabama
      AL
      35757
      Madison
      Zcta 35757
      $95,882
      $35,317
      5.30%
    
    
      3
      1
      Alabama
      AL
      36527
      Baldwin
      Spanish fort
      $95,431
      $37,820
      9.10%
    
    
      4
      1
      Alabama
      AL
      35114
      Shelby
      Maylene
      $95,145
      $32,961
      5.50%



In [4]:

    
# Convert the formatted text columns into floats.
def _strip_to_float(series, strip_pattern):
    """Delete every match of ``strip_pattern`` from ``series`` and cast to float.

    ``strip_pattern`` is a regular expression. It is passed as a raw string
    by the calls below because '\\$' and '\\%' are not valid Python string
    escapes and produce an invalid-escape warning on newer interpreters;
    inside a character class neither '$' nor '%' needs a backslash.
    """
    return series.replace(strip_pattern, '', regex=True).astype(float)


# Dollar amounts: drop the '$' sign and the ',' separators.
df['Average_Household_Income'] = _strip_to_float(df['Average_Household_Income'], r'[$,]')
df['Per_capita_Income'] = _strip_to_float(df['Per_capita_Income'], r'[$,]')
# Percentages: drop the '%' sign (and any ',').
df['High_Income_Households'] = _strip_to_float(df['High_Income_Households'], r'[%,]')



In [5]:

    
# Average the three income measures per state abbreviation.
# The numeric income columns are selected explicitly before mean(): the frame
# still contains text columns (state, county, city), and recent pandas
# releases raise on them instead of silently dropping them. Selecting the
# columns also makes the later drop of state_fips/zipcode unnecessary and
# avoids the deprecated `axis` argument of groupby.
income_columns = [
    'Average_Household_Income',
    'Per_capita_Income',
    'High_Income_Households',
]
state_income = df.groupby('state_abbr')[income_columns].mean().reset_index()



In [6]:

    
# Preview the first five per-state averages.
state_income.head(n=5)









    Out[6]:






  
    
      
      state_abbr
      Average_Household_Income
      Per_capita_Income
      High_Income_Households
    
  
  
    
      0
      AK
      61664.676724
      23540.732759
      2.426724
    
    
      1
      AL
      53461.360976
      21633.505691
      1.937724
    
    
      2
      AR
      48642.797927
      20237.065630
      1.497582
    
    
      3
      AZ
      57926.386431
      23647.486726
      2.643658
    
    
      4
      CA
      80201.422101
      30944.696860
      6.808756



In [7]:

    
# Two-letter postal abbreviations of the 50 US states, ordered alphabetically
# by state name. 'DC' is not part of this list.
stateslis = (
    'AL AK AZ AR CA CO CT DE FL GA HI ID IL '
    'IN IA KS KY LA ME MD MA MI MN MS MO MT '
    'NE NV NH NJ NM NY NC ND OH OK OR PA RI '
    'SC SD TN TX UT VT VA WA WV WI WY'
).split()



In [8]:

    
# States that are not in our dataframe will be filled with a None value.
# Membership is checked against a set built once rather than rebuilding a
# list for every candidate state.
states_already_present = set(state_income['state_abbr'])
missing_states = [abbr for abbr in stateslis if abbr not in states_already_present]

# Building the frame from a dict keeps the 'state_abbr' column even when
# nothing is missing (e.g. when this cell is re-run after the placeholder
# rows were already added); an empty list-based frame has no columns and
# renaming them would raise.
states_not_in_df = pd.DataFrame({'state_abbr': missing_states})
states_not_in_df['Average_Household_Income'] = None
states_not_in_df['Per_capita_Income'] = None
states_not_in_df['High_Income_Households'] = None



In [9]:

    
# Add the placeholder rows for states that have no data.
# pd.concat is used because DataFrame.append was deprecated and later removed
# from pandas; ignore_index=True renumbers the combined rows from 0.
# NOTE: running this cell twice in a row adds the placeholder rows twice;
# re-run from the per-state aggregation cell to start clean.
state_income = pd.concat([state_income, states_not_in_df], ignore_index=True)



In [10]:

    
# Show the five states with the largest mean High_Income_Households value.
# sort_values(by=...) is the replacement for the deprecated
# DataFrame.sort(columns=...) call.
state_income.sort_values(by='High_Income_Households', ascending=False).head()









    



//anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':






    Out[10]:






  
    
      
      state_abbr
      Average_Household_Income
      Per_capita_Income
      High_Income_Households
    
  
  
    
      27
      NJ
      177262.000000
      90205.000000
      28.000000
    
    
      6
      DC
      108644.782609
      53724.043478
      14.282609
    
    
      18
      MD
      98351.824444
      37717.428889
      9.681778
    
    
      4
      CA
      80201.422101
      30944.696860
      6.808756
    
    
      30
      NY
      80380.302109
      31981.285980
      5.960360



In [11]:

    
# Base map centred on latitude 48, longitude -102 at zoom level 3.
# NOTE(review): the variable name `map` hides Python's built-in map() for the
# rest of the session; it is kept unchanged in case other cells refer to it.
map = folium.Map(location=[48, -102], zoom_start=3)

# Shade each shape in 'us-states.json' by High_Income_Households, matching
# state_abbr values against each GeoJSON feature's id.
# NOTE(review): the top threshold is 10 while the sorted table earlier in the
# notebook shows averages above that (28.0 and 14.28) - confirm how the
# installed folium version colours those states.
# NOTE(review): `Map.choropleth` and its `geo_path` / `threshold_scale`
# arguments belong to an older folium API - verify against the folium
# version in use before upgrading.
choropleth_options = dict(
    geo_path='us-states.json',
    data=state_income,
    columns=['state_abbr', 'High_Income_Households'],
    key_on='feature.id',
    threshold_scale=[1, 2, 3, 4, 10],
    fill_color='Spectral',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='High Income Households (%)',
    reset=True,
)
map.choropleth(**choropleth_options)

# Last expression of the cell: renders the map inline.
map









    Out[11]:

	state_fips	state	state_abbr	zipcode	county	city	Average_Household_Income	Per_capita_Income	High_Income_Households
0	1	Alabama	AL	36093	Elmore	Zcta 36093	$97,493	$35,689	5.70%
1	1	Alabama	AL	35173	Jefferson	Trussville	$96,314	$35,053	6.70%
2	1	Alabama	AL	35757	Madison	Zcta 35757	$95,882	$35,317	5.30%
3	1	Alabama	AL	36527	Baldwin	Spanish fort	$95,431	$37,820	9.10%
4	1	Alabama	AL	35114	Shelby	Maylene	$95,145	$32,961	5.50%

	state_abbr	Average_Household_Income	Per_capita_Income	High_Income_Households
0	AK	61664.676724	23540.732759	2.426724
1	AL	53461.360976	21633.505691	1.937724
2	AR	48642.797927	20237.065630	1.497582
3	AZ	57926.386431	23647.486726	2.643658
4	CA	80201.422101	30944.696860	6.808756

	state_abbr	Average_Household_Income	Per_capita_Income	High_Income_Households
27	NJ	177262.000000	90205.000000	28.000000
6	DC	108644.782609	53724.043478	14.282609
18	MD	98351.824444	37717.428889	9.681778
4	CA	80201.422101	30944.696860	6.808756
30	NY	80380.302109	31981.285980	5.960360