In [1]:
import os
import geopandas as gpd
from geopandas import GeoSeries
import pandas as pd
import pickle
from fiona import collection

In [2]:
directory = 'analysis'
if not os.path.exists(directory):
    os.makedirs(directory)

In [3]:
directory = 'analysis/use_and_income'
if not os.path.exists(directory):
    os.makedirs(directory)

In [4]:
# import merged puma pums data
df_pp = pd.read_pickle('data/puma_pums/merged.pickle')
df_pp.sort_values(by='state_puma', inplace=True)

df_pp_nogeo = df_pp.copy()
df_pp_nogeo.drop('geometry', axis=1, inplace=True)
df_pp_nogeo.head(3)


Out[4]:
state_puma inc_percap WGTP price w_use_percap
0 1-1302 8850.0 121 0.007806 3843.115486
209 1-1302 36000.0 169 0.007806 96077.887141
208 1-1302 800.0 229 0.007806 23058.692914

In [5]:
# get weighted per capita income and water usage
df_pp_nogeo['inc_percap_wt'] = df_pp_nogeo['inc_percap'] * df_pp_nogeo['WGTP']
df_pp_nogeo['w_use_percap_wt'] = df_pp_nogeo['w_use_percap'] * df_pp_nogeo['WGTP']
df_pp_nogeo.drop(['price', 'inc_percap', 'w_use_percap'], axis=1, inplace=True)
df_pp_nogeo.head(3)


Out[5]:
state_puma WGTP inc_percap_wt w_use_percap_wt
0 1-1302 121 1070850.0 4.650170e+05
209 1-1302 169 6084000.0 1.623716e+07
208 1-1302 229 183200.0 5.280441e+06

In [6]:
# group by state_puma
gp_pp_nogeo = df_pp_nogeo.groupby(by='state_puma')
gp_pp_nogeo


Out[6]:
<pandas.core.groupby.DataFrameGroupBy object at 0x1118f7fd0>

In [7]:
# sum products
gp_pp_nogeo_wt = gp_pp_nogeo.sum()
gp_pp_nogeo_wt['inc_avg'] = gp_pp_nogeo_wt['inc_percap_wt'] / gp_pp_nogeo_wt['WGTP']
gp_pp_nogeo_wt['w_use_avg'] = gp_pp_nogeo_wt['w_use_percap_wt'] / gp_pp_nogeo_wt['WGTP']

gp_pp_nogeo_wt.head(3)


Out[7]:
WGTP inc_percap_wt w_use_percap_wt inc_avg w_use_avg
state_puma
1-1302 29541 8.372530e+08 1.583425e+09 28342.068364 53600.912649
1-1600 35144 1.076107e+09 2.010141e+09 30619.928816 57197.264959
1-200 58542 2.131697e+09 2.164087e+09 36413.117761 36966.402513

In [8]:
# join non geo dataframe back with geoframe col
df_pp_avg = gp_pp_nogeo_wt.merge(df_pp, left_index=True, right_on='state_puma')
df_pp_avg.reset_index(inplace=True)
df_pp_avg.drop(['index','WGTP_x', 'inc_percap_wt', 'w_use_percap_wt', 'inc_percap', 'WGTP_y', 'w_use_percap'], axis=1, inplace=True)
df_pp_avg.drop_duplicates(subset = 'state_puma', inplace=True)
df_pp_avg.head(3)


Out[8]:
inc_avg w_use_avg state_puma geometry price
0 28342.068364 53600.912649 1-1302 POLYGON ((-86.85779499999998 33.53612699999999... 0.007806
308 30619.928816 57197.264959 1-1600 POLYGON ((-87.61014299999998 33.18157999999999... 0.004520
601 36413.117761 36966.402513 1-200 POLYGON ((-87.27848299999998 34.77764699999999... 0.005125

In [9]:
# export as shapefile
gdf_pp_avg = gpd.GeoDataFrame(df_pp_avg)
gdf_pp_avg.to_file('analysis/use_and_income/use_and_income.shp')