In [1]:
import os
import geopandas as gpd
from geopandas import GeoSeries
import pandas as pd
import pickle
from fiona import collection

In [2]:
# Top-level output folder for this notebook; create it on first run only.
directory = 'analysis'
needs_creating = not os.path.exists(directory)
if needs_creating:
    os.makedirs(directory)

In [3]:
# Sub-folder that receives the use-and-income export; create it on first run only.
directory = 'analysis/use_and_income'
needs_creating = not os.path.exists(directory)
if needs_creating:
    os.makedirs(directory)

In [4]:
# Load the merged puma/pums table and order its rows by the 'state_puma' key.
# NOTE(review): unpickling can run arbitrary code, so this path should only
# ever point at a pickle produced by this project.
df_pp = pd.read_pickle('data/puma_pums/merged.pickle').sort_values(by='state_puma')

# Attribute-only working table: same rows, minus the 'geometry' column.
# drop() hands back a new frame, so df_pp itself keeps its geometry.
df_pp_nogeo = df_pp.drop('geometry', axis=1)
df_pp_nogeo.head(3)


Out[4]:
state_puma inc_percap WGTP price w_use_percap
0 1-1302 17933.333333 166 0.007806 15372.461942
347 1-1302 9866.666667 59 0.007806 85.402566
346 1-1302 0.000000 0 0.007806 0.000000

In [5]:
# Multiply per-capita income and water use by each row's WGTP weight, so that
# summing per PUMA later yields the numerators of the weighted averages.
for weighted_name, raw_name in (('inc_percap_wt', 'inc_percap'),
                                ('w_use_percap_wt', 'w_use_percap')):
    df_pp_nogeo[weighted_name] = df_pp_nogeo[raw_name] * df_pp_nogeo['WGTP']

# The unweighted inputs and the price are not needed for the aggregation.
df_pp_nogeo = df_pp_nogeo.drop(['price', 'inc_percap', 'w_use_percap'], axis=1)
df_pp_nogeo.head(3)


Out[5]:
state_puma WGTP inc_percap_wt w_use_percap_wt
0 1-1302 166 2.976933e+06 2.551829e+06
347 1-1302 59 5.821333e+05 5.038751e+03
346 1-1302 0 0.000000e+00 0.000000e+00

In [6]:
# Bucket the weighted rows by their 'state_puma' key so that each PUMA can be
# aggregated on its own in the next step.
gp_pp_nogeo = df_pp_nogeo.groupby('state_puma')
gp_pp_nogeo


Out[6]:
<pandas.core.groupby.DataFrameGroupBy object at 0x111a0aa50>

In [7]:
# Per-PUMA totals of the weights and of the weighted values.
gp_pp_nogeo_wt = gp_pp_nogeo.sum()

# Weighted average = sum(value * weight) / sum(weight), computed per PUMA.
total_weight = gp_pp_nogeo_wt['WGTP']
gp_pp_nogeo_wt['inc_avg'] = gp_pp_nogeo_wt['inc_percap_wt'].div(total_weight)
gp_pp_nogeo_wt['w_use_avg'] = gp_pp_nogeo_wt['w_use_percap_wt'].div(total_weight)

gp_pp_nogeo_wt.head(3)


Out[7]:
WGTP inc_percap_wt w_use_percap_wt inc_avg w_use_avg
state_puma
1-1302 42407 1.126472e+09 1.584954e+09 26563.355732 37374.821526
1-1600 40172 1.171106e+09 2.011090e+09 29152.284966 50061.975515
1-200 67856 2.325482e+09 2.165485e+09 34270.837957 31912.943636

In [15]:
# Attach each PUMA's geometry and price to its weighted averages.
#
# df_pp holds many rows per 'state_puma', so it is reduced to one row per
# PUMA *before* the join. Joining against the full table first would copy
# every polygon once per source row only to throw the copies away again.
# As with a keep-first de-duplication after the join, geometry and price
# come from the first df_pp row of each PUMA.
puma_attrs = df_pp[['state_puma', 'geometry', 'price']].drop_duplicates(subset='state_puma')

# Only the two averages are taken from the aggregated frame, so no suffixed
# (_x/_y) or intermediate columns have to be dropped afterwards.
df_pp_avg = (
    gp_pp_nogeo_wt[['inc_avg', 'w_use_avg']]
    .merge(puma_attrs, left_index=True, right_on='state_puma')
    .reset_index(drop=True)  # discard the row labels left over from the join
)
df_pp_avg.head(3)


Out[15]:
inc_avg w_use_avg state_puma geometry price
0 26563.355732 37374.821526 1-1302 POLYGON ((-86.85779499999998 33.53612699999999... 0.007806
509 29152.284966 50061.975515 1-1600 POLYGON ((-87.61014299999998 33.18157999999999... 0.004520
1001 34270.837957 31912.943636 1-200 POLYGON ((-87.27848299999998 34.77764699999999... 0.005125

In [18]:
# Export the per-PUMA averages as a shapefile.
#
# df_pp_avg is the result of a merge, so it is wrapped in a GeoDataFrame here.
# NOTE(review): the coordinate reference system is forwarded from df_pp when
# that frame exposes one, so the export keeps its projection information;
# if df_pp has no 'crs' attribute this passes None, which matches the
# constructor's behavior when no crs is given. Confirm df_pp carries the
# expected CRS.
gdf_pp_avg = gpd.GeoDataFrame(df_pp_avg, crs=getattr(df_pp, 'crs', None))
gdf_pp_avg.to_file('analysis/use_and_income/use_and_income.shp')