In [1]:
import os
import geopandas as gpd
from geopandas import GeoSeries
import pandas as pd
import pickle
from fiona import collection
In [2]:
# Ensure the top-level output folder exists.
# exist_ok=True makes this safe to re-run and avoids the race between a
# separate existence check and the directory creation.
directory = 'analysis'
os.makedirs(directory, exist_ok=True)
In [3]:
# Ensure the output folder for this analysis exists (parent folders are
# created as needed by os.makedirs).
# exist_ok=True makes this safe to re-run and avoids the race between a
# separate existence check and the directory creation.
directory = 'analysis/use_and_income'
os.makedirs(directory, exist_ok=True)
In [4]:
# Load the merged PUMA/PUMS table from its pickle, ordered by 'state_puma'.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
df_pp = pd.read_pickle('data/puma_pums/merged.pickle').sort_values(by='state_puma')

# Take an independent copy and strip its 'geometry' column; df_pp itself
# keeps the geometry.
df_pp_nogeo = df_pp.copy()
del df_pp_nogeo['geometry']
df_pp_nogeo.head(3)
Out[4]:
In [5]:
# Multiply per-capita income and per-capita water use by each row's WGTP
# value, then discard the unweighted inputs together with 'price'.
df_pp_nogeo = (
    df_pp_nogeo
    .assign(
        inc_percap_wt=lambda rec: rec['inc_percap'] * rec['WGTP'],
        w_use_percap_wt=lambda rec: rec['w_use_percap'] * rec['WGTP'],
    )
    .drop(columns=['price', 'inc_percap', 'w_use_percap'])
)
df_pp_nogeo.head(3)
Out[5]:
In [6]:
# Bucket the weighted rows by their 'state_puma' value.
gp_pp_nogeo = df_pp_nogeo.groupby('state_puma')
gp_pp_nogeo
Out[6]:
In [7]:
# Total every column within each 'state_puma' group, then turn the summed
# weighted values into weighted averages by dividing by the summed WGTP.
gp_pp_nogeo_wt = gp_pp_nogeo.sum().assign(
    inc_avg=lambda tot: tot['inc_percap_wt'] / tot['WGTP'],
    w_use_avg=lambda tot: tot['w_use_percap_wt'] / tot['WGTP'],
)
gp_pp_nogeo_wt.head(3)
Out[7]:
In [8]:
# Join the per-'state_puma' averages back onto the geometry-bearing frame.
#
# df_pp may contain many rows per 'state_puma', so it is reduced to the first
# row per 'state_puma' *before* the merge. This keeps the same rows the
# original merge-then-drop_duplicates kept, without materialising a join
# against every row of df_pp.
#
# reset_index(drop=True) discards the index carried over from the merge
# directly (no reliance on an intermediate column being named 'index') and
# leaves a contiguous 0..n-1 index.
#
# NOTE(review): columns of df_pp that are not dropped below (e.g. 'price')
# carry the value from the first row of each 'state_puma' -- confirm that is
# the intended representative value.
df_pp_avg = (
    gp_pp_nogeo_wt
    .merge(
        df_pp.drop_duplicates(subset='state_puma'),
        left_index=True,
        right_on='state_puma',
    )
    .reset_index(drop=True)
    .drop(
        columns=[
            'WGTP_x', 'inc_percap_wt', 'w_use_percap_wt',
            'inc_percap', 'WGTP_y', 'w_use_percap',
        ]
    )
)
df_pp_avg.head(3)
Out[8]:
In [9]:
# Wrap the averaged table in a GeoDataFrame and write it to disk as a
# shapefile inside the analysis output folder.
gdf_pp_avg = gpd.GeoDataFrame(data=df_pp_avg)
gdf_pp_avg.to_file(filename='analysis/use_and_income/use_and_income.shp')