In [66]:
import os
import geopandas as gpd
import pandas as pd
import pickle

In [67]:
directory = 'data'
if not os.path.exists(directory):
    os.makedirs(directory)

In [68]:
directory = 'data/puma_pums'
if not os.path.exists(directory):
    os.makedirs(directory)

In [69]:
# import pums-lite
df_pums_lite = pd.read_pickle('data/pums/pums_lite.pickle')
df_pums_lite.sort_values(by='state_puma', inplace=True)
df_pums_lite.head(3)


Out[69]:
state_puma inc_percap w_percap WGTP
SERIALNO
976988 1-100 13936.666667 283.333333 14
970765 1-100 27866.666667 120.000000 68
986158 1-100 11833.333333 200.000000 71

In [70]:
# load puma-pricing into geodataframe
df_pp = gpd.GeoDataFrame.from_file('data/puma-pricing')
df_pp


Out[70]:
bill cartodb_id geometry owner pumace10 state statefp10 system
0 307.50 1.0 POLYGON ((-87.27848299999998 34.77764699999999... Public 00200 AL 01 MADISON COUNTY
1 285.23 2.0 POLYGON ((-111.719775 34.16405299999999, -111.... Private 00800 AZ 04 PINAL VALLEY
2 170.47 3.0 POLYGON ((-93.93488399999997 34.37469199999999... Public 01600 AR 05 HOT SPRINGS UTILITIES
3 716.18 4.0 POLYGON ((-121.977604 36.58150699999999, -121.... Private 05301 CA 06 MONTEREY
4 429.16 5.0 POLYGON ((-121.977604 36.58150699999999, -121.... Public 05301 CA 06 MARIN MUNICIPAL
5 285.00 6.0 POLYGON ((-107.465957 39.32211999999999, -107.... Public 00400 CO 08 ASPEN
6 347.76 7.0 POLYGON ((-106.195372 40.98993799999999, -106.... Public 00103 CO 08 FT COLLINS
7 206.16 8.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 BOCA RATON WTP
8 239.88 9.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 BOYNTON BEACH WTP
9 199.80 10.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 ORANGE PARK GRID
10 389.04 11.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 COLLIER COUNTY REGIONAL WTP
11 267.00 12.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 EMERALD COAST UTILITIES AUTHORITY
12 276.54 13.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 GRU - MURPHREE WTP
13 146.52 14.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 HERNANDO CO UTL-WEST
14 365.04 15.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 SOUTH CENTRAL SERVICE AREA
15 245.88 16.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 INDIAN RIVER COUNTY UTILITIES (2 WTPS)
16 229.20 17.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 JEA MAJOR GRID
17 220.68 18.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 MANATEE COUNTY UTILITIES DEPT
18 116.46 19.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 MDWASA - MAIN SYSTEM
19 372.37 20.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 MIRAMAR (EAST ; WEST) PLANTS
20 482.40 21.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 OCUD/EASTERN (1 WP)
21 282.96 22.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 OKALOOSA CO.WTR.; SWR.SYSTEM
22 150.00 23.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 OCUD/WESTERN REGIONAL WTR SYS (4 WPS)
23 138.67 24.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 ORLANDO UTILITIES COMMISSION (7 WPS)
24 255.96 25.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 PCUD-PASCO COUNTY REGIONAL
25 363.00 26.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 PINELLAS COUNTY UTILITIES
26 359.44 27.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 RCID CENTRAL (4 WPS)
27 346.92 28.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 SARASOTA CO SPECIAL UTIL DIST
28 274.56 29.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 SEACOAST UTILITIES AUTHORITY
29 275.64 30.0 POLYGON ((-86.32767699999998 38.18273599999999... Public 03500 KY 18 LOUISVILLE WATER COMPANY
... ... ... ... ... ... ... ... ...
458 271.68 459.0 POLYGON ((-96.77037099999998 33.05601199999999... Public 01903 TX 48 PLANO
459 231.84 460.0 POLYGON ((-95.45750799999998 32.32925399999999... Public 01502 TX 48 TYLER
460 202.08 461.0 POLYGON ((-111.835446 40.39215899999999, -111.... Public 49002 UT 49 OREM CITY
461 244.80 462.0 POLYGON ((-111.93584 40.48790799999999, -111.9... Public 35009 UT 49 SANDY CITY
462 196.30 463.0 POLYGON ((-111.923868 40.81086799999999, -111.... Public 35002 UT 49 SALT LAKE CITY
463 432.51 464.0 POLYGON ((-122.270304 47.49564799999998, -122.... Public 11610 WA 53 RENTON
464 339.72 465.0 POLYGON ((-112.012394 40.54376999999998, -112.... Public 35007 UT 49 WEST JORDAN CITY
465 175.08 466.0 POLYGON ((-111.741042 40.23722099999999, -111.... Public 49003 UT 49 PROVO CITY
466 246.00 467.0 POLYGON ((-77.13757599999997 38.86620099999999... Public 01302 VA 51 ARLINGTON COUNTY
467 305.20 468.0 POLYGON ((-77.54264099999999 37.68613299999999... Public 51224 VA 51 HENRICO COUNTY
468 261.93 469.0 POLYGON ((-77.73233299999998 37.50623899999999... Public 04102 VA 51 CHESTERFIELD CO CENTRAL
469 248.52 470.0 POLYGON ((-77.96095499999997 39.01495599999999... Public 10703 VA 51 LOUDOUN
470 368.93 471.0 POLYGON ((-76.41356599999999 36.87414699999999... Public 51155 VA 51 NORFOLK
471 414.36 472.0 POLYGON ((-76.41356599999999 36.87414699999999... Public 51155 VA 51 PORTSMOUTH
472 317.52 473.0 POLYGON ((-76.15378699999998 36.77151499999999... Public 51167 VA 51 VIRGINIA BEACH
473 262.80 474.0 POLYGON ((-77.38268999999998 38.90514999999999... Public 59305 VA 51 FAIRFAX COUNTY
474 297.48 475.0 POLYGON ((-77.14370899999999 38.81182799999999... Private 51255 VA 51 ALEXANDRIA
475 419.71 476.0 POLYGON ((-77.60101099999997 37.54484699999999... Public 51235 VA 51 RICHMOND
476 433.66 477.0 POLYGON ((-76.62775599999996 37.12586199999999... Public 51175 VA 51 NEWPORT NEWS
477 218.97 478.0 POLYGON ((-122.772511 45.69963699999999, -122.... Public 11101 WA 53 VANCOUVER
478 568.69 479.0 POLYGON ((-122.263535 47.62431999999998, -122.... Public 11608 WA 53 BELLEVUE
479 422.83 480.0 POLYGON ((-122.259891 47.77845599999999, -122.... Public 11704 WA 53 NORTHSHORE
480 356.71 481.0 POLYGON ((-122.269937 47.73411999999998, -122.... Public 11607 WA 53 REDMOND
481 362.88 482.0 POLYGON ((-117.566158 47.59465499999998, -117.... Public 10502 WA 53 SPOKANE
482 303.93 483.0 POLYGON ((-122.259748676706 47.95840102157888,... Public 11703 WA 53 EVERETT
483 214.40 484.0 POLYGON ((-88.30589099999997 42.61081699999999... Public 10000 WI 55 kenosha
484 259.89 485.0 POLYGON ((-87.99428799999997 43.19254999999999... Public 40101 WI 55 MILWAUKEE
485 246.84 486.0 POLYGON ((-88.30757199999998 42.77810199999999... Public 30000 WI 55 RACINE
486 240.60 487.0 POLYGON ((-89.47349999999999 43.05362099999999... Public 00101 WI 55 MADISON
487 270.49 488.0 POLYGON ((-88.15010299999997 44.51398999999999... Public 00200 WI 55 green bay

488 rows × 8 columns


In [71]:
# strip out leading zeros with lstrip and create new column which is concatenation of state and puma cols
df_pp['pumace10'] = [c.lstrip('0') for c in df_pp['pumace10']]
df_pp['statefp10'] = [c.lstrip('0') for c in df_pp['statefp10']]
df_pp['state_puma'] = df_pp['statefp10'] + '-' + df_pp['pumace10']

df_pp


Out[71]:
bill cartodb_id geometry owner pumace10 state statefp10 system state_puma
0 307.50 1.0 POLYGON ((-87.27848299999998 34.77764699999999... Public 200 AL 1 MADISON COUNTY 1-200
1 285.23 2.0 POLYGON ((-111.719775 34.16405299999999, -111.... Private 800 AZ 4 PINAL VALLEY 4-800
2 170.47 3.0 POLYGON ((-93.93488399999997 34.37469199999999... Public 1600 AR 5 HOT SPRINGS UTILITIES 5-1600
3 716.18 4.0 POLYGON ((-121.977604 36.58150699999999, -121.... Private 5301 CA 6 MONTEREY 6-5301
4 429.16 5.0 POLYGON ((-121.977604 36.58150699999999, -121.... Public 5301 CA 6 MARIN MUNICIPAL 6-5301
5 285.00 6.0 POLYGON ((-107.465957 39.32211999999999, -107.... Public 400 CO 8 ASPEN 8-400
6 347.76 7.0 POLYGON ((-106.195372 40.98993799999999, -106.... Public 103 CO 8 FT COLLINS 8-103
7 206.16 8.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 BOCA RATON WTP 12-10504
8 239.88 9.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 BOYNTON BEACH WTP 12-10504
9 199.80 10.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 ORANGE PARK GRID 12-10504
10 389.04 11.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 COLLIER COUNTY REGIONAL WTP 12-10504
11 267.00 12.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 EMERALD COAST UTILITIES AUTHORITY 12-10504
12 276.54 13.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 GRU - MURPHREE WTP 12-10504
13 146.52 14.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 HERNANDO CO UTL-WEST 12-10504
14 365.04 15.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 SOUTH CENTRAL SERVICE AREA 12-10504
15 245.88 16.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 INDIAN RIVER COUNTY UTILITIES (2 WTPS) 12-10504
16 229.20 17.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 JEA MAJOR GRID 12-10504
17 220.68 18.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 MANATEE COUNTY UTILITIES DEPT 12-10504
18 116.46 19.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 MDWASA - MAIN SYSTEM 12-10504
19 372.37 20.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 MIRAMAR (EAST ; WEST) PLANTS 12-10504
20 482.40 21.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 OCUD/EASTERN (1 WP) 12-10504
21 282.96 22.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 OKALOOSA CO.WTR.; SWR.SYSTEM 12-10504
22 150.00 23.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 OCUD/WESTERN REGIONAL WTR SYS (4 WPS) 12-10504
23 138.67 24.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 ORLANDO UTILITIES COMMISSION (7 WPS) 12-10504
24 255.96 25.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 PCUD-PASCO COUNTY REGIONAL 12-10504
25 363.00 26.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 PINELLAS COUNTY UTILITIES 12-10504
26 359.44 27.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 RCID CENTRAL (4 WPS) 12-10504
27 346.92 28.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 SARASOTA CO SPECIAL UTIL DIST 12-10504
28 274.56 29.0 POLYGON ((-82.05528799999998 27.88458999999999... Public 10504 FL 12 SEACOAST UTILITIES AUTHORITY 12-10504
29 275.64 30.0 POLYGON ((-86.32767699999998 38.18273599999999... Public 3500 KY 18 LOUISVILLE WATER COMPANY 18-3500
... ... ... ... ... ... ... ... ... ...
458 271.68 459.0 POLYGON ((-96.77037099999998 33.05601199999999... Public 1903 TX 48 PLANO 48-1903
459 231.84 460.0 POLYGON ((-95.45750799999998 32.32925399999999... Public 1502 TX 48 TYLER 48-1502
460 202.08 461.0 POLYGON ((-111.835446 40.39215899999999, -111.... Public 49002 UT 49 OREM CITY 49-49002
461 244.80 462.0 POLYGON ((-111.93584 40.48790799999999, -111.9... Public 35009 UT 49 SANDY CITY 49-35009
462 196.30 463.0 POLYGON ((-111.923868 40.81086799999999, -111.... Public 35002 UT 49 SALT LAKE CITY 49-35002
463 432.51 464.0 POLYGON ((-122.270304 47.49564799999998, -122.... Public 11610 WA 53 RENTON 53-11610
464 339.72 465.0 POLYGON ((-112.012394 40.54376999999998, -112.... Public 35007 UT 49 WEST JORDAN CITY 49-35007
465 175.08 466.0 POLYGON ((-111.741042 40.23722099999999, -111.... Public 49003 UT 49 PROVO CITY 49-49003
466 246.00 467.0 POLYGON ((-77.13757599999997 38.86620099999999... Public 1302 VA 51 ARLINGTON COUNTY 51-1302
467 305.20 468.0 POLYGON ((-77.54264099999999 37.68613299999999... Public 51224 VA 51 HENRICO COUNTY 51-51224
468 261.93 469.0 POLYGON ((-77.73233299999998 37.50623899999999... Public 4102 VA 51 CHESTERFIELD CO CENTRAL 51-4102
469 248.52 470.0 POLYGON ((-77.96095499999997 39.01495599999999... Public 10703 VA 51 LOUDOUN 51-10703
470 368.93 471.0 POLYGON ((-76.41356599999999 36.87414699999999... Public 51155 VA 51 NORFOLK 51-51155
471 414.36 472.0 POLYGON ((-76.41356599999999 36.87414699999999... Public 51155 VA 51 PORTSMOUTH 51-51155
472 317.52 473.0 POLYGON ((-76.15378699999998 36.77151499999999... Public 51167 VA 51 VIRGINIA BEACH 51-51167
473 262.80 474.0 POLYGON ((-77.38268999999998 38.90514999999999... Public 59305 VA 51 FAIRFAX COUNTY 51-59305
474 297.48 475.0 POLYGON ((-77.14370899999999 38.81182799999999... Private 51255 VA 51 ALEXANDRIA 51-51255
475 419.71 476.0 POLYGON ((-77.60101099999997 37.54484699999999... Public 51235 VA 51 RICHMOND 51-51235
476 433.66 477.0 POLYGON ((-76.62775599999996 37.12586199999999... Public 51175 VA 51 NEWPORT NEWS 51-51175
477 218.97 478.0 POLYGON ((-122.772511 45.69963699999999, -122.... Public 11101 WA 53 VANCOUVER 53-11101
478 568.69 479.0 POLYGON ((-122.263535 47.62431999999998, -122.... Public 11608 WA 53 BELLEVUE 53-11608
479 422.83 480.0 POLYGON ((-122.259891 47.77845599999999, -122.... Public 11704 WA 53 NORTHSHORE 53-11704
480 356.71 481.0 POLYGON ((-122.269937 47.73411999999998, -122.... Public 11607 WA 53 REDMOND 53-11607
481 362.88 482.0 POLYGON ((-117.566158 47.59465499999998, -117.... Public 10502 WA 53 SPOKANE 53-10502
482 303.93 483.0 POLYGON ((-122.259748676706 47.95840102157888,... Public 11703 WA 53 EVERETT 53-11703
483 214.40 484.0 POLYGON ((-88.30589099999997 42.61081699999999... Public 10000 WI 55 kenosha 55-10000
484 259.89 485.0 POLYGON ((-87.99428799999997 43.19254999999999... Public 40101 WI 55 MILWAUKEE 55-40101
485 246.84 486.0 POLYGON ((-88.30757199999998 42.77810199999999... Public 30000 WI 55 RACINE 55-30000
486 240.60 487.0 POLYGON ((-89.47349999999999 43.05362099999999... Public 101 WI 55 MADISON 55-101
487 270.49 488.0 POLYGON ((-88.15010299999997 44.51398999999999... Public 200 WI 55 green bay 55-200

488 rows × 9 columns


In [72]:
# drop unnecessary columns
df_pp.drop(['cartodb_id', 'pumace10', 'state', 'statefp10'], inplace=True, axis=1)

# strip out unicodes
def unicode_to_string(df, cols_list):
    for col in cols_list:
        df[col] = [unicode(x).encode("utf-8") for x in df[col]]
    return df

df_pp = unicode_to_string(df_pp, ['owner', 'system', 'state_puma'])

# sort
df_pp.sort_values(by='state_puma', inplace=True)

In [73]:
df_pp


Out[73]:
bill geometry owner system state_puma
133 468.37 POLYGON ((-86.85779499999998 33.53612699999999... Public BIRMINGHAM WATER WORKS BOARD 1-1302
132 271.18 POLYGON ((-87.61014299999998 33.18157999999999... Public TUSCALOOSA 1-1600
0 307.50 POLYGON ((-87.27848299999998 34.77764699999999... Public MADISON COUNTY 1-200
127 470.40 POLYGON ((-86.41971599999997 32.30373399999999... Public montgomery 1-2000
129 145.44 POLYGON ((-86.19347599999996 31.19221299999999... Public DOTHAN UTILITIES 1-2500
135 132.00 POLYGON ((-88.14843099999999 30.60459799999999... Public Mobile 1-2701
236 375.42 POLYGON ((-75.65007399999999 39.70100099999999... Public WILMINGTON 10-101
234 420.12 POLYGON ((-77.06465794452546 38.89184384525549... Public D.C. 11-105
235 305.60 POLYGON ((-77.06465794452546 38.89184384525549... Public WASHINGTON SUBURBAN SANITARY COMMISSION 11-105
222 403.92 POLYGON ((-82.84782599999998 27.91019899999999... Public CLEARWATER 12-10303
240 208.08 POLYGON ((-82.10620499999997 28.25922599999999... Public LAKELAND 12-10501
21 282.96 POLYGON ((-82.05528799999998 27.88458999999999... Public OKALOOSA CO.WTR.; SWR.SYSTEM 12-10504
28 274.56 POLYGON ((-82.05528799999998 27.88458999999999... Public SEACOAST UTILITIES AUTHORITY 12-10504
27 346.92 POLYGON ((-82.05528799999998 27.88458999999999... Public SARASOTA CO SPECIAL UTIL DIST 12-10504
26 359.44 POLYGON ((-82.05528799999998 27.88458999999999... Public RCID CENTRAL (4 WPS) 12-10504
25 363.00 POLYGON ((-82.05528799999998 27.88458999999999... Public PINELLAS COUNTY UTILITIES 12-10504
24 255.96 POLYGON ((-82.05528799999998 27.88458999999999... Public PCUD-PASCO COUNTY REGIONAL 12-10504
23 138.67 POLYGON ((-82.05528799999998 27.88458999999999... Public ORLANDO UTILITIES COMMISSION (7 WPS) 12-10504
20 482.40 POLYGON ((-82.05528799999998 27.88458999999999... Public OCUD/EASTERN (1 WP) 12-10504
22 150.00 POLYGON ((-82.05528799999998 27.88458999999999... Public OCUD/WESTERN REGIONAL WTR SYS (4 WPS) 12-10504
18 116.46 POLYGON ((-82.05528799999998 27.88458999999999... Public MDWASA - MAIN SYSTEM 12-10504
7 206.16 POLYGON ((-82.05528799999998 27.88458999999999... Public BOCA RATON WTP 12-10504
8 239.88 POLYGON ((-82.05528799999998 27.88458999999999... Public BOYNTON BEACH WTP 12-10504
19 372.37 POLYGON ((-82.05528799999998 27.88458999999999... Public MIRAMAR (EAST ; WEST) PLANTS 12-10504
10 389.04 POLYGON ((-82.05528799999998 27.88458999999999... Public COLLIER COUNTY REGIONAL WTP 12-10504
11 267.00 POLYGON ((-82.05528799999998 27.88458999999999... Public EMERALD COAST UTILITIES AUTHORITY 12-10504
12 276.54 POLYGON ((-82.05528799999998 27.88458999999999... Public GRU - MURPHREE WTP 12-10504
9 199.80 POLYGON ((-82.05528799999998 27.88458999999999... Public ORANGE PARK GRID 12-10504
14 365.04 POLYGON ((-82.05528799999998 27.88458999999999... Public SOUTH CENTRAL SERVICE AREA 12-10504
15 245.88 POLYGON ((-82.05528799999998 27.88458999999999... Public INDIAN RIVER COUNTY UTILITIES (2 WTPS) 12-10504
... ... ... ... ... ...
189 355.22 POLYGON ((-121.368712 37.96744899999999, -121.... Public STOCKTON 6-7702
51 264.87 POLYGON ((-121.185056 35.79416099999999, -121.... Public SANTA MARGARITA 6-7902
199 242.46 POLYGON ((-122.520852 37.59417799999999, -122.... Public DALY CITY 6-8101
36 549.00 POLYGON ((-122.47276 37.63393099999999, -122.4... Public SAN FRANCISCO INT L. AIRPORT 6-8102
202 557.32 POLYGON ((-122.427884 37.60494099999999, -122.... Private - SAN MATEO 6-8103
181 635.13 POLYGON ((-120.492522 34.88650299999999, -120.... Public SANTA MARIA 6-8301
56 533.89 (POLYGON ((-119.052774 33.46585999999999, -119... Public SANTA BARBARA 6-8303
213 335.13 POLYGON ((-122.065265 37.34328299999999, -122.... Public SUNNYVALE 6-8502
197 446.99 POLYGON ((-121.99629 37.37021999999999, -121.9... Public SANTA CLARA 6-8503
204 605.57 POLYGON ((-122.082415 36.96469999999999, -122.... Public SANTA CRUZ 6-8702
193 526.80 POLYGON ((-122.406786 38.15563199999999, -122.... Public VALLEJO 6-9501
168 437.46 POLYGON ((-122.216482 38.26601499999999, -122.... Public FAIRFIELD 6-9502
173 231.75 POLYGON ((-122.126628 38.43545099999999, -122.... Public VACAVILLE 6-9503
182 441.84 POLYGON ((-122.845592 38.48359399999999, -122.... Public SANTA ROSA 6-9703
194 292.65 POLYGON ((-121.007987 37.64688199999999, -121.... Public MODESTO 6-9904
6 347.76 POLYGON ((-106.195372 40.98993799999999, -106.... Public FT COLLINS 8-103
229 376.80 POLYGON ((-104.980919 40.34916699999999, -104.... Public GREELEY 8-300
5 285.00 POLYGON ((-107.465957 39.32211999999999, -107.... Public ASPEN 8-400
226 469.73 POLYGON ((-104.822408 38.82167599999998, -104.... Public COLORADO SPRINGS UTILITIES 8-4105
224 277.20 POLYGON ((-105.327136 39.98149799999999, -105.... Public BOULDER 8-803
228 292.20 POLYGON ((-105.241114 39.90640599999999, -105.... Public BROOMFIELD CITY AND COUNTY OF 8-804
227 270.24 POLYGON ((-105.090367 39.91428199999999, -105.... Public WESTMINSTER 8-805
230 242.04 POLYGON ((-105.02502 39.88521399999999, -105.0... Public THORNTON 8-806
64 460.92 POLYGON ((-104.886613 39.72921899999999, -104.... Public AURORA 8-810
225 245.88 POLYGON ((-105.064541 39.79107999999999, -105.... Public DENVER WATER BOARD 8-813
223 246.78 POLYGON ((-105.240265 39.86590899999999, -105.... Public ARVADA 8-817
231 424.26 (POLYGON ((-73.61336899999998 40.9888829999999... Private STAMFORD 9-102
232 494.28 POLYGON ((-73.51678499999998 41.68758099999999... Private AQUARION WATER CO OF CT-MAIN SYSTEM 9-500
233 313.56 POLYGON ((-73.51678499999998 41.68758099999999... Public METROPOLITAN DISTRICT COMMISSION 9-500
65 205.28 POLYGON ((-73.09235299999997 41.57049599999999... Public WATERBURY 9-901

488 rows × 5 columns


In [74]:
# merge pums_lite and puma
df_merge = pd.merge(df_pums_lite, df_pp, on='state_puma')
df_merge.head(3)


Out[74]:
state_puma inc_percap w_percap WGTP bill geometry owner system
0 1-1302 8850.000000 30.0 121 468.37 POLYGON ((-86.85779499999998 33.53612699999999... Public BIRMINGHAM WATER WORKS BOARD
1 1-1302 9100.000000 700.0 62 468.37 POLYGON ((-86.85779499999998 33.53612699999999... Public BIRMINGHAM WATER WORKS BOARD
2 1-1302 31933.333333 300.0 59 468.37 POLYGON ((-86.85779499999998 33.53612699999999... Public BIRMINGHAM WATER WORKS BOARD

In [75]:
# pums_lite+puma

# get water prices: bill is $/year assuming consumption of 60,000 gallons, so just divide by 60k
df_merge['price'] = df_merge['bill'] / 60000

# drop unnecessary columns
df_merge.drop('bill', inplace=True, axis=1)

df_merge.head(3)


Out[75]:
state_puma inc_percap w_percap WGTP geometry owner system price
0 1-1302 8850.000000 30.0 121 POLYGON ((-86.85779499999998 33.53612699999999... Public BIRMINGHAM WATER WORKS BOARD 0.007806
1 1-1302 9100.000000 700.0 62 POLYGON ((-86.85779499999998 33.53612699999999... Public BIRMINGHAM WATER WORKS BOARD 0.007806
2 1-1302 31933.333333 300.0 59 POLYGON ((-86.85779499999998 33.53612699999999... Public BIRMINGHAM WATER WORKS BOARD 0.007806

In [76]:
# pums_lite+puma
# compute consumption
df_merge['w_use_percap'] = df_merge['w_percap'] / df_merge['price']
df_merge.head(3)


Out[76]:
state_puma inc_percap w_percap WGTP geometry owner system price w_use_percap
0 1-1302 8850.000000 30.0 121 POLYGON ((-86.85779499999998 33.53612699999999... Public BIRMINGHAM WATER WORKS BOARD 0.007806 3843.115486
1 1-1302 9100.000000 700.0 62 POLYGON ((-86.85779499999998 33.53612699999999... Public BIRMINGHAM WATER WORKS BOARD 0.007806 89672.694664
2 1-1302 31933.333333 300.0 59 POLYGON ((-86.85779499999998 33.53612699999999... Public BIRMINGHAM WATER WORKS BOARD 0.007806 38431.154856

In [77]:
# pums_lite+puma
# drop unnecessary cols
df_merge.drop(['w_percap', 'owner', 'system'], inplace=True, axis=1)
df_merge.head(3)


Out[77]:
state_puma inc_percap WGTP geometry price w_use_percap
0 1-1302 8850.000000 121 POLYGON ((-86.85779499999998 33.53612699999999... 0.007806 3843.115486
1 1-1302 9100.000000 62 POLYGON ((-86.85779499999998 33.53612699999999... 0.007806 89672.694664
2 1-1302 31933.333333 59 POLYGON ((-86.85779499999998 33.53612699999999... 0.007806 38431.154856

In [78]:
# pums_lite+puma
with open('data/puma_pums/merged.pickle', 'wb') as f:
    pickle.dump(df_merge, f)

In [79]:
# import pums
df_pums = pd.read_pickle('data/pums/pums.pickle')
df_pums.head(3)


Out[79]:
DIVISION PUMA REGION ST WGTP ADJINC ADJHSG HINCP FINCP NP ... na o w y_raw st puma y_wt state_puma inc_percap w_percap
SERIALNO
3 7 3602 3 48 90 1008425 1000000 52000.0 52000.0 3 ... 0.0 0.0 1.0 0.000769 48 3602 0.069231 48-3602 17333.333333 13.333333
5 7 6603 3 48 112 1008425 1000000 137200.0 137200.0 2 ... 0.0 0.0 1.0 0.000583 48 6603 0.065306 48-6603 68600.000000 40.000000
7 6 2400 3 1 52 1008425 1000000 45000.0 45000.0 6 ... 0.0 0.0 1.0 0.026667 1 2400 1.386667 1-2400 7500.000000 200.000000

3 rows × 59 columns


In [80]:
# merge pums and puma
df_merge2 = pd.merge(df_pums, df_pp, on='state_puma')
df_merge2.head(3)


Out[80]:
DIVISION PUMA REGION ST WGTP ADJINC ADJHSG HINCP FINCP NP ... st puma y_wt state_puma inc_percap w_percap bill geometry owner system
0 7 3602 3 48 90 1008425 1000000 52000.0 52000.0 3 ... 48 3602 0.069231 48-3602 17333.333333 13.333333 257.88 POLYGON ((-96.60183099999998 30.64192599999999... Public COLLEGE STATION
1 7 3602 3 48 290 1008425 1000000 41100.0 0.0 3 ... 48 3602 6.350365 48-3602 13700.000000 300.000000 257.88 POLYGON ((-96.60183099999998 30.64192599999999... Public COLLEGE STATION
2 7 3602 3 48 124 1008425 1000000 61000.0 61000.0 4 ... 48 3602 0.609836 48-3602 15250.000000 75.000000 257.88 POLYGON ((-96.60183099999998 30.64192599999999... Public COLLEGE STATION

3 rows × 63 columns


In [81]:
# pums+puma

# get water prices: bill is $/year assuming consumption of 60,000 gallons, so just divide by 60k
df_merge2['price'] = df_merge2['bill'] / 60000

# drop unnecessary columns
df_merge2.drop('bill', inplace=True, axis=1)

df_merge2.head(3)


Out[81]:
DIVISION PUMA REGION ST WGTP ADJINC ADJHSG HINCP FINCP NP ... st puma y_wt state_puma inc_percap w_percap geometry owner system price
0 7 3602 3 48 90 1008425 1000000 52000.0 52000.0 3 ... 48 3602 0.069231 48-3602 17333.333333 13.333333 POLYGON ((-96.60183099999998 30.64192599999999... Public COLLEGE STATION 0.004298
1 7 3602 3 48 290 1008425 1000000 41100.0 0.0 3 ... 48 3602 6.350365 48-3602 13700.000000 300.000000 POLYGON ((-96.60183099999998 30.64192599999999... Public COLLEGE STATION 0.004298
2 7 3602 3 48 124 1008425 1000000 61000.0 61000.0 4 ... 48 3602 0.609836 48-3602 15250.000000 75.000000 POLYGON ((-96.60183099999998 30.64192599999999... Public COLLEGE STATION 0.004298

3 rows × 63 columns


In [82]:
# pums+puma

# compute consumption
df_merge2['w_use_percap'] = df_merge2['w_percap'] / df_merge2['price']
df_merge2.head(3)


Out[82]:
DIVISION PUMA REGION ST WGTP ADJINC ADJHSG HINCP FINCP NP ... puma y_wt state_puma inc_percap w_percap geometry owner system price w_use_percap
0 7 3602 3 48 90 1008425 1000000 52000.0 52000.0 3 ... 3602 0.069231 48-3602 17333.333333 13.333333 POLYGON ((-96.60183099999998 30.64192599999999... Public COLLEGE STATION 0.004298 3102.218086
1 7 3602 3 48 290 1008425 1000000 41100.0 0.0 3 ... 3602 6.350365 48-3602 13700.000000 300.000000 POLYGON ((-96.60183099999998 30.64192599999999... Public COLLEGE STATION 0.004298 69799.906933
2 7 3602 3 48 124 1008425 1000000 61000.0 61000.0 4 ... 3602 0.609836 48-3602 15250.000000 75.000000 POLYGON ((-96.60183099999998 30.64192599999999... Public COLLEGE STATION 0.004298 17449.976733

3 rows × 64 columns


In [83]:
# pums+puma
with open('data/puma_pums/merged2.pickle', 'wb') as f:
    pickle.dump(df_merge2, f)