In [66]:
import os
import geopandas as gpd
import pandas as pd
import pickle
In [67]:
directory = 'data'
if not os.path.exists(directory):
os.makedirs(directory)
In [68]:
directory = 'data/puma_pums'
if not os.path.exists(directory):
os.makedirs(directory)
In [69]:
# import pums-lite
df_pums_lite = pd.read_pickle('data/pums/pums_lite.pickle')
df_pums_lite.sort_values(by='state_puma', inplace=True)
df_pums_lite.head(3)
Out[69]:
state_puma
inc_percap
w_percap
WGTP
SERIALNO
976988
1-100
13936.666667
283.333333
14
970765
1-100
27866.666667
120.000000
68
986158
1-100
11833.333333
200.000000
71
In [70]:
# load puma-pricing into geodataframe
df_pp = gpd.GeoDataFrame.from_file('data/puma-pricing')
df_pp
Out[70]:
bill
cartodb_id
geometry
owner
pumace10
state
statefp10
system
0
307.50
1.0
POLYGON ((-87.27848299999998 34.77764699999999...
Public
00200
AL
01
MADISON COUNTY
1
285.23
2.0
POLYGON ((-111.719775 34.16405299999999, -111....
Private
00800
AZ
04
PINAL VALLEY
2
170.47
3.0
POLYGON ((-93.93488399999997 34.37469199999999...
Public
01600
AR
05
HOT SPRINGS UTILITIES
3
716.18
4.0
POLYGON ((-121.977604 36.58150699999999, -121....
Private
05301
CA
06
MONTEREY
4
429.16
5.0
POLYGON ((-121.977604 36.58150699999999, -121....
Public
05301
CA
06
MARIN MUNICIPAL
5
285.00
6.0
POLYGON ((-107.465957 39.32211999999999, -107....
Public
00400
CO
08
ASPEN
6
347.76
7.0
POLYGON ((-106.195372 40.98993799999999, -106....
Public
00103
CO
08
FT COLLINS
7
206.16
8.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
BOCA RATON WTP
8
239.88
9.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
BOYNTON BEACH WTP
9
199.80
10.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
ORANGE PARK GRID
10
389.04
11.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
COLLIER COUNTY REGIONAL WTP
11
267.00
12.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
EMERALD COAST UTILITIES AUTHORITY
12
276.54
13.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
GRU - MURPHREE WTP
13
146.52
14.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
HERNANDO CO UTL-WEST
14
365.04
15.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
SOUTH CENTRAL SERVICE AREA
15
245.88
16.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
INDIAN RIVER COUNTY UTILITIES (2 WTPS)
16
229.20
17.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
JEA MAJOR GRID
17
220.68
18.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
MANATEE COUNTY UTILITIES DEPT
18
116.46
19.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
MDWASA - MAIN SYSTEM
19
372.37
20.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
MIRAMAR (EAST ; WEST) PLANTS
20
482.40
21.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
OCUD/EASTERN (1 WP)
21
282.96
22.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
OKALOOSA CO.WTR.; SWR.SYSTEM
22
150.00
23.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
OCUD/WESTERN REGIONAL WTR SYS (4 WPS)
23
138.67
24.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
ORLANDO UTILITIES COMMISSION (7 WPS)
24
255.96
25.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
PCUD-PASCO COUNTY REGIONAL
25
363.00
26.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
PINELLAS COUNTY UTILITIES
26
359.44
27.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
RCID CENTRAL (4 WPS)
27
346.92
28.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
SARASOTA CO SPECIAL UTIL DIST
28
274.56
29.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
SEACOAST UTILITIES AUTHORITY
29
275.64
30.0
POLYGON ((-86.32767699999998 38.18273599999999...
Public
03500
KY
18
LOUISVILLE WATER COMPANY
...
...
...
...
...
...
...
...
...
458
271.68
459.0
POLYGON ((-96.77037099999998 33.05601199999999...
Public
01903
TX
48
PLANO
459
231.84
460.0
POLYGON ((-95.45750799999998 32.32925399999999...
Public
01502
TX
48
TYLER
460
202.08
461.0
POLYGON ((-111.835446 40.39215899999999, -111....
Public
49002
UT
49
OREM CITY
461
244.80
462.0
POLYGON ((-111.93584 40.48790799999999, -111.9...
Public
35009
UT
49
SANDY CITY
462
196.30
463.0
POLYGON ((-111.923868 40.81086799999999, -111....
Public
35002
UT
49
SALT LAKE CITY
463
432.51
464.0
POLYGON ((-122.270304 47.49564799999998, -122....
Public
11610
WA
53
RENTON
464
339.72
465.0
POLYGON ((-112.012394 40.54376999999998, -112....
Public
35007
UT
49
WEST JORDAN CITY
465
175.08
466.0
POLYGON ((-111.741042 40.23722099999999, -111....
Public
49003
UT
49
PROVO CITY
466
246.00
467.0
POLYGON ((-77.13757599999997 38.86620099999999...
Public
01302
VA
51
ARLINGTON COUNTY
467
305.20
468.0
POLYGON ((-77.54264099999999 37.68613299999999...
Public
51224
VA
51
HENRICO COUNTY
468
261.93
469.0
POLYGON ((-77.73233299999998 37.50623899999999...
Public
04102
VA
51
CHESTERFIELD CO CENTRAL
469
248.52
470.0
POLYGON ((-77.96095499999997 39.01495599999999...
Public
10703
VA
51
LOUDOUN
470
368.93
471.0
POLYGON ((-76.41356599999999 36.87414699999999...
Public
51155
VA
51
NORFOLK
471
414.36
472.0
POLYGON ((-76.41356599999999 36.87414699999999...
Public
51155
VA
51
PORTSMOUTH
472
317.52
473.0
POLYGON ((-76.15378699999998 36.77151499999999...
Public
51167
VA
51
VIRGINIA BEACH
473
262.80
474.0
POLYGON ((-77.38268999999998 38.90514999999999...
Public
59305
VA
51
FAIRFAX COUNTY
474
297.48
475.0
POLYGON ((-77.14370899999999 38.81182799999999...
Private
51255
VA
51
ALEXANDRIA
475
419.71
476.0
POLYGON ((-77.60101099999997 37.54484699999999...
Public
51235
VA
51
RICHMOND
476
433.66
477.0
POLYGON ((-76.62775599999996 37.12586199999999...
Public
51175
VA
51
NEWPORT NEWS
477
218.97
478.0
POLYGON ((-122.772511 45.69963699999999, -122....
Public
11101
WA
53
VANCOUVER
478
568.69
479.0
POLYGON ((-122.263535 47.62431999999998, -122....
Public
11608
WA
53
BELLEVUE
479
422.83
480.0
POLYGON ((-122.259891 47.77845599999999, -122....
Public
11704
WA
53
NORTHSHORE
480
356.71
481.0
POLYGON ((-122.269937 47.73411999999998, -122....
Public
11607
WA
53
REDMOND
481
362.88
482.0
POLYGON ((-117.566158 47.59465499999998, -117....
Public
10502
WA
53
SPOKANE
482
303.93
483.0
POLYGON ((-122.259748676706 47.95840102157888,...
Public
11703
WA
53
EVERETT
483
214.40
484.0
POLYGON ((-88.30589099999997 42.61081699999999...
Public
10000
WI
55
kenosha
484
259.89
485.0
POLYGON ((-87.99428799999997 43.19254999999999...
Public
40101
WI
55
MILWAUKEE
485
246.84
486.0
POLYGON ((-88.30757199999998 42.77810199999999...
Public
30000
WI
55
RACINE
486
240.60
487.0
POLYGON ((-89.47349999999999 43.05362099999999...
Public
00101
WI
55
MADISON
487
270.49
488.0
POLYGON ((-88.15010299999997 44.51398999999999...
Public
00200
WI
55
green bay
488 rows × 8 columns
In [71]:
# strip out leading zeros with lstrip and create new column which is concatenation of state and puma cols
df_pp['pumace10'] = [c.lstrip('0') for c in df_pp['pumace10']]
df_pp['statefp10'] = [c.lstrip('0') for c in df_pp['statefp10']]
df_pp['state_puma'] = df_pp['statefp10'] + '-' + df_pp['pumace10']
df_pp
Out[71]:
bill
cartodb_id
geometry
owner
pumace10
state
statefp10
system
state_puma
0
307.50
1.0
POLYGON ((-87.27848299999998 34.77764699999999...
Public
200
AL
1
MADISON COUNTY
1-200
1
285.23
2.0
POLYGON ((-111.719775 34.16405299999999, -111....
Private
800
AZ
4
PINAL VALLEY
4-800
2
170.47
3.0
POLYGON ((-93.93488399999997 34.37469199999999...
Public
1600
AR
5
HOT SPRINGS UTILITIES
5-1600
3
716.18
4.0
POLYGON ((-121.977604 36.58150699999999, -121....
Private
5301
CA
6
MONTEREY
6-5301
4
429.16
5.0
POLYGON ((-121.977604 36.58150699999999, -121....
Public
5301
CA
6
MARIN MUNICIPAL
6-5301
5
285.00
6.0
POLYGON ((-107.465957 39.32211999999999, -107....
Public
400
CO
8
ASPEN
8-400
6
347.76
7.0
POLYGON ((-106.195372 40.98993799999999, -106....
Public
103
CO
8
FT COLLINS
8-103
7
206.16
8.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
BOCA RATON WTP
12-10504
8
239.88
9.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
BOYNTON BEACH WTP
12-10504
9
199.80
10.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
ORANGE PARK GRID
12-10504
10
389.04
11.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
COLLIER COUNTY REGIONAL WTP
12-10504
11
267.00
12.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
EMERALD COAST UTILITIES AUTHORITY
12-10504
12
276.54
13.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
GRU - MURPHREE WTP
12-10504
13
146.52
14.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
HERNANDO CO UTL-WEST
12-10504
14
365.04
15.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
SOUTH CENTRAL SERVICE AREA
12-10504
15
245.88
16.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
INDIAN RIVER COUNTY UTILITIES (2 WTPS)
12-10504
16
229.20
17.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
JEA MAJOR GRID
12-10504
17
220.68
18.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
MANATEE COUNTY UTILITIES DEPT
12-10504
18
116.46
19.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
MDWASA - MAIN SYSTEM
12-10504
19
372.37
20.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
MIRAMAR (EAST ; WEST) PLANTS
12-10504
20
482.40
21.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
OCUD/EASTERN (1 WP)
12-10504
21
282.96
22.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
OKALOOSA CO.WTR.; SWR.SYSTEM
12-10504
22
150.00
23.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
OCUD/WESTERN REGIONAL WTR SYS (4 WPS)
12-10504
23
138.67
24.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
ORLANDO UTILITIES COMMISSION (7 WPS)
12-10504
24
255.96
25.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
PCUD-PASCO COUNTY REGIONAL
12-10504
25
363.00
26.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
PINELLAS COUNTY UTILITIES
12-10504
26
359.44
27.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
RCID CENTRAL (4 WPS)
12-10504
27
346.92
28.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
SARASOTA CO SPECIAL UTIL DIST
12-10504
28
274.56
29.0
POLYGON ((-82.05528799999998 27.88458999999999...
Public
10504
FL
12
SEACOAST UTILITIES AUTHORITY
12-10504
29
275.64
30.0
POLYGON ((-86.32767699999998 38.18273599999999...
Public
3500
KY
18
LOUISVILLE WATER COMPANY
18-3500
...
...
...
...
...
...
...
...
...
...
458
271.68
459.0
POLYGON ((-96.77037099999998 33.05601199999999...
Public
1903
TX
48
PLANO
48-1903
459
231.84
460.0
POLYGON ((-95.45750799999998 32.32925399999999...
Public
1502
TX
48
TYLER
48-1502
460
202.08
461.0
POLYGON ((-111.835446 40.39215899999999, -111....
Public
49002
UT
49
OREM CITY
49-49002
461
244.80
462.0
POLYGON ((-111.93584 40.48790799999999, -111.9...
Public
35009
UT
49
SANDY CITY
49-35009
462
196.30
463.0
POLYGON ((-111.923868 40.81086799999999, -111....
Public
35002
UT
49
SALT LAKE CITY
49-35002
463
432.51
464.0
POLYGON ((-122.270304 47.49564799999998, -122....
Public
11610
WA
53
RENTON
53-11610
464
339.72
465.0
POLYGON ((-112.012394 40.54376999999998, -112....
Public
35007
UT
49
WEST JORDAN CITY
49-35007
465
175.08
466.0
POLYGON ((-111.741042 40.23722099999999, -111....
Public
49003
UT
49
PROVO CITY
49-49003
466
246.00
467.0
POLYGON ((-77.13757599999997 38.86620099999999...
Public
1302
VA
51
ARLINGTON COUNTY
51-1302
467
305.20
468.0
POLYGON ((-77.54264099999999 37.68613299999999...
Public
51224
VA
51
HENRICO COUNTY
51-51224
468
261.93
469.0
POLYGON ((-77.73233299999998 37.50623899999999...
Public
4102
VA
51
CHESTERFIELD CO CENTRAL
51-4102
469
248.52
470.0
POLYGON ((-77.96095499999997 39.01495599999999...
Public
10703
VA
51
LOUDOUN
51-10703
470
368.93
471.0
POLYGON ((-76.41356599999999 36.87414699999999...
Public
51155
VA
51
NORFOLK
51-51155
471
414.36
472.0
POLYGON ((-76.41356599999999 36.87414699999999...
Public
51155
VA
51
PORTSMOUTH
51-51155
472
317.52
473.0
POLYGON ((-76.15378699999998 36.77151499999999...
Public
51167
VA
51
VIRGINIA BEACH
51-51167
473
262.80
474.0
POLYGON ((-77.38268999999998 38.90514999999999...
Public
59305
VA
51
FAIRFAX COUNTY
51-59305
474
297.48
475.0
POLYGON ((-77.14370899999999 38.81182799999999...
Private
51255
VA
51
ALEXANDRIA
51-51255
475
419.71
476.0
POLYGON ((-77.60101099999997 37.54484699999999...
Public
51235
VA
51
RICHMOND
51-51235
476
433.66
477.0
POLYGON ((-76.62775599999996 37.12586199999999...
Public
51175
VA
51
NEWPORT NEWS
51-51175
477
218.97
478.0
POLYGON ((-122.772511 45.69963699999999, -122....
Public
11101
WA
53
VANCOUVER
53-11101
478
568.69
479.0
POLYGON ((-122.263535 47.62431999999998, -122....
Public
11608
WA
53
BELLEVUE
53-11608
479
422.83
480.0
POLYGON ((-122.259891 47.77845599999999, -122....
Public
11704
WA
53
NORTHSHORE
53-11704
480
356.71
481.0
POLYGON ((-122.269937 47.73411999999998, -122....
Public
11607
WA
53
REDMOND
53-11607
481
362.88
482.0
POLYGON ((-117.566158 47.59465499999998, -117....
Public
10502
WA
53
SPOKANE
53-10502
482
303.93
483.0
POLYGON ((-122.259748676706 47.95840102157888,...
Public
11703
WA
53
EVERETT
53-11703
483
214.40
484.0
POLYGON ((-88.30589099999997 42.61081699999999...
Public
10000
WI
55
kenosha
55-10000
484
259.89
485.0
POLYGON ((-87.99428799999997 43.19254999999999...
Public
40101
WI
55
MILWAUKEE
55-40101
485
246.84
486.0
POLYGON ((-88.30757199999998 42.77810199999999...
Public
30000
WI
55
RACINE
55-30000
486
240.60
487.0
POLYGON ((-89.47349999999999 43.05362099999999...
Public
101
WI
55
MADISON
55-101
487
270.49
488.0
POLYGON ((-88.15010299999997 44.51398999999999...
Public
200
WI
55
green bay
55-200
488 rows × 9 columns
In [72]:
# drop unnecessary columns
df_pp.drop(['cartodb_id', 'pumace10', 'state', 'statefp10'], inplace=True, axis=1)
# strip out unicodes
def unicode_to_string(df, cols_list):
for col in cols_list:
df[col] = [unicode(x).encode("utf-8") for x in df[col]]
return df
df_pp = unicode_to_string(df_pp, ['owner', 'system', 'state_puma'])
# sort
df_pp.sort_values(by='state_puma', inplace=True)
In [73]:
df_pp
Out[73]:
bill
geometry
owner
system
state_puma
133
468.37
POLYGON ((-86.85779499999998 33.53612699999999...
Public
BIRMINGHAM WATER WORKS BOARD
1-1302
132
271.18
POLYGON ((-87.61014299999998 33.18157999999999...
Public
TUSCALOOSA
1-1600
0
307.50
POLYGON ((-87.27848299999998 34.77764699999999...
Public
MADISON COUNTY
1-200
127
470.40
POLYGON ((-86.41971599999997 32.30373399999999...
Public
montgomery
1-2000
129
145.44
POLYGON ((-86.19347599999996 31.19221299999999...
Public
DOTHAN UTILITIES
1-2500
135
132.00
POLYGON ((-88.14843099999999 30.60459799999999...
Public
Mobile
1-2701
236
375.42
POLYGON ((-75.65007399999999 39.70100099999999...
Public
WILMINGTON
10-101
234
420.12
POLYGON ((-77.06465794452546 38.89184384525549...
Public
D.C.
11-105
235
305.60
POLYGON ((-77.06465794452546 38.89184384525549...
Public
WASHINGTON SUBURBAN SANITARY COMMISSION
11-105
222
403.92
POLYGON ((-82.84782599999998 27.91019899999999...
Public
CLEARWATER
12-10303
240
208.08
POLYGON ((-82.10620499999997 28.25922599999999...
Public
LAKELAND
12-10501
21
282.96
POLYGON ((-82.05528799999998 27.88458999999999...
Public
OKALOOSA CO.WTR.; SWR.SYSTEM
12-10504
28
274.56
POLYGON ((-82.05528799999998 27.88458999999999...
Public
SEACOAST UTILITIES AUTHORITY
12-10504
27
346.92
POLYGON ((-82.05528799999998 27.88458999999999...
Public
SARASOTA CO SPECIAL UTIL DIST
12-10504
26
359.44
POLYGON ((-82.05528799999998 27.88458999999999...
Public
RCID CENTRAL (4 WPS)
12-10504
25
363.00
POLYGON ((-82.05528799999998 27.88458999999999...
Public
PINELLAS COUNTY UTILITIES
12-10504
24
255.96
POLYGON ((-82.05528799999998 27.88458999999999...
Public
PCUD-PASCO COUNTY REGIONAL
12-10504
23
138.67
POLYGON ((-82.05528799999998 27.88458999999999...
Public
ORLANDO UTILITIES COMMISSION (7 WPS)
12-10504
20
482.40
POLYGON ((-82.05528799999998 27.88458999999999...
Public
OCUD/EASTERN (1 WP)
12-10504
22
150.00
POLYGON ((-82.05528799999998 27.88458999999999...
Public
OCUD/WESTERN REGIONAL WTR SYS (4 WPS)
12-10504
18
116.46
POLYGON ((-82.05528799999998 27.88458999999999...
Public
MDWASA - MAIN SYSTEM
12-10504
7
206.16
POLYGON ((-82.05528799999998 27.88458999999999...
Public
BOCA RATON WTP
12-10504
8
239.88
POLYGON ((-82.05528799999998 27.88458999999999...
Public
BOYNTON BEACH WTP
12-10504
19
372.37
POLYGON ((-82.05528799999998 27.88458999999999...
Public
MIRAMAR (EAST ; WEST) PLANTS
12-10504
10
389.04
POLYGON ((-82.05528799999998 27.88458999999999...
Public
COLLIER COUNTY REGIONAL WTP
12-10504
11
267.00
POLYGON ((-82.05528799999998 27.88458999999999...
Public
EMERALD COAST UTILITIES AUTHORITY
12-10504
12
276.54
POLYGON ((-82.05528799999998 27.88458999999999...
Public
GRU - MURPHREE WTP
12-10504
9
199.80
POLYGON ((-82.05528799999998 27.88458999999999...
Public
ORANGE PARK GRID
12-10504
14
365.04
POLYGON ((-82.05528799999998 27.88458999999999...
Public
SOUTH CENTRAL SERVICE AREA
12-10504
15
245.88
POLYGON ((-82.05528799999998 27.88458999999999...
Public
INDIAN RIVER COUNTY UTILITIES (2 WTPS)
12-10504
...
...
...
...
...
...
189
355.22
POLYGON ((-121.368712 37.96744899999999, -121....
Public
STOCKTON
6-7702
51
264.87
POLYGON ((-121.185056 35.79416099999999, -121....
Public
SANTA MARGARITA
6-7902
199
242.46
POLYGON ((-122.520852 37.59417799999999, -122....
Public
DALY CITY
6-8101
36
549.00
POLYGON ((-122.47276 37.63393099999999, -122.4...
Public
SAN FRANCISCO INT L. AIRPORT
6-8102
202
557.32
POLYGON ((-122.427884 37.60494099999999, -122....
Private
- SAN MATEO
6-8103
181
635.13
POLYGON ((-120.492522 34.88650299999999, -120....
Public
SANTA MARIA
6-8301
56
533.89
(POLYGON ((-119.052774 33.46585999999999, -119...
Public
SANTA BARBARA
6-8303
213
335.13
POLYGON ((-122.065265 37.34328299999999, -122....
Public
SUNNYVALE
6-8502
197
446.99
POLYGON ((-121.99629 37.37021999999999, -121.9...
Public
SANTA CLARA
6-8503
204
605.57
POLYGON ((-122.082415 36.96469999999999, -122....
Public
SANTA CRUZ
6-8702
193
526.80
POLYGON ((-122.406786 38.15563199999999, -122....
Public
VALLEJO
6-9501
168
437.46
POLYGON ((-122.216482 38.26601499999999, -122....
Public
FAIRFIELD
6-9502
173
231.75
POLYGON ((-122.126628 38.43545099999999, -122....
Public
VACAVILLE
6-9503
182
441.84
POLYGON ((-122.845592 38.48359399999999, -122....
Public
SANTA ROSA
6-9703
194
292.65
POLYGON ((-121.007987 37.64688199999999, -121....
Public
MODESTO
6-9904
6
347.76
POLYGON ((-106.195372 40.98993799999999, -106....
Public
FT COLLINS
8-103
229
376.80
POLYGON ((-104.980919 40.34916699999999, -104....
Public
GREELEY
8-300
5
285.00
POLYGON ((-107.465957 39.32211999999999, -107....
Public
ASPEN
8-400
226
469.73
POLYGON ((-104.822408 38.82167599999998, -104....
Public
COLORADO SPRINGS UTILITIES
8-4105
224
277.20
POLYGON ((-105.327136 39.98149799999999, -105....
Public
BOULDER
8-803
228
292.20
POLYGON ((-105.241114 39.90640599999999, -105....
Public
BROOMFIELD CITY AND COUNTY OF
8-804
227
270.24
POLYGON ((-105.090367 39.91428199999999, -105....
Public
WESTMINSTER
8-805
230
242.04
POLYGON ((-105.02502 39.88521399999999, -105.0...
Public
THORNTON
8-806
64
460.92
POLYGON ((-104.886613 39.72921899999999, -104....
Public
AURORA
8-810
225
245.88
POLYGON ((-105.064541 39.79107999999999, -105....
Public
DENVER WATER BOARD
8-813
223
246.78
POLYGON ((-105.240265 39.86590899999999, -105....
Public
ARVADA
8-817
231
424.26
(POLYGON ((-73.61336899999998 40.9888829999999...
Private
STAMFORD
9-102
232
494.28
POLYGON ((-73.51678499999998 41.68758099999999...
Private
AQUARION WATER CO OF CT-MAIN SYSTEM
9-500
233
313.56
POLYGON ((-73.51678499999998 41.68758099999999...
Public
METROPOLITAN DISTRICT COMMISSION
9-500
65
205.28
POLYGON ((-73.09235299999997 41.57049599999999...
Public
WATERBURY
9-901
488 rows × 5 columns
In [74]:
# merge pums_lite and puma
df_merge = pd.merge(df_pums_lite, df_pp, on='state_puma')
df_merge.head(3)
Out[74]:
state_puma
inc_percap
w_percap
WGTP
bill
geometry
owner
system
0
1-1302
8850.000000
30.0
121
468.37
POLYGON ((-86.85779499999998 33.53612699999999...
Public
BIRMINGHAM WATER WORKS BOARD
1
1-1302
9100.000000
700.0
62
468.37
POLYGON ((-86.85779499999998 33.53612699999999...
Public
BIRMINGHAM WATER WORKS BOARD
2
1-1302
31933.333333
300.0
59
468.37
POLYGON ((-86.85779499999998 33.53612699999999...
Public
BIRMINGHAM WATER WORKS BOARD
In [75]:
# pums_lite+puma
# get water prices: bill is $/year assuming consumption of 60,000 gallons, so just divide by 60k
df_merge['price'] = df_merge['bill'] / 60000
# drop unnecessary columns
df_merge.drop('bill', inplace=True, axis=1)
df_merge.head(3)
Out[75]:
state_puma
inc_percap
w_percap
WGTP
geometry
owner
system
price
0
1-1302
8850.000000
30.0
121
POLYGON ((-86.85779499999998 33.53612699999999...
Public
BIRMINGHAM WATER WORKS BOARD
0.007806
1
1-1302
9100.000000
700.0
62
POLYGON ((-86.85779499999998 33.53612699999999...
Public
BIRMINGHAM WATER WORKS BOARD
0.007806
2
1-1302
31933.333333
300.0
59
POLYGON ((-86.85779499999998 33.53612699999999...
Public
BIRMINGHAM WATER WORKS BOARD
0.007806
In [76]:
# pums_lite+puma
# compute consumption
df_merge['w_use_percap'] = df_merge['w_percap'] / df_merge['price']
df_merge.head(3)
Out[76]:
state_puma
inc_percap
w_percap
WGTP
geometry
owner
system
price
w_use_percap
0
1-1302
8850.000000
30.0
121
POLYGON ((-86.85779499999998 33.53612699999999...
Public
BIRMINGHAM WATER WORKS BOARD
0.007806
3843.115486
1
1-1302
9100.000000
700.0
62
POLYGON ((-86.85779499999998 33.53612699999999...
Public
BIRMINGHAM WATER WORKS BOARD
0.007806
89672.694664
2
1-1302
31933.333333
300.0
59
POLYGON ((-86.85779499999998 33.53612699999999...
Public
BIRMINGHAM WATER WORKS BOARD
0.007806
38431.154856
In [77]:
# pums_lite+puma
# drop unnecessary cols
df_merge.drop(['w_percap', 'owner', 'system'], inplace=True, axis=1)
df_merge.head(3)
Out[77]:
state_puma
inc_percap
WGTP
geometry
price
w_use_percap
0
1-1302
8850.000000
121
POLYGON ((-86.85779499999998 33.53612699999999...
0.007806
3843.115486
1
1-1302
9100.000000
62
POLYGON ((-86.85779499999998 33.53612699999999...
0.007806
89672.694664
2
1-1302
31933.333333
59
POLYGON ((-86.85779499999998 33.53612699999999...
0.007806
38431.154856
In [78]:
# pums_lite+puma
with open('data/puma_pums/merged.pickle', 'wb') as f:
pickle.dump(df_merge, f)
In [79]:
# import pums
df_pums = pd.read_pickle('data/pums/pums.pickle')
df_pums.head(3)
Out[79]:
DIVISION
PUMA
REGION
ST
WGTP
ADJINC
ADJHSG
HINCP
FINCP
NP
...
na
o
w
y_raw
st
puma
y_wt
state_puma
inc_percap
w_percap
SERIALNO
3
7
3602
3
48
90
1008425
1000000
52000.0
52000.0
3
...
0.0
0.0
1.0
0.000769
48
3602
0.069231
48-3602
17333.333333
13.333333
5
7
6603
3
48
112
1008425
1000000
137200.0
137200.0
2
...
0.0
0.0
1.0
0.000583
48
6603
0.065306
48-6603
68600.000000
40.000000
7
6
2400
3
1
52
1008425
1000000
45000.0
45000.0
6
...
0.0
0.0
1.0
0.026667
1
2400
1.386667
1-2400
7500.000000
200.000000
3 rows × 59 columns
In [80]:
# merge pums and puma
df_merge2 = pd.merge(df_pums, df_pp, on='state_puma')
df_merge2.head(3)
Out[80]:
DIVISION
PUMA
REGION
ST
WGTP
ADJINC
ADJHSG
HINCP
FINCP
NP
...
st
puma
y_wt
state_puma
inc_percap
w_percap
bill
geometry
owner
system
0
7
3602
3
48
90
1008425
1000000
52000.0
52000.0
3
...
48
3602
0.069231
48-3602
17333.333333
13.333333
257.88
POLYGON ((-96.60183099999998 30.64192599999999...
Public
COLLEGE STATION
1
7
3602
3
48
290
1008425
1000000
41100.0
0.0
3
...
48
3602
6.350365
48-3602
13700.000000
300.000000
257.88
POLYGON ((-96.60183099999998 30.64192599999999...
Public
COLLEGE STATION
2
7
3602
3
48
124
1008425
1000000
61000.0
61000.0
4
...
48
3602
0.609836
48-3602
15250.000000
75.000000
257.88
POLYGON ((-96.60183099999998 30.64192599999999...
Public
COLLEGE STATION
3 rows × 63 columns
In [81]:
# pums+puma
# get water prices: bill is $/year assuming consumption of 60,000 gallons, so just divide by 60k
df_merge2['price'] = df_merge2['bill'] / 60000
# drop unnecessary columns
df_merge2.drop('bill', inplace=True, axis=1)
df_merge2.head(3)
Out[81]:
DIVISION
PUMA
REGION
ST
WGTP
ADJINC
ADJHSG
HINCP
FINCP
NP
...
st
puma
y_wt
state_puma
inc_percap
w_percap
geometry
owner
system
price
0
7
3602
3
48
90
1008425
1000000
52000.0
52000.0
3
...
48
3602
0.069231
48-3602
17333.333333
13.333333
POLYGON ((-96.60183099999998 30.64192599999999...
Public
COLLEGE STATION
0.004298
1
7
3602
3
48
290
1008425
1000000
41100.0
0.0
3
...
48
3602
6.350365
48-3602
13700.000000
300.000000
POLYGON ((-96.60183099999998 30.64192599999999...
Public
COLLEGE STATION
0.004298
2
7
3602
3
48
124
1008425
1000000
61000.0
61000.0
4
...
48
3602
0.609836
48-3602
15250.000000
75.000000
POLYGON ((-96.60183099999998 30.64192599999999...
Public
COLLEGE STATION
0.004298
3 rows × 63 columns
In [82]:
# pums+puma
# compute consumption
df_merge2['w_use_percap'] = df_merge2['w_percap'] / df_merge2['price']
df_merge2.head(3)
Out[82]:
DIVISION
PUMA
REGION
ST
WGTP
ADJINC
ADJHSG
HINCP
FINCP
NP
...
puma
y_wt
state_puma
inc_percap
w_percap
geometry
owner
system
price
w_use_percap
0
7
3602
3
48
90
1008425
1000000
52000.0
52000.0
3
...
3602
0.069231
48-3602
17333.333333
13.333333
POLYGON ((-96.60183099999998 30.64192599999999...
Public
COLLEGE STATION
0.004298
3102.218086
1
7
3602
3
48
290
1008425
1000000
41100.0
0.0
3
...
3602
6.350365
48-3602
13700.000000
300.000000
POLYGON ((-96.60183099999998 30.64192599999999...
Public
COLLEGE STATION
0.004298
69799.906933
2
7
3602
3
48
124
1008425
1000000
61000.0
61000.0
4
...
3602
0.609836
48-3602
15250.000000
75.000000
POLYGON ((-96.60183099999998 30.64192599999999...
Public
COLLEGE STATION
0.004298
17449.976733
3 rows × 64 columns
In [83]:
# pums+puma
with open('data/puma_pums/merged2.pickle', 'wb') as f:
pickle.dump(df_merge2, f)
Content source: pgr-me/metis_projects
Similar notebooks: