In [1]:
from __future__ import print_function, division
import pandas as pd
import pylab as pl
import os
import csv
from pandas.tools.plotting import scatter_matrix
import sys
reload(sys)
import zipfile
import geopandas as gpd
sys.setdefaultencoding('utf-8')
%pylab inline
In [2]:
#Unzippping downloaded median income data of 1999 in a dataframe
zipfile.ZipFile(os.path.join("data/income_00.zip")).extractall(r"data/income_00")
In [3]:
#Reading and saving downloaded median income data of 1999 in a dataframe
data_2000 = pd.read_csv('data/income_00/DEC_00_SF3_P052_with_ann.csv')
data_2000.head()
Out[3]:
GEO.id
GEO.id2
GEO.display-label
VD01
VD02
VD03
VD04
VD05
VD06
VD07
VD08
VD09
VD10
VD11
VD12
VD13
VD14
VD15
VD16
VD17
0
Id
Id2
Geography
Total:
Less than $10,000
$10,000 to $14,999
$15,000 to $19,999
$20,000 to $24,999
$25,000 to $29,999
$30,000 to $34,999
$35,000 to $39,999
$40,000 to $44,999
$45,000 to $49,999
$50,000 to $59,999
$60,000 to $74,999
$75,000 to $99,999
$100,000 to $124,999
$125,000 to $149,999
$150,000 to $199,999
$200,000 or more
1
1500000US360470001001
360470001001
Block Group 1, Census Tract 1, Kings County, N...
859
48
45
16
31
18
59
35
31
8
70
105
68
115
52
39
119
2
1500000US360470001002
360470001002
Block Group 2, Census Tract 1, Kings County, N...
1303
63
0
66
44
22
75
54
94
86
147
174
57
151
131
87
52
3
1500000US360470001003
360470001003
Block Group 3, Census Tract 1, Kings County, N...
246
16
6
10
23
10
13
8
13
12
16
31
23
24
8
15
18
4
1500000US360470002001
360470002001
Block Group 1, Census Tract 2, Kings County, N...
118
6
10
15
0
12
9
16
3
10
15
0
15
7
0
0
0
In [4]:
#Unzipping downloaded median income data of 2013 in a dataframe
zipfile.ZipFile(os.path.join("data/income_13.zip")).extractall(r"data/income_13")
In [5]:
#Reading and saving downloaded median income data of 2013 in a dataframe
data_2013 = pd.read_csv('data/income_13/ACS_13_5YR_B19001_with_ann.csv')
data_2013.head()
Out[5]:
GEO.id
GEO.id2
GEO.display-label
HD01_VD01
HD02_VD01
HD01_VD02
HD02_VD02
HD01_VD03
HD02_VD03
HD01_VD04
...
HD01_VD13
HD02_VD13
HD01_VD14
HD02_VD14
HD01_VD15
HD02_VD15
HD01_VD16
HD02_VD16
HD01_VD17
HD02_VD17
0
Id
Id2
Geography
Estimate; Total:
Margin of Error; Total:
Estimate; Total: - Less than $10,000
Margin of Error; Total: - Less than $10,000
Estimate; Total: - $10,000 to $14,999
Margin of Error; Total: - $10,000 to $14,999
Estimate; Total: - $15,000 to $19,999
...
Estimate; Total: - $75,000 to $99,999
Margin of Error; Total: - $75,000 to $99,999
Estimate; Total: - $100,000 to $124,999
Margin of Error; Total: - $100,000 to $124,999
Estimate; Total: - $125,000 to $149,999
Margin of Error; Total: - $125,000 to $149,999
Estimate; Total: - $150,000 to $199,999
Margin of Error; Total: - $150,000 to $199,999
Estimate; Total: - $200,000 or more
Margin of Error; Total: - $200,000 or more
1
1500000US360470001001
360470001001
Block Group 1, Census Tract 1, Kings County, N...
308
73
54
48
14
22
0
...
37
37
22
25
5
8
39
35
87
70
2
1500000US360470001002
360470001002
Block Group 2, Census Tract 1, Kings County, N...
291
86
33
36
15
23
0
...
0
12
13
22
0
12
0
12
119
63
3
1500000US360470001003
360470001003
Block Group 3, Census Tract 1, Kings County, N...
710
114
33
39
51
80
0
...
151
75
96
53
33
39
118
73
71
42
4
1500000US360470001004
360470001004
Block Group 4, Census Tract 1, Kings County, N...
797
126
47
43
30
33
21
...
107
59
23
28
59
43
32
37
123
73
5 rows × 37 columns
In [6]:
data_2013.columns
Out[6]:
Index([u'GEO.id', u'GEO.id2', u'GEO.display-label', u'HD01_VD01', u'HD02_VD01',
u'HD01_VD02', u'HD02_VD02', u'HD01_VD03', u'HD02_VD03', u'HD01_VD04',
u'HD02_VD04', u'HD01_VD05', u'HD02_VD05', u'HD01_VD06', u'HD02_VD06',
u'HD01_VD07', u'HD02_VD07', u'HD01_VD08', u'HD02_VD08', u'HD01_VD09',
u'HD02_VD09', u'HD01_VD10', u'HD02_VD10', u'HD01_VD11', u'HD02_VD11',
u'HD01_VD12', u'HD02_VD12', u'HD01_VD13', u'HD02_VD13', u'HD01_VD14',
u'HD02_VD14', u'HD01_VD15', u'HD02_VD15', u'HD01_VD16', u'HD02_VD16',
u'HD01_VD17', u'HD02_VD17'],
dtype='object')
In [7]:
data_2013 = data_2013[[u'GEO.id', u'GEO.id2', u'GEO.display-label', u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04',
u'HD01_VD05', u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10', u'HD01_VD11',
u'HD01_VD12', u'HD01_VD13', u'HD01_VD14', u'HD01_VD15', u'HD01_VD16', u'HD01_VD17']]
In [8]:
data_2013.head()
Out[8]:
GEO.id
GEO.id2
GEO.display-label
HD01_VD01
HD01_VD02
HD01_VD03
HD01_VD04
HD01_VD05
HD01_VD06
HD01_VD07
HD01_VD08
HD01_VD09
HD01_VD10
HD01_VD11
HD01_VD12
HD01_VD13
HD01_VD14
HD01_VD15
HD01_VD16
HD01_VD17
0
Id
Id2
Geography
Estimate; Total:
Estimate; Total: - Less than $10,000
Estimate; Total: - $10,000 to $14,999
Estimate; Total: - $15,000 to $19,999
Estimate; Total: - $20,000 to $24,999
Estimate; Total: - $25,000 to $29,999
Estimate; Total: - $30,000 to $34,999
Estimate; Total: - $35,000 to $39,999
Estimate; Total: - $40,000 to $44,999
Estimate; Total: - $45,000 to $49,999
Estimate; Total: - $50,000 to $59,999
Estimate; Total: - $60,000 to $74,999
Estimate; Total: - $75,000 to $99,999
Estimate; Total: - $100,000 to $124,999
Estimate; Total: - $125,000 to $149,999
Estimate; Total: - $150,000 to $199,999
Estimate; Total: - $200,000 or more
1
1500000US360470001001
360470001001
Block Group 1, Census Tract 1, Kings County, N...
308
54
14
0
0
0
0
5
5
6
13
21
37
22
5
39
87
2
1500000US360470001002
360470001002
Block Group 2, Census Tract 1, Kings County, N...
291
33
15
0
0
0
46
40
12
0
0
13
0
13
0
0
119
3
1500000US360470001003
360470001003
Block Group 3, Census Tract 1, Kings County, N...
710
33
51
0
27
13
58
0
0
16
14
29
151
96
33
118
71
4
1500000US360470001004
360470001004
Block Group 4, Census Tract 1, Kings County, N...
797
47
30
21
88
0
35
0
45
0
79
108
107
23
59
32
123
In [9]:
#Unzipping downloaded nyc shapefile in a dataframe
zipfile.ZipFile(os.path.join("data/cb_2015_36_bg_500k.zip")).extractall(r"data/cb_2015_36_bg_500k")
In [10]:
# loading shape file for NYC
nyc_shape = gpd.read_file("data/cb_2015_36_bg_500k/cb_2015_36_bg_500k.shp")
nyc_shape.head()
Out[10]:
AFFGEOID
ALAND
AWATER
BLKGRPCE
COUNTYFP
GEOID
LSAD
NAME
STATEFP
TRACTCE
geometry
0
1500000US360610211000
0
307945
0
061
360610211000
BG
0
36
021100
POLYGON ((-73.968082 40.8207, -73.967982575439...
1
1500000US360290131014
557271
351509
4
029
360290131014
BG
4
36
013101
POLYGON ((-78.89711856256349 42.75125713600959...
2
1500000US360050516002
256313
0
2
005
360050516002
BG
2
36
051600
POLYGON ((-73.791504 40.855456, -73.7874279999...
3
1500000US360810183004
33973
0
4
081
360810183004
BG
4
36
018300
POLYGON ((-73.92145099999999 40.743634, -73.92...
4
1500000US360470276003
70988
0
3
047
360470276003
BG
3
36
027600
POLYGON ((-74.001302 40.611068, -74.0010729999...
In [11]:
nyc_shape.columns = [[u'GEO.id', u'ALAND', u'AWATER', u'BLKGRPCE', u'COUNTYFP',
u'GEO.id2', u'LSAD', u'NAME', u'STATEFP', u'TRACTCE',
u'geometry']]
In [12]:
nyc_shape.head()
Out[12]:
GEO.id
ALAND
AWATER
BLKGRPCE
COUNTYFP
GEO.id2
LSAD
NAME
STATEFP
TRACTCE
geometry
0
1500000US360610211000
0
307945
0
061
360610211000
BG
0
36
021100
POLYGON ((-73.968082 40.8207, -73.967982575439...
1
1500000US360290131014
557271
351509
4
029
360290131014
BG
4
36
013101
POLYGON ((-78.89711856256349 42.75125713600959...
2
1500000US360050516002
256313
0
2
005
360050516002
BG
2
36
051600
POLYGON ((-73.791504 40.855456, -73.7874279999...
3
1500000US360810183004
33973
0
4
081
360810183004
BG
4
36
018300
POLYGON ((-73.92145099999999 40.743634, -73.92...
4
1500000US360470276003
70988
0
3
047
360470276003
BG
3
36
027600
POLYGON ((-74.001302 40.611068, -74.0010729999...
In [13]:
bky_shape = nyc_shape[[u'GEO.id', u'GEO.id2', u'geometry']]
bky_shape.head()
Out[13]:
GEO.id
GEO.id2
geometry
0
1500000US360610211000
360610211000
POLYGON ((-73.968082 40.8207, -73.967982575439...
1
1500000US360290131014
360290131014
POLYGON ((-78.89711856256349 42.75125713600959...
2
1500000US360050516002
360050516002
POLYGON ((-73.791504 40.855456, -73.7874279999...
3
1500000US360810183004
360810183004
POLYGON ((-73.92145099999999 40.743634, -73.92...
4
1500000US360470276003
360470276003
POLYGON ((-74.001302 40.611068, -74.0010729999...
In [ ]:
In [14]:
#Merging 2000 the dataframes to a mother dataframe
income_2000 = pd.merge(data_2000, bky_shape, how='left', on=['GEO.id', 'GEO.id2'])
income_2000.head()
Out[14]:
GEO.id
GEO.id2
GEO.display-label
VD01
VD02
VD03
VD04
VD05
VD06
VD07
...
VD09
VD10
VD11
VD12
VD13
VD14
VD15
VD16
VD17
geometry
0
Id
Id2
Geography
Total:
Less than $10,000
$10,000 to $14,999
$15,000 to $19,999
$20,000 to $24,999
$25,000 to $29,999
$30,000 to $34,999
...
$40,000 to $44,999
$45,000 to $49,999
$50,000 to $59,999
$60,000 to $74,999
$75,000 to $99,999
$100,000 to $124,999
$125,000 to $149,999
$150,000 to $199,999
$200,000 or more
NaN
1
1500000US360470001001
360470001001
Block Group 1, Census Tract 1, Kings County, N...
859
48
45
16
31
18
59
...
31
8
70
105
68
115
52
39
119
POLYGON ((-73.99669799999999 40.700877, -73.99...
2
1500000US360470001002
360470001002
Block Group 2, Census Tract 1, Kings County, N...
1303
63
0
66
44
22
75
...
94
86
147
174
57
151
131
87
52
POLYGON ((-73.995379 40.700309, -73.993672 40....
3
1500000US360470001003
360470001003
Block Group 3, Census Tract 1, Kings County, N...
246
16
6
10
23
10
13
...
13
12
16
31
23
24
8
15
18
POLYGON ((-73.993672 40.699836, -73.9926119999...
4
1500000US360470002001
360470002001
Block Group 1, Census Tract 2, Kings County, N...
118
6
10
15
0
12
9
...
3
10
15
0
15
7
0
0
0
POLYGON ((-74.012844 40.653016, -74.0150479999...
5 rows × 21 columns
In [15]:
#Merging 2013 the dataframes to a mother dataframe
income_2013 = pd.merge(data_2013, bky_shape, how='left', on=['GEO.id', 'GEO.id2'])
income_2013.head()
Out[15]:
GEO.id
GEO.id2
GEO.display-label
HD01_VD01
HD01_VD02
HD01_VD03
HD01_VD04
HD01_VD05
HD01_VD06
HD01_VD07
...
HD01_VD09
HD01_VD10
HD01_VD11
HD01_VD12
HD01_VD13
HD01_VD14
HD01_VD15
HD01_VD16
HD01_VD17
geometry
0
Id
Id2
Geography
Estimate; Total:
Estimate; Total: - Less than $10,000
Estimate; Total: - $10,000 to $14,999
Estimate; Total: - $15,000 to $19,999
Estimate; Total: - $20,000 to $24,999
Estimate; Total: - $25,000 to $29,999
Estimate; Total: - $30,000 to $34,999
...
Estimate; Total: - $40,000 to $44,999
Estimate; Total: - $45,000 to $49,999
Estimate; Total: - $50,000 to $59,999
Estimate; Total: - $60,000 to $74,999
Estimate; Total: - $75,000 to $99,999
Estimate; Total: - $100,000 to $124,999
Estimate; Total: - $125,000 to $149,999
Estimate; Total: - $150,000 to $199,999
Estimate; Total: - $200,000 or more
NaN
1
1500000US360470001001
360470001001
Block Group 1, Census Tract 1, Kings County, N...
308
54
14
0
0
0
0
...
5
6
13
21
37
22
5
39
87
POLYGON ((-73.99669799999999 40.700877, -73.99...
2
1500000US360470001002
360470001002
Block Group 2, Census Tract 1, Kings County, N...
291
33
15
0
0
0
46
...
12
0
0
13
0
13
0
0
119
POLYGON ((-73.995379 40.700309, -73.993672 40....
3
1500000US360470001003
360470001003
Block Group 3, Census Tract 1, Kings County, N...
710
33
51
0
27
13
58
...
0
16
14
29
151
96
33
118
71
POLYGON ((-73.993672 40.699836, -73.9926119999...
4
1500000US360470001004
360470001004
Block Group 4, Census Tract 1, Kings County, N...
797
47
30
21
88
0
35
...
45
0
79
108
107
23
59
32
123
POLYGON ((-73.99271 40.698097, -73.991759 40.7...
5 rows × 21 columns
In [ ]:
In [16]:
#Saving the dataset as csv
income_2000.to_csv('income_2000.csv')
income_2013.to_csv('income_2013.csv')
In [17]:
#Checking the CSV
data = pd.read_csv('income_2013.csv')
data.head()
Out[17]:
Unnamed: 0
GEO.id
GEO.id2
GEO.display-label
HD01_VD01
HD01_VD02
HD01_VD03
HD01_VD04
HD01_VD05
HD01_VD06
...
HD01_VD09
HD01_VD10
HD01_VD11
HD01_VD12
HD01_VD13
HD01_VD14
HD01_VD15
HD01_VD16
HD01_VD17
geometry
0
0
Id
Id2
Geography
Estimate; Total:
Estimate; Total: - Less than $10,000
Estimate; Total: - $10,000 to $14,999
Estimate; Total: - $15,000 to $19,999
Estimate; Total: - $20,000 to $24,999
Estimate; Total: - $25,000 to $29,999
...
Estimate; Total: - $40,000 to $44,999
Estimate; Total: - $45,000 to $49,999
Estimate; Total: - $50,000 to $59,999
Estimate; Total: - $60,000 to $74,999
Estimate; Total: - $75,000 to $99,999
Estimate; Total: - $100,000 to $124,999
Estimate; Total: - $125,000 to $149,999
Estimate; Total: - $150,000 to $199,999
Estimate; Total: - $200,000 or more
NaN
1
1
1500000US360470001001
360470001001
Block Group 1, Census Tract 1, Kings County, N...
308
54
14
0
0
0
...
5
6
13
21
37
22
5
39
87
POLYGON ((-73.99669799999999 40.700877, -73.99...
2
2
1500000US360470001002
360470001002
Block Group 2, Census Tract 1, Kings County, N...
291
33
15
0
0
0
...
12
0
0
13
0
13
0
0
119
POLYGON ((-73.995379 40.700309, -73.993672 40....
3
3
1500000US360470001003
360470001003
Block Group 3, Census Tract 1, Kings County, N...
710
33
51
0
27
13
...
0
16
14
29
151
96
33
118
71
POLYGON ((-73.993672 40.699836, -73.9926119999...
4
4
1500000US360470001004
360470001004
Block Group 4, Census Tract 1, Kings County, N...
797
47
30
21
88
0
...
45
0
79
108
107
23
59
32
123
POLYGON ((-73.99271 40.698097, -73.991759 40.7...
5 rows × 22 columns
In [18]:
data.columns
Out[18]:
Index([u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label',
u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', u'HD01_VD05',
u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10',
u'HD01_VD11', u'HD01_VD12', u'HD01_VD13', u'HD01_VD14', u'HD01_VD15',
u'HD01_VD16', u'HD01_VD17', u'geometry'],
dtype='object')
In [19]:
data.columns = [[u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label',
u'VD01', u'VD02', u'VD03', u'VD04', u'VD05',
u'VD06', u'VD07', u'VD08', u'VD09', u'VD10',
u'VD11', u'VD12', u'VD13', u'VD14', u'VD15',
u'VD16', u'VD17', u'geometry']]
In [20]:
data.drop([0], inplace=True)
data.head()
Out[20]:
Unnamed: 0
GEO.id
GEO.id2
GEO.display-label
VD01
VD02
VD03
VD04
VD05
VD06
...
VD09
VD10
VD11
VD12
VD13
VD14
VD15
VD16
VD17
geometry
1
1
1500000US360470001001
360470001001
Block Group 1, Census Tract 1, Kings County, N...
308
54
14
0
0
0
...
5
6
13
21
37
22
5
39
87
POLYGON ((-73.99669799999999 40.700877, -73.99...
2
2
1500000US360470001002
360470001002
Block Group 2, Census Tract 1, Kings County, N...
291
33
15
0
0
0
...
12
0
0
13
0
13
0
0
119
POLYGON ((-73.995379 40.700309, -73.993672 40....
3
3
1500000US360470001003
360470001003
Block Group 3, Census Tract 1, Kings County, N...
710
33
51
0
27
13
...
0
16
14
29
151
96
33
118
71
POLYGON ((-73.993672 40.699836, -73.9926119999...
4
4
1500000US360470001004
360470001004
Block Group 4, Census Tract 1, Kings County, N...
797
47
30
21
88
0
...
45
0
79
108
107
23
59
32
123
POLYGON ((-73.99271 40.698097, -73.991759 40.7...
5
5
1500000US360470002001
360470002001
Block Group 1, Census Tract 2, Kings County, N...
371
48
5
48
21
29
...
5
19
47
23
16
33
0
5
0
POLYGON ((-74.012844 40.653016, -74.0150479999...
5 rows × 22 columns
In [21]:
intax_2013 = data[['GEO.id', 'GEO.display-label', 'geometry']]
intax_2013.head()
Out[21]:
GEO.id
GEO.display-label
geometry
1
1500000US360470001001
Block Group 1, Census Tract 1, Kings County, N...
POLYGON ((-73.99669799999999 40.700877, -73.99...
2
1500000US360470001002
Block Group 2, Census Tract 1, Kings County, N...
POLYGON ((-73.995379 40.700309, -73.993672 40....
3
1500000US360470001003
Block Group 3, Census Tract 1, Kings County, N...
POLYGON ((-73.993672 40.699836, -73.9926119999...
4
1500000US360470001004
Block Group 4, Census Tract 1, Kings County, N...
POLYGON ((-73.99271 40.698097, -73.991759 40.7...
5
1500000US360470002001
Block Group 1, Census Tract 2, Kings County, N...
POLYGON ((-74.012844 40.653016, -74.0150479999...
In [22]:
intax_2013['B1'] = (data.VD02.astype(float) * 5000 * 0.04) + (data.VD03.astype(float)* 12500 * 0.04)
intax_2013['B2'] = (data.VD04.astype(float) * (((17500 - 17050) * 0.045) + 682)) + (data.VD05.astype(float)* (((22500 - 17050) * 0.045) + 682))
intax_2013['B3'] = (data.VD06.astype(float) * (((27500 - 23450) * 0.0525) + 970))
intax_2013['B4'] = (data.VD07.astype(float) * (((32500 - 27750) * 0.059) + 1195.75)) + (data.VD08.astype(float)* (((37500 - 27750) * 0.059) + 1195.75)) + (data.VD09.astype(float)* (((42500 - 27750) * 0.059) + 1195.75))
intax_2013['B5'] = (data.VD10.astype(float) * (((47500 - 42750) * 0.0645) + 2080.75)) + (data.VD11.astype(float) * (((55000 - 42750) * 0.0645) + 2080.75)) + (data.VD12.astype(float) * (((67500 - 42750) * 0.0645) + 2080.75)) + (data.VD13.astype(float) * (((87500 - 42750) * 0.0645) + 2080.75)) + (data.VD14.astype(float) * (((112500 - 42750) * 0.0645) + 2080.75)) + (data.VD15.astype(float) * (((137500 - 42750) * 0.0645) + 2080.75))
intax_2013['B6'] = (data.VD16.astype(float) * (((175000 - 160500) * 0.0665) + 9675.63)) + (data.VD17.astype(float) * (((319000 - 160500) * 0.0665) + 9675.63))
intax_2013.head()
Out[22]:
GEO.id
GEO.display-label
geometry
B1
B2
B3
B4
B5
B6
1
1500000US360470001001
Block Group 1, Census Tract 1, Kings County, N...
POLYGON ((-73.99669799999999 40.700877, -73.99...
17800.0
0.00
0.000
19185.0
498359.750
2173736.88
2
1500000US360470001002
Block Group 2, Census Tract 1, Kings County, N...
POLYGON ((-73.995379 40.700309, -73.993672 40....
14100.0
0.00
0.000
163528.0
133337.750
2405689.72
3
1500000US360470001003
Block Group 3, Census Tract 1, Kings County, N...
POLYGON ((-73.993672 40.699836, -73.9926119999...
32100.0
25035.75
15374.125
85608.0
1837042.875
2690833.32
4
1500000US360470001004
Block Group 4, Census Tract 1, Kings County, N...
POLYGON ((-73.99271 40.698097, -73.991759 40.7...
24400.0
96345.25
0.000
144630.0
1790077.750
2827029.40
5
1500000US360470002001
Block Group 1, Census Tract 2, Kings County, N...
POLYGON ((-74.012844 40.653016, -74.0150479999...
12100.0
53180.25
34296.125
124567.0
561462.000
53199.40
In [23]:
intax_2013['TotalTax2013'] = intax_2013.B1 + intax_2013.B2 + intax_2013.B3 + intax_2013.B4 + intax_2013.B5 + intax_2013.B6
intax_2013.head()
Out[23]:
GEO.id
GEO.display-label
geometry
B1
B2
B3
B4
B5
B6
TotalTax2013
1
1500000US360470001001
Block Group 1, Census Tract 1, Kings County, N...
POLYGON ((-73.99669799999999 40.700877, -73.99...
17800.0
0.00
0.000
19185.0
498359.750
2173736.88
2709081.630
2
1500000US360470001002
Block Group 2, Census Tract 1, Kings County, N...
POLYGON ((-73.995379 40.700309, -73.993672 40....
14100.0
0.00
0.000
163528.0
133337.750
2405689.72
2716655.470
3
1500000US360470001003
Block Group 3, Census Tract 1, Kings County, N...
POLYGON ((-73.993672 40.699836, -73.9926119999...
32100.0
25035.75
15374.125
85608.0
1837042.875
2690833.32
4685994.070
4
1500000US360470001004
Block Group 4, Census Tract 1, Kings County, N...
POLYGON ((-73.99271 40.698097, -73.991759 40.7...
24400.0
96345.25
0.000
144630.0
1790077.750
2827029.40
4882482.400
5
1500000US360470002001
Block Group 1, Census Tract 2, Kings County, N...
POLYGON ((-74.012844 40.653016, -74.0150479999...
12100.0
53180.25
34296.125
124567.0
561462.000
53199.40
838804.775
In [24]:
#Checking the CSV
data = pd.read_csv('income_2000.csv')
data.head()
Out[24]:
Unnamed: 0
GEO.id
GEO.id2
GEO.display-label
VD01
VD02
VD03
VD04
VD05
VD06
...
VD09
VD10
VD11
VD12
VD13
VD14
VD15
VD16
VD17
geometry
0
0
Id
Id2
Geography
Total:
Less than $10,000
$10,000 to $14,999
$15,000 to $19,999
$20,000 to $24,999
$25,000 to $29,999
...
$40,000 to $44,999
$45,000 to $49,999
$50,000 to $59,999
$60,000 to $74,999
$75,000 to $99,999
$100,000 to $124,999
$125,000 to $149,999
$150,000 to $199,999
$200,000 or more
NaN
1
1
1500000US360470001001
360470001001
Block Group 1, Census Tract 1, Kings County, N...
859
48
45
16
31
18
...
31
8
70
105
68
115
52
39
119
POLYGON ((-73.99669799999999 40.700877, -73.99...
2
2
1500000US360470001002
360470001002
Block Group 2, Census Tract 1, Kings County, N...
1303
63
0
66
44
22
...
94
86
147
174
57
151
131
87
52
POLYGON ((-73.995379 40.700309, -73.993672 40....
3
3
1500000US360470001003
360470001003
Block Group 3, Census Tract 1, Kings County, N...
246
16
6
10
23
10
...
13
12
16
31
23
24
8
15
18
POLYGON ((-73.993672 40.699836, -73.9926119999...
4
4
1500000US360470002001
360470002001
Block Group 1, Census Tract 2, Kings County, N...
118
6
10
15
0
12
...
3
10
15
0
15
7
0
0
0
POLYGON ((-74.012844 40.653016, -74.0150479999...
5 rows × 22 columns
In [25]:
data.columns
Out[25]:
Index([u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label', u'VD01',
u'VD02', u'VD03', u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09',
u'VD10', u'VD11', u'VD12', u'VD13', u'VD14', u'VD15', u'VD16', u'VD17',
u'geometry'],
dtype='object')
In [26]:
data.columns = [[u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label',
u'VD01', u'VD02', u'VD03', u'VD04', u'VD05',
u'VD06', u'VD07', u'VD08', u'VD09', u'VD10',
u'VD11', u'VD12', u'VD13', u'VD14', u'VD15',
u'VD16', u'VD17', u'geometry']]
In [27]:
data.drop([0], inplace=True)
data.head()
Out[27]:
Unnamed: 0
GEO.id
GEO.id2
GEO.display-label
VD01
VD02
VD03
VD04
VD05
VD06
...
VD09
VD10
VD11
VD12
VD13
VD14
VD15
VD16
VD17
geometry
1
1
1500000US360470001001
360470001001
Block Group 1, Census Tract 1, Kings County, N...
859
48
45
16
31
18
...
31
8
70
105
68
115
52
39
119
POLYGON ((-73.99669799999999 40.700877, -73.99...
2
2
1500000US360470001002
360470001002
Block Group 2, Census Tract 1, Kings County, N...
1303
63
0
66
44
22
...
94
86
147
174
57
151
131
87
52
POLYGON ((-73.995379 40.700309, -73.993672 40....
3
3
1500000US360470001003
360470001003
Block Group 3, Census Tract 1, Kings County, N...
246
16
6
10
23
10
...
13
12
16
31
23
24
8
15
18
POLYGON ((-73.993672 40.699836, -73.9926119999...
4
4
1500000US360470002001
360470002001
Block Group 1, Census Tract 2, Kings County, N...
118
6
10
15
0
12
...
3
10
15
0
15
7
0
0
0
POLYGON ((-74.012844 40.653016, -74.0150479999...
5
5
1500000US360470002002
360470002002
Block Group 2, Census Tract 2, Kings County, N...
232
46
29
18
25
19
...
4
5
14
16
12
0
11
0
0
NaN
5 rows × 22 columns
In [28]:
intax_2000 = data[['GEO.id', 'GEO.display-label', 'geometry']]
intax_2000.head()
Out[28]:
GEO.id
GEO.display-label
geometry
1
1500000US360470001001
Block Group 1, Census Tract 1, Kings County, N...
POLYGON ((-73.99669799999999 40.700877, -73.99...
2
1500000US360470001002
Block Group 2, Census Tract 1, Kings County, N...
POLYGON ((-73.995379 40.700309, -73.993672 40....
3
1500000US360470001003
Block Group 3, Census Tract 1, Kings County, N...
POLYGON ((-73.993672 40.699836, -73.9926119999...
4
1500000US360470002001
Block Group 1, Census Tract 2, Kings County, N...
POLYGON ((-74.012844 40.653016, -74.0150479999...
5
1500000US360470002002
Block Group 2, Census Tract 2, Kings County, N...
NaN
In [29]:
intax_2000['B1'] = (data.VD02.astype(float) * 5000 * 0.04) + (data.VD03.astype(float)* 12500 * 0.04)
intax_2000['B2'] = (data.VD04.astype(float) * (((17500 - 16000) * 0.045) + 640))
intax_2000['B3'] = (data.VD05.astype(float)* (((22500 - 22000) * 0.0525) + 910))
intax_2000['B4'] = (data.VD06.astype(float) * (((27500 - 26000) * 0.059) + 1120)) + (data.VD07.astype(float) * (((32500 - 26000) * 0.059) + 1120)) + (data.VD08.astype(float)* (((37500 - 26000) * 0.059) + 1120))
intax_2000['B5'] = (data.VD09.astype(float)* (((42500 - 40000) * 0.0685) + 1946)) + (data.VD10.astype(float) * (((47500 - 40000) * 0.0685) + 1946)) + (data.VD11.astype(float) * (((55000 - 40000) * 0.0685) + 1946)) + (data.VD12.astype(float) * (((67500 - 40000) * 0.0685) + 1946)) + (data.VD13.astype(float) * (((87500 - 40000) * 0.0685) + 1946)) + (data.VD14.astype(float) * (((112500 - 40000) * 0.0685) + 1946)) + (data.VD15.astype(float) * (((137500 - 40000) * 0.0685) + 1946)) + (data.VD16.astype(float) * (((175000 - 40000) * 0.0685) + 1946)) + (data.VD17.astype(float) * (((319000 - 40000) * 0.0685) + 1946))
intax_2000.head()
Out[29]:
GEO.id
GEO.display-label
geometry
B1
B2
B3
B4
B5
1
1500000US360470001001
Block Group 1, Census Tract 1, Kings County, N...
POLYGON ((-73.99669799999999 40.700877, -73.99...
32100.0
11320.0
29023.75
173407.0
5234949.25
2
1500000US360470001002
Block Group 2, Census Tract 1, Kings County, N...
POLYGON ((-73.995379 40.700309, -73.993672 40....
12600.0
46695.0
41195.00
236468.5
6052843.25
3
1500000US360470001003
Block Group 3, Census Tract 1, Kings County, N...
POLYGON ((-73.993672 40.699836, -73.9926119999...
6200.0
7075.0
21533.75
46018.5
1124763.25
4
1500000US360470002001
Block Group 1, Census Tract 2, Kings County, N...
POLYGON ((-74.012844 40.653016, -74.0150479999...
6200.0
10612.5
0.00
56809.5
201933.75
5
1500000US360470002002
Block Group 2, Census Tract 2, Kings County, N...
NaN
23700.0
12735.0
23406.25
75527.0
280942.00
In [30]:
intax_2013['TotalTax2000'] = intax_2000.B1 + intax_2000.B2 + intax_2000.B3 + intax_2000.B4 + intax_2000.B5
intax_2013.head()
Out[30]:
GEO.id
GEO.display-label
geometry
B1
B2
B3
B4
B5
B6
TotalTax2013
TotalTax2000
1
1500000US360470001001
Block Group 1, Census Tract 1, Kings County, N...
POLYGON ((-73.99669799999999 40.700877, -73.99...
17800.0
0.00
0.000
19185.0
498359.750
2173736.88
2709081.630
5480800.00
2
1500000US360470001002
Block Group 2, Census Tract 1, Kings County, N...
POLYGON ((-73.995379 40.700309, -73.993672 40....
14100.0
0.00
0.000
163528.0
133337.750
2405689.72
2716655.470
6389801.75
3
1500000US360470001003
Block Group 3, Census Tract 1, Kings County, N...
POLYGON ((-73.993672 40.699836, -73.9926119999...
32100.0
25035.75
15374.125
85608.0
1837042.875
2690833.32
4685994.070
1205590.50
4
1500000US360470001004
Block Group 4, Census Tract 1, Kings County, N...
POLYGON ((-73.99271 40.698097, -73.991759 40.7...
24400.0
96345.25
0.000
144630.0
1790077.750
2827029.40
4882482.400
275555.75
5
1500000US360470002001
Block Group 1, Census Tract 2, Kings County, N...
POLYGON ((-74.012844 40.653016, -74.0150479999...
12100.0
53180.25
34296.125
124567.0
561462.000
53199.40
838804.775
416310.25
In [31]:
intax_2013.columns
Out[31]:
Index([ u'GEO.id', u'GEO.display-label', u'geometry',
u'B1', u'B2', u'B3',
u'B4', u'B5', u'B6',
u'TotalTax2013', u'TotalTax2000'],
dtype='object')
In [32]:
taxes = intax_2013[[u'GEO.id', u'GEO.display-label', u'geometry', u'TotalTax2013', u'TotalTax2000']]
In [33]:
taxes.TotalTax2000 = taxes.TotalTax2000 * 1.13
taxes.TotalTax2013 = taxes.TotalTax2013 * 1.04
taxes.head()
Out[33]:
GEO.id
GEO.display-label
geometry
TotalTax2013
TotalTax2000
1
1500000US360470001001
Block Group 1, Census Tract 1, Kings County, N...
POLYGON ((-73.99669799999999 40.700877, -73.99...
2.817445e+06
6.193304e+06
2
1500000US360470001002
Block Group 2, Census Tract 1, Kings County, N...
POLYGON ((-73.995379 40.700309, -73.993672 40....
2.825322e+06
7.220476e+06
3
1500000US360470001003
Block Group 3, Census Tract 1, Kings County, N...
POLYGON ((-73.993672 40.699836, -73.9926119999...
4.873434e+06
1.362317e+06
4
1500000US360470001004
Block Group 4, Census Tract 1, Kings County, N...
POLYGON ((-73.99271 40.698097, -73.991759 40.7...
5.077782e+06
3.113780e+05
5
1500000US360470002001
Block Group 1, Census Tract 2, Kings County, N...
POLYGON ((-74.012844 40.653016, -74.0150479999...
8.723570e+05
4.704306e+05
In [34]:
#Saving the dataset as csv
taxes.to_csv('taxes.csv')
In [35]:
dataw = pd.read_csv ('taxes.csv')
dataw.head()
Out[35]:
Unnamed: 0
GEO.id
GEO.display-label
geometry
TotalTax2013
TotalTax2000
0
1
1500000US360470001001
Block Group 1, Census Tract 1, Kings County, N...
POLYGON ((-73.99669799999999 40.700877, -73.99...
2.817445e+06
6.193304e+06
1
2
1500000US360470001002
Block Group 2, Census Tract 1, Kings County, N...
POLYGON ((-73.995379 40.700309, -73.993672 40....
2.825322e+06
7.220476e+06
2
3
1500000US360470001003
Block Group 3, Census Tract 1, Kings County, N...
POLYGON ((-73.993672 40.699836, -73.9926119999...
4.873434e+06
1.362317e+06
3
4
1500000US360470001004
Block Group 4, Census Tract 1, Kings County, N...
POLYGON ((-73.99271 40.698097, -73.991759 40.7...
5.077782e+06
3.113780e+05
4
5
1500000US360470002001
Block Group 1, Census Tract 2, Kings County, N...
POLYGON ((-74.012844 40.653016, -74.0150479999...
8.723570e+05
4.704306e+05
In [36]:
sum(taxes.TotalTax2000)
Out[36]:
2569863909.6524997
In [37]:
sum(taxes.TotalTax2013)
Out[37]:
3639857681.1219997
In [38]:
#https://www.tax-brackets.org/newyorktaxtable/2002
In [39]:
rent_2000 = pd.read_csv('rent_2000.csv')
rent_2013 = pd.read_csv('rent_2013.csv')
In [40]:
data1 = pd.read_csv('metrotech.csv')
data1.head()
Out[40]:
Unnamed: 0
GEO.id
0
0
1500000US360470033002
1
1
1500000US360470037001
2
2
1500000US360470009002
3
3
1500000US360470043002
4
4
1500000US360470043001
In [41]:
met = pd.merge(taxes, data1,how='right')
met.head()
Out[41]:
GEO.id
GEO.display-label
geometry
TotalTax2013
TotalTax2000
Unnamed: 0
0
1500000US360470007003
Block Group 3, Census Tract 7, Kings County, N...
POLYGON ((-73.995914 40.691416, -73.99508 40.6...
8.541489e+06
2.914260e+06
23
1
1500000US360470009001
Block Group 1, Census Tract 9, Kings County, N...
POLYGON ((-73.99310299999999 40.692846, -73.99...
1.044992e+07
4.826333e+06
11
2
1500000US360470009001
Block Group 1, Census Tract 9, Kings County, N...
POLYGON ((-73.99310299999999 40.692846, -73.99...
1.044992e+07
4.826333e+06
12
3
1500000US360470009002
Block Group 2, Census Tract 9, Kings County, N...
POLYGON ((-73.993736 40.69153499999999, -73.98...
9.948306e+06
4.825448e+06
2
4
1500000US360470011001
Block Group 1, Census Tract 11, Kings County, ...
POLYGON ((-73.990447 40.693735, -73.990663 40....
2.523074e+06
2.588893e+06
10
In [42]:
sum(met.TotalTax2000)
Out[42]:
55622271.087499999
In [45]:
sum(met.TotalTax2013)
Out[45]:
108898373.5944
In [46]:
rent_2000.head()
Out[46]:
Unnamed: 0
GEO.id
GEO.id2
GEO.display-label
VD01
VD02
VD03
VD04
VD05
VD06
VD07
VD08
VD09
VD10
VD11
geometry
0
0
Id
Id2
Geography
Total:
Less than 10 percent
10 to 14 percent
15 to 19 percent
20 to 24 percent
25 to 29 percent
30 to 34 percent
35 to 39 percent
40 to 49 percent
50 percent or more
Not computed
NaN
1
1
1500000US360470001001
360470001001
Block Group 1, Census Tract 1, Kings County, N...
497
53
106
60
49
27
28
22
67
64
21
POLYGON ((-73.99669799999999 40.700877, -73.99...
2
2
1500000US360470001002
360470001002
Block Group 2, Census Tract 1, Kings County, N...
550
85
108
78
68
67
62
12
0
59
11
POLYGON ((-73.995379 40.700309, -73.993672 40....
3
3
1500000US360470001003
360470001003
Block Group 3, Census Tract 1, Kings County, N...
118
14
19
15
9
11
2
4
5
30
9
POLYGON ((-73.993672 40.699836, -73.9926119999...
4
4
1500000US360470002001
360470002001
Block Group 1, Census Tract 2, Kings County, N...
129
6
11
15
29
6
5
12
16
19
10
POLYGON ((-74.012844 40.653016, -74.0150479999...
In [103]:
bky2000 = rent_2000[[u'GEO.id2', u'VD01', u'VD02', u'VD03',
u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09', u'VD10', u'VD11']]
bky2000.describe()
Out[103]:
GEO.id2
VD01
VD02
VD03
VD04
VD05
VD06
VD07
VD08
VD09
VD10
VD11
count
2062
2062
2062
2062
2062
2062
2062
2062
2062
2062
2062
2062
unique
2062
661
141
157
158
143
142
113
86
103
253
110
top
360470566002
0
0
0
0
0
0
0
0
0
0
0
freq
1
36
399
228
207
258
344
436
594
406
95
439
In [104]:
bky2000.drop([0])
Out[104]:
GEO.id2
VD01
VD02
VD03
VD04
VD05
VD06
VD07
VD08
VD09
VD10
VD11
1
360470001001
497
53
106
60
49
27
28
22
67
64
21
2
360470001002
550
85
108
78
68
67
62
12
0
59
11
3
360470001003
118
14
19
15
9
11
2
4
5
30
9
4
360470002001
129
6
11
15
29
6
5
12
16
19
10
5
360470002002
199
23
28
6
25
30
0
15
12
60
0
6
360470003011
316
54
58
41
40
21
27
30
0
30
15
7
360470003012
362
66
60
19
59
46
26
0
23
36
27
8
360470003013
199
26
12
13
0
26
25
9
14
52
22
9
360470003014
159
24
9
12
19
37
0
22
0
36
0
10
360470003015
305
59
13
58
11
35
11
35
17
66
0
11
360470003021
22
11
0
0
11
0
0
0
0
0
0
12
360470005001
547
23
129
162
24
47
34
11
47
58
12
13
360470005002
587
108
91
101
127
11
58
12
12
46
21
14
360470005003
279
14
95
43
43
14
14
14
14
28
0
15
360470005004
450
51
60
60
96
72
31
10
30
20
20
16
360470005005
439
34
61
45
105
26
38
0
36
69
25
17
360470007001
540
49
93
28
101
46
47
26
49
82
19
18
360470007002
453
41
9
58
98
41
48
36
30
83
9
19
360470007003
276
46
36
39
64
19
19
0
10
33
10
20
360470009001
450
56
72
65
58
38
23
9
9
91
29
21
360470009002
289
59
72
32
16
8
27
0
28
21
26
22
360470011001
83
41
0
0
0
42
0
0
0
0
0
23
360470013001
254
10
30
86
8
0
0
46
32
16
26
24
360470013002
14
0
0
0
0
7
0
0
0
7
0
25
360470018001
2
2
0
0
0
0
0
0
0
0
0
26
360470020001
101
5
4
5
16
5
0
26
6
21
13
27
360470020002
124
0
13
5
7
0
12
14
6
54
13
28
360470020003
142
16
0
5
45
18
9
0
9
30
10
29
360470021001
105
0
16
26
0
19
0
0
7
18
19
30
360470021002
120
13
0
0
30
12
13
13
0
39
0
...
...
...
...
...
...
...
...
...
...
...
...
...
2032
360471184001
261
10
11
26
29
20
0
20
22
102
21
2033
360471184002
301
0
30
67
38
41
39
11
11
64
0
2034
360471184003
318
22
0
17
51
52
27
0
29
96
24
2035
360471186001
201
11
10
9
22
22
49
18
0
60
0
2036
360471186002
286
20
0
48
49
0
26
0
36
107
0
2037
360471188001
225
6
20
0
0
20
23
33
6
111
6
2038
360471188002
247
6
27
27
19
15
12
0
25
98
18
2039
360471188003
410
6
15
44
0
29
9
26
45
177
59
2040
360471190001
417
30
50
25
13
31
23
14
9
205
17
2041
360471192001
144
7
6
9
18
0
29
0
6
54
15
2042
360471192002
167
21
21
0
27
15
0
14
15
46
8
2043
360471192003
158
10
29
25
9
0
0
8
14
29
34
2044
360471194001
183
0
7
21
23
20
8
0
9
78
17
2045
360471194002
241
11
45
64
16
8
9
0
39
49
0
2046
360471194003
280
26
28
25
17
0
22
23
17
86
36
2047
360471196001
269
28
21
9
26
28
6
8
20
114
9
2048
360471196002
207
16
41
14
21
7
13
9
13
52
21
2049
360471196003
383
6
40
65
41
45
8
18
16
122
22
2050
360471196004
314
0
34
36
41
27
45
12
7
99
13
2051
360471200001
224
0
34
12
19
21
4
0
35
72
27
2052
360471200002
111
0
25
15
11
0
12
12
12
24
0
2053
360471202001
135
22
0
30
9
12
14
17
7
24
0
2054
360471202002
138
8
7
23
12
0
9
0
5
50
24
2055
360471208001
97
0
25
13
0
35
0
11
0
13
0
2056
360471208002
1840
125
294
267
210
194
136
116
135
305
58
2057
360471208003
354
0
30
0
40
40
32
33
14
165
0
2058
360471210001
1365
192
181
133
129
174
87
64
55
213
137
2059
360471214001
1479
208
83
138
167
178
81
57
76
261
230
2060
360471220001
627
24
64
69
83
43
33
30
56
161
64
2061
360471220002
95
10
0
25
25
0
14
8
0
13
0
2061 rows × 12 columns
In [109]:
bky2000.VD01.sum()
Out[109]:
'Total:4975501181291993163621991593052254758727945043954045327645028983254142101124142105120871025835444914926218058107105521818218322801083610289486210345203489108304795283109723471541283052797327515726927529102532822884352502232281561521791033243222611912321821943001013889544189588467622461644161123140284435891981816514735825018035054788310377291272506223253284231231331342360390590204215233321191315568187423225308244275426317152205110348629231719938230626128432330035228538733334141738828340827828333634934635135529128234425027680763330272358513421417400307280408250330411363228292321251399412360342277420221303423429446296437365245439396180249537317329456317240421422301249213306276257208257265206438378262107019736122118620549337511053413822933217618643716728154922042033629129442022424731137024824323729636833129327317726924340525331633743033729330941324917121928726518325929840942941427435457258400309272802723315014527031723226126518710622122733221023525223820902252773042309432334536229026927339028433522550646730831522730332128838511428657635846824935329435234028743211013524752617541616815820325137512327219024524954156154025736002862656721751262893911471544133525812434433263462831631751700179702081903202832313052338734519721732326124835662221673243222639835741029736425730715629543639115620540847837324964299210342420175701671841256420240927729686805451327117204165380297321469267256432868515209165307388138922112231313842901632383902695293071982712365453482823912523892763835441303622772591369324225428722544718037219929163117149269202352196337384284270201231300134305145175169906240537437165117246265503433290148173331522273297438230745727272174164361373435406180194225263448329132175130175119290376225415293459449211145111233579282202510256445225329147207226314165321733087205109303723944434571016265207266291202333282166357935562312933742892683082171892433412333113353242893011712972932572613893163050352158264261335294249122613170469215352241324269333374401272166353521376256321254246273794743364119617443327049126837138338136297103232017513409115845870237345309126275148385355349528311261156401301209397207233473239177335260483492332302262361254353297278133189325540302152142712113472842021033064121514925089535362131019142527513418431431335329574441881120355761144448751738037540831038436046432111388287541564542286360464191021767768936456912543127029213258115464464599395984622485586186390179295245891252621822712831623483064643003304283141615710226236328924369937547101312152110215829133325956733821340534407530279113338344563404635675185326511276642548529104718882767814173096023175998419511127331245403670452250296174196621372743232791506968216753248341469144237387277259396177941640289250195164197126496261240295421614456739351565196215231962002472142902252071391082069511362141673171326306222114394145394151429218247270197303392283165165911953251951769964182273442245017827022520915324139723114324918321452323122931629232729712525213425336221729922322728723320313124127017627501232452453282723041082302442961982883591233022941991113121942603451491833171701942083543043504552411749123521820329013837922522822771193139252448236270143182301380267405445285731802723383214248463443763134103303413414263016311951641301513353013842083082813423284773774362263923202173653632723132222531112225144355582652804363394092731853582753563144192002502972551851042651453762164813823198000143583281421478257423317701511861367104413718826492126132225243245130177291341397339272286509370210276490366210286648282006483933014252910581018852742764311361862854173693071346746822782703218994052775164824044235882862932862793923233132473102944864851742543501844712703063773101141532593343013013014114556470520307597434268446560125044916867461703877573952307555187519761586951512482290317269363389161464720636773258954360947716143235337031334722457595506253379100535456344170284285190733376727326388280195272244366369280153162234367265331313255188452182772022842223113882783263132131893442041592861702264664473050113568440125947140530303962791073788585873881174124191313236254453491993502315244244541041193874514622173073762896165413232221623173951573901272683395417650731316156223402982842521474414526342419213348328452154313251865732797943340339937311171050266293285172117474471421311307200536251383012933154637820317241932373119212646549019629829534926637940142740737536131712419923033755040015132015091403721683972761822294672483845532029241838883660555841647735643762616313217142538229914494892623415324561674446172823831721656189219251435306703589711213478163031010918612934826823020081001311431091597943888114487133698721826719591422124922014010228630014718555170603791912195162251034421972252362182134233392153581996199118306202602785851171581011924410364406301461563390359425650479325317344301741927682316026781211721976022135847231821033856127232328224445922021186986387890074824320713791943204274515014203892133833163113260284317238428948251375178724874043402102544212555592973412654722421391251140158549111219311110211414614312101851331469489094131250537353143293194258345221251379110231202239323252188462233348222422203104345970223112295050112722230783249718745050248837723743957043746429039843548518559628865143995565284821122313013318254843934178301663042972673051843727113950540816848534873142251497741849516961315941110175591821833514423307110523412531513225650375344196339155162192803522707313511925226912194724084171419827033111901261011287381295161592321576355246219185447234218283557640440147179176152179172171771541321534421354262854431949914546229013511411439975511356811241467146851163252453531895273982922132122884073291922323553171736217237513015148671433532010517250161721023882283732182722431061651427922225321231117839644020632111610872343104152581211371291512201431752132273147230027637335430640219033523034136602662082663332613013182012862252474104171441671581832412802692073833142241111351389718403541365147962795'
In [96]:
bky2000.loc['Total']= bky2000.astype(float).sum()
bky2000.tail()
ValueErrorTraceback (most recent call last)
<ipython-input-96-2bc6fdf9cb97> in <module>()
----> 1 bky2000.loc['Total']= bky2000.astype(float).sum()
2 bky2000.tail()
/opt/rh/anaconda/root/envs/PUI2016_Python2/lib/python2.7/site-packages/pandas/core/generic.pyc in astype(self, dtype, copy, raise_on_error, **kwargs)
2948
2949 mgr = self._data.astype(dtype=dtype, copy=copy,
-> 2950 raise_on_error=raise_on_error, **kwargs)
2951 return self._constructor(mgr).__finalize__(self)
2952
/opt/rh/anaconda/root/envs/PUI2016_Python2/lib/python2.7/site-packages/pandas/core/internals.pyc in astype(self, dtype, **kwargs)
2936
2937 def astype(self, dtype, **kwargs):
-> 2938 return self.apply('astype', dtype=dtype, **kwargs)
2939
2940 def convert(self, **kwargs):
/opt/rh/anaconda/root/envs/PUI2016_Python2/lib/python2.7/site-packages/pandas/core/internals.pyc in apply(self, f, axes, filter, do_integrity_check, consolidate, raw, **kwargs)
2888
2889 kwargs['mgr'] = self
-> 2890 applied = getattr(b, f)(**kwargs)
2891 result_blocks = _extend_blocks(applied, result_blocks)
2892
/opt/rh/anaconda/root/envs/PUI2016_Python2/lib/python2.7/site-packages/pandas/core/internals.pyc in astype(self, dtype, copy, raise_on_error, values, **kwargs)
432 **kwargs):
433 return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
--> 434 values=values, **kwargs)
435
436 def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
/opt/rh/anaconda/root/envs/PUI2016_Python2/lib/python2.7/site-packages/pandas/core/internals.pyc in _astype(self, dtype, copy, raise_on_error, values, klass, mgr, **kwargs)
475
476 # _astype_nansafe works fine with 1-d only
--> 477 values = com._astype_nansafe(values.ravel(), dtype, copy=True)
478 values = values.reshape(self.shape)
479
/opt/rh/anaconda/root/envs/PUI2016_Python2/lib/python2.7/site-packages/pandas/core/common.pyc in _astype_nansafe(arr, dtype, copy)
1918
1919 if copy:
-> 1920 return arr.astype(dtype)
1921 return arr.view(dtype)
1922
ValueError: could not convert string to float: Not computed21119100152722000122102025199102926026001313101900072124112520903093518506350203303314400223380032008100001635001896507600000179020008028161802500331613017002821151890810148181070131951600
In [ ]:
In [47]:
metr00 = pd.merge(rent_2000, data1,on='GEO.id', how='right')
metr00.head()
Out[47]:
Unnamed: 0_x
GEO.id
GEO.id2
GEO.display-label
VD01
VD02
VD03
VD04
VD05
VD06
VD07
VD08
VD09
VD10
VD11
geometry
Unnamed: 0_y
0
19
1500000US360470007003
360470007003
Block Group 3, Census Tract 7, Kings County, N...
276
46
36
39
64
19
19
0
10
33
10
POLYGON ((-73.995914 40.691416, -73.99508 40.6...
23
1
20
1500000US360470009001
360470009001
Block Group 1, Census Tract 9, Kings County, N...
450
56
72
65
58
38
23
9
9
91
29
POLYGON ((-73.99310299999999 40.692846, -73.99...
11
2
20
1500000US360470009001
360470009001
Block Group 1, Census Tract 9, Kings County, N...
450
56
72
65
58
38
23
9
9
91
29
POLYGON ((-73.99310299999999 40.692846, -73.99...
12
3
21
1500000US360470009002
360470009002
Block Group 2, Census Tract 9, Kings County, N...
289
59
72
32
16
8
27
0
28
21
26
POLYGON ((-73.993736 40.69153499999999, -73.98...
2
4
22
1500000US360470011001
360470011001
Block Group 1, Census Tract 11, Kings County, ...
83
41
0
0
0
42
0
0
0
0
0
POLYGON ((-73.990447 40.693735, -73.990663 40....
10
In [48]:
sum(metr00.VD01.astype(float))
Out[48]:
5488.0
In [49]:
sum(metr00.VD10.astype(float))
Out[49]:
920.0
In [50]:
sum(metr00.VD10.astype(float))/sum(metr00.VD01.astype(float))
Out[50]:
0.16763848396501457
In [86]:
metr00.columns
Out[86]:
Index([u'Unnamed: 0_x', u'GEO.id', u'GEO.id2', u'GEO.display-label', u'VD01',
u'VD02', u'VD03', u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09',
u'VD10', u'VD11', u'geometry', u'Unnamed: 0_y'],
dtype='object')
In [87]:
met00 = metr00[[u'GEO.id2',u'VD01',
u'VD02', u'VD03', u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09',
u'VD10', u'VD11']]
met00.head()
Out[87]:
GEO.id2
VD01
VD02
VD03
VD04
VD05
VD06
VD07
VD08
VD09
VD10
VD11
0
360470007003
276
46
36
39
64
19
19
0
10
33
10
1
360470009001
450
56
72
65
58
38
23
9
9
91
29
2
360470009001
450
56
72
65
58
38
23
9
9
91
29
3
360470009002
289
59
72
32
16
8
27
0
28
21
26
4
360470011001
83
41
0
0
0
42
0
0
0
0
0
In [88]:
met00.loc['Total']= met00.astype(float).sum()
met00
Out[88]:
GEO.id2
VD01
VD02
VD03
VD04
VD05
VD06
VD07
VD08
VD09
VD10
VD11
0
360470007003
276
46
36
39
64
19
19
0
10
33
10
1
360470009001
450
56
72
65
58
38
23
9
9
91
29
2
360470009001
450
56
72
65
58
38
23
9
9
91
29
3
360470009002
289
59
72
32
16
8
27
0
28
21
26
4
360470011001
83
41
0
0
0
42
0
0
0
0
0
5
360470013001
254
10
30
86
8
0
0
46
32
16
26
6
360470031001
0
0
0
0
0
0
0
0
0
0
0
7
360470031001
0
0
0
0
0
0
0
0
0
0
0
8
360470031002
108
14
0
19
12
28
0
7
0
22
6
9
360470033002
486
38
39
48
71
48
33
23
62
91
33
10
360470035001
489
49
43
87
62
38
56
21
35
58
40
11
360470037001
109
10
0
31
26
0
21
0
0
21
0
12
360470039001
154
17
16
13
11
22
0
29
16
30
0
13
360470039001
154
17
16
13
11
22
0
29
16
30
0
14
360470039002
128
0
0
66
10
25
0
0
14
13
0
15
360470039003
305
22
37
17
38
59
34
19
13
58
8
16
360470041001
275
0
27
40
27
79
9
27
5
61
0
17
360470041002
157
36
16
51
22
0
12
10
0
10
0
18
360470041003
269
14
43
42
27
28
14
11
37
37
16
19
360470043001
0
0
0
0
0
0
0
0
0
0
0
20
360470043002
253
0
10
32
62
33
34
0
17
47
18
21
360470043003
282
9
23
46
82
30
9
18
8
48
9
22
360470043004
288
25
18
32
60
53
9
9
5
71
6
23
360470129011
229
23
35
8
9
19
31
9
24
71
0
Total
8.65128e+12
5488
542
605
832
734
629
354
276
340
920
256
In [51]:
metr13 = pd.merge(rent_2013, data1,on='GEO.id', how='right')
metr13.head()
Out[51]:
Unnamed: 0_x
GEO.id
GEO.id2
GEO.display-label
HD01_VD01
HD01_VD02
HD01_VD03
HD01_VD04
HD01_VD05
HD01_VD06
HD01_VD07
HD01_VD08
HD01_VD09
HD01_VD10
HD01_VD11
geometry
Unnamed: 0_y
0
18
1500000US360470007003
360470007003
Block Group 3, Census Tract 7, Kings County, N...
407
31
48
36
84
74
11
9
35
79
0
POLYGON ((-73.995914 40.691416, -73.99508 40.6...
23
1
19
1500000US360470009001
360470009001
Block Group 1, Census Tract 9, Kings County, N...
640
46
62
117
83
41
20
19
66
150
36
POLYGON ((-73.99310299999999 40.692846, -73.99...
11
2
19
1500000US360470009001
360470009001
Block Group 1, Census Tract 9, Kings County, N...
640
46
62
117
83
41
20
19
66
150
36
POLYGON ((-73.99310299999999 40.692846, -73.99...
12
3
20
1500000US360470009002
360470009002
Block Group 2, Census Tract 9, Kings County, N...
782
65
119
139
94
132
24
35
0
80
94
POLYGON ((-73.993736 40.69153499999999, -73.98...
2
4
21
1500000US360470011001
360470011001
Block Group 1, Census Tract 11, Kings County, ...
313
22
6
44
88
31
32
21
31
38
0
POLYGON ((-73.990447 40.693735, -73.990663 40....
10
In [52]:
sum(metr13.HD01_VD01.astype(float))
Out[52]:
8469.0
In [53]:
sum(metr13.HD01_VD10.astype(float))
Out[53]:
1674.0
In [54]:
sum(metr13.HD01_VD10.astype(float))/sum(metr13.HD01_VD01.astype(float))
Out[54]:
0.19766206163655686
In [82]:
metr13.columns
Out[82]:
Index([u'Unnamed: 0_x', u'GEO.id', u'GEO.id2', u'GEO.display-label',
u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', u'HD01_VD05',
u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10',
u'HD01_VD11', u'geometry', u'Unnamed: 0_y'],
dtype='object')
In [84]:
met13 = metr13[[u'GEO.id2',
u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', u'HD01_VD05',
u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10',
u'HD01_VD11']]
met13.head()
Out[84]:
GEO.id2
HD01_VD01
HD01_VD02
HD01_VD03
HD01_VD04
HD01_VD05
HD01_VD06
HD01_VD07
HD01_VD08
HD01_VD09
HD01_VD10
HD01_VD11
0
360470007003
407
31
48
36
84
74
11
9
35
79
0
1
360470009001
640
46
62
117
83
41
20
19
66
150
36
2
360470009001
640
46
62
117
83
41
20
19
66
150
36
3
360470009002
782
65
119
139
94
132
24
35
0
80
94
4
360470011001
313
22
6
44
88
31
32
21
31
38
0
In [85]:
met13.loc['Total']= met13.astype(float).sum()
met13
Out[85]:
GEO.id2
HD01_VD01
HD01_VD02
HD01_VD03
HD01_VD04
HD01_VD05
HD01_VD06
HD01_VD07
HD01_VD08
HD01_VD09
HD01_VD10
HD01_VD11
0
360470007003
407
31
48
36
84
74
11
9
35
79
0
1
360470009001
640
46
62
117
83
41
20
19
66
150
36
2
360470009001
640
46
62
117
83
41
20
19
66
150
36
3
360470009002
782
65
119
139
94
132
24
35
0
80
94
4
360470011001
313
22
6
44
88
31
32
21
31
38
0
5
360470013001
134
0
24
8
52
27
5
0
13
0
5
6
360470031001
364
11
33
28
95
63
5
16
14
88
11
7
360470031001
364
11
33
28
95
63
5
16
14
88
11
8
360470031002
0
0
0
0
0
0
0
0
0
0
0
9
360470033002
897
33
76
75
153
138
60
55
49
258
0
10
360470035001
142
4
7
15
24
21
20
7
17
27
0
11
360470037001
314
0
63
37
60
35
12
17
54
32
4
12
360470039001
390
29
39
62
52
41
20
17
52
47
31
13
360470039001
390
29
39
62
52
41
20
17
52
47
31
14
360470039002
284
0
0
19
6
67
8
31
0
153
0
15
360470039003
144
40
10
30
0
21
0
0
7
36
0
16
360470041001
562
21
80
81
74
56
40
44
89
41
36
17
360470041002
281
9
19
16
7
81
9
10
30
100
0
18
360470041003
201
0
35
77
9
11
7
0
10
52
0
19
360470043001
0
0
0
0
0
0
0
0
0
0
0
20
360470043002
259
11
23
48
14
47
24
42
32
10
8
21
360470043003
344
0
26
27
67
68
0
0
75
56
25
22
360470043004
387
7
33
25
123
77
12
19
36
55
0
23
360470129011
230
0
0
25
49
0
17
38
0
87
14
Total
8.65128e+12
8469
415
837
1116
1364
1176
371
432
742
1674
342
In [55]:
data1 = pd.read_csv('willy.csv')
data1.head()
Out[55]:
Unnamed: 0
GEO.id
0
0
1500000US360470551003
1
3
1500000US360470551002
2
6
1500000US360470519003
3
14
1500000US360470551001
4
22
1500000US360470523004
In [56]:
met = pd.merge(taxes, data1,how='right')
met.head()
Out[56]:
GEO.id
GEO.display-label
geometry
TotalTax2013
TotalTax2000
Unnamed: 0
0
1500000US360470517001
Block Group 1, Census Tract 517, Kings County,...
POLYGON ((-73.95713099999999 40.718388, -73.95...
1.531597e+06
4.270027e+06
107
1
1500000US360470517002
Block Group 2, Census Tract 517, Kings County,...
POLYGON ((-73.958416 40.719189, -73.955443 40....
4.429708e+06
1.703405e+06
111
2
1500000US360470519001
Block Group 1, Census Tract 519, Kings County,...
POLYGON ((-73.955844 40.717589, -73.9552509999...
5.146695e+06
1.516244e+06
71
3
1500000US360470519002
Block Group 2, Census Tract 519, Kings County,...
POLYGON ((-73.956439 40.717032, -73.952754 40....
4.265974e+06
9.252660e+05
56
4
1500000US360470519003
Block Group 3, Census Tract 519, Kings County,...
POLYGON ((-73.958811 40.714824, -73.9582229999...
2.765746e+06
1.214220e+06
6
In [57]:
sum(met.TotalTax2000)
Out[57]:
26881499.657499991
In [58]:
sum(met.TotalTax2013)
Out[58]:
71606118.74000001
In [ ]:
In [59]:
willy00 = pd.merge(rent_2000, data1,on='GEO.id', how='right')
willy00.head()
Out[59]:
Unnamed: 0_x
GEO.id
GEO.id2
GEO.display-label
VD01
VD02
VD03
VD04
VD05
VD06
VD07
VD08
VD09
VD10
VD11
geometry
Unnamed: 0_y
0
1307
1500000US360470517001
360470517001
Block Group 1, Census Tract 517, Kings County,...
161
7
29
22
44
18
18
9
9
0
5
POLYGON ((-73.95713099999999 40.718388, -73.95...
107
1
1308
1500000US360470517002
360470517002
Block Group 2, Census Tract 517, Kings County,...
432
41
38
76
106
57
32
16
14
44
8
POLYGON ((-73.958416 40.719189, -73.955443 40....
111
2
1312
1500000US360470519001
360470519001
Block Group 1, Census Tract 519, Kings County,...
347
23
27
34
55
33
26
22
8
101
18
POLYGON ((-73.955844 40.717589, -73.9552509999...
71
3
1313
1500000US360470519002
360470519002
Block Group 2, Census Tract 519, Kings County,...
224
7
23
15
47
7
32
8
14
64
7
POLYGON ((-73.956439 40.717032, -73.952754 40....
56
4
1314
1500000US360470519003
360470519003
Block Group 3, Census Tract 519, Kings County,...
575
17
105
68
60
32
31
27
60
143
32
POLYGON ((-73.958811 40.714824, -73.9582229999...
6
In [60]:
sum(willy00.VD01.astype(float))
Out[60]:
7272.0
In [61]:
sum(willy00.VD10.astype(float))
Out[61]:
1413.0
In [62]:
sum(willy00.VD10.astype(float)) / sum(willy00.VD01.astype(float))
Out[62]:
0.19430693069306931
In [79]:
willy00.columns
Out[79]:
Index([u'Unnamed: 0_x', u'GEO.id', u'GEO.id2', u'GEO.display-label', u'VD01',
u'VD02', u'VD03', u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09',
u'VD10', u'VD11', u'geometry', u'Unnamed: 0_y'],
dtype='object')
In [80]:
will00 = willy00[[u'GEO.id2',u'VD01',
u'VD02', u'VD03', u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09',
u'VD10', u'VD11']]
will00.head()
Out[80]:
GEO.id2
VD01
VD02
VD03
VD04
VD05
VD06
VD07
VD08
VD09
VD10
VD11
0
360470517001
161
7
29
22
44
18
18
9
9
0
5
1
360470517002
432
41
38
76
106
57
32
16
14
44
8
2
360470519001
347
23
27
34
55
33
26
22
8
101
18
3
360470519002
224
7
23
15
47
7
32
8
14
64
7
4
360470519003
575
17
105
68
60
32
31
27
60
143
32
In [81]:
will00.loc['Total']= will00.astype(float).sum()
will00
Out[81]:
GEO.id2
VD01
VD02
VD03
VD04
VD05
VD06
VD07
VD08
VD09
VD10
VD11
0
360470517001
161
7
29
22
44
18
18
9
9
0
5
1
360470517002
432
41
38
76
106
57
32
16
14
44
8
2
360470519001
347
23
27
34
55
33
26
22
8
101
18
3
360470519002
224
7
23
15
47
7
32
8
14
64
7
4
360470519003
575
17
105
68
60
32
31
27
60
143
32
5
360470523001
535
89
73
15
80
54
46
42
40
57
39
6
360470523002
456
32
45
45
53
7
37
26
25
171
15
7
360470523003
344
64
50
8
67
37
23
7
26
52
10
8
360470523004
170
16
47
0
31
7
8
6
19
28
8
9
360470523005
284
23
19
56
29
56
8
0
10
50
33
10
360470525001
733
53
84
73
92
97
86
59
52
107
30
11
360470549002
199
59
23
17
5
18
0
18
21
34
4
12
360470551001
424
12
67
57
40
50
40
0
43
102
13
13
360470551002
454
40
46
52
93
49
41
6
9
88
30
14
360470551003
104
10
10
27
20
0
0
0
0
28
9
15
360470551004
119
6
10
29
14
0
0
27
7
19
7
16
360470553001
307
35
33
35
37
31
52
8
26
50
0
17
360470553002
376
73
48
46
37
65
14
30
13
43
7
18
360470553003
289
28
47
0
41
39
29
13
0
72
20
19
360470555001
222
0
15
38
30
29
6
9
19
62
14
20
360470557001
390
47
50
49
34
41
29
20
27
66
27
21
360470557002
127
17
15
0
36
13
0
0
14
32
0
Total
7.93035e+12
7272
699
904
762
1051
740
558
353
456
1413
336
In [63]:
willy13 = pd.merge(rent_2013, data1,on='GEO.id', how='right')
Out[63]:
Unnamed: 0_x
GEO.id
GEO.id2
GEO.display-label
HD01_VD01
HD01_VD02
HD01_VD03
HD01_VD04
HD01_VD05
HD01_VD06
HD01_VD07
HD01_VD08
HD01_VD09
HD01_VD10
HD01_VD11
geometry
Unnamed: 0_y
0
1298
1500000US360470517001
360470517001
Block Group 1, Census Tract 517, Kings County,...
261
0
15
32
22
11
61
29
24
31
36
POLYGON ((-73.95713099999999 40.718388, -73.95...
107
1
1299
1500000US360470517002
360470517002
Block Group 2, Census Tract 517, Kings County,...
482
48
21
91
99
0
56
64
7
96
0
POLYGON ((-73.958416 40.719189, -73.955443 40....
111
2
1303
1500000US360470519001
360470519001
Block Group 1, Census Tract 519, Kings County,...
666
40
85
145
45
36
111
2
23
179
0
POLYGON ((-73.955844 40.717589, -73.9552509999...
71
3
1304
1500000US360470519002
360470519002
Block Group 2, Census Tract 519, Kings County,...
488
0
88
115
83
43
10
39
45
55
10
POLYGON ((-73.956439 40.717032, -73.952754 40....
56
4
1305
1500000US360470519003
360470519003
Block Group 3, Census Tract 519, Kings County,...
469
40
10
21
76
84
37
0
11
190
0
POLYGON ((-73.958811 40.714824, -73.9582229999...
6
5
1309
1500000US360470523001
360470523001
Block Group 1, Census Tract 523, Kings County,...
651
15
78
59
116
13
36
15
108
157
54
POLYGON ((-73.95753499999999 40.711609, -73.95...
45
6
1310
1500000US360470523002
360470523002
Block Group 2, Census Tract 523, Kings County,...
719
0
98
140
15
65
57
13
121
210
0
POLYGON ((-73.960195 40.710355, -73.959064 40....
49
7
1311
1500000US360470523003
360470523003
Block Group 3, Census Tract 523, Kings County,...
281
0
0
32
79
15
0
51
35
42
27
POLYGON ((-73.961421 40.711349, -73.961029 40....
40
8
1312
1500000US360470523004
360470523004
Block Group 4, Census Tract 523, Kings County,...
163
0
16
16
35
16
0
35
0
45
0
POLYGON ((-73.95979699999999 40.713887, -73.95...
22
9
1313
1500000US360470523005
360470523005
Block Group 5, Census Tract 523, Kings County,...
436
104
0
36
86
75
0
80
15
40
0
POLYGON ((-73.958342 40.713306, -73.9568099999...
24
10
1314
1500000US360470525001
360470525001
Block Group 1, Census Tract 525, Kings County,...
769
25
68
123
80
121
134
3
38
166
11
POLYGON ((-73.96229199999999 40.709921, -73.96...
52
11
1383
1500000US360470549002
360470549002
Block Group 2, Census Tract 549, Kings County,...
457
22
0
53
68
39
31
7
22
215
0
POLYGON ((-73.96905 40.712482, -73.96886099999...
26
12
1387
1500000US360470551001
360470551001
Block Group 1, Census Tract 551, Kings County,...
285
63
17
15
26
26
35
35
39
29
0
POLYGON ((-73.96836999999999 40.714127, -73.96...
14
13
1388
1500000US360470551002
360470551002
Block Group 2, Census Tract 551, Kings County,...
582
14
40
49
133
66
50
16
79
86
49
POLYGON ((-73.961287 40.714502, -73.9597969999...
3
14
1389
1500000US360470551003
360470551003
Block Group 3, Census Tract 551, Kings County,...
584
72
90
75
75
34
23
0
14
187
14
POLYGON ((-73.962777 40.715106, -73.961287 40....
0
15
1390
1500000US360470551004
360470551004
Block Group 4, Census Tract 551, Kings County,...
504
52
130
62
126
17
29
0
0
88
0
POLYGON ((-73.96407099999999 40.713079, -73.96...
38
16
1394
1500000US360470553001
360470553001
Block Group 1, Census Tract 553, Kings County,...
357
0
86
59
11
12
35
16
65
55
18
POLYGON ((-73.958913 40.716728, -73.957725 40....
96
17
1395
1500000US360470553002
360470553002
Block Group 2, Census Tract 553, Kings County,...
421
8
56
57
64
34
70
33
32
67
0
POLYGON ((-73.962289 40.71558, -73.961912 40.7...
64
18
1396
1500000US360470553003
360470553003
Block Group 3, Census Tract 553, Kings County,...
200
12
0
35
17
15
45
16
0
53
7
POLYGON ((-73.960196 40.717528, -73.9590089999...
100
19
1400
1500000US360470555001
360470555001
Block Group 1, Census Tract 555, Kings County,...
1379
74
105
112
257
207
170
70
73
239
72
POLYGON ((-73.967578 40.716496, -73.966511 40....
82
20
1404
1500000US360470557001
360470557001
Block Group 1, Census Tract 557, Kings County,...
418
21
45
60
68
34
0
0
51
119
20
POLYGON ((-73.96278699999999 40.722562, -73.96...
105
21
1405
1500000US360470557002
360470557002
Block Group 2, Census Tract 557, Kings County,...
373
38
52
48
55
51
51
8
31
39
0
POLYGON ((-73.96264499999999 40.722747, -73.96...
115
In [64]:
sum(willy13.HD01_VD01.astype(float))
Out[64]:
10945.0
In [65]:
sum(willy13.HD01_VD10.astype(float))
Out[65]:
2388.0
In [66]:
sum(willy13.HD01_VD10.astype(float)) / sum(willy13.HD01_VD01.astype(float))
Out[66]:
0.21818181818181817
In [69]:
willy13.columns
Out[69]:
Index([u'Unnamed: 0_x', u'GEO.id', u'GEO.id2', u'GEO.display-label',
u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', u'HD01_VD05',
u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10',
u'HD01_VD11', u'geometry', u'Unnamed: 0_y'],
dtype='object')
In [78]:
will13 = willy13[[u'GEO.id2',
u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', u'HD01_VD05',
u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10',
u'HD01_VD11']]
will13.head()
Out[78]:
GEO.id2
HD01_VD01
HD01_VD02
HD01_VD03
HD01_VD04
HD01_VD05
HD01_VD06
HD01_VD07
HD01_VD08
HD01_VD09
HD01_VD10
HD01_VD11
0
360470517001
261
0
15
32
22
11
61
29
24
31
36
1
360470517002
482
48
21
91
99
0
56
64
7
96
0
2
360470519001
666
40
85
145
45
36
111
2
23
179
0
3
360470519002
488
0
88
115
83
43
10
39
45
55
10
4
360470519003
469
40
10
21
76
84
37
0
11
190
0
In [77]:
will13.loc['Total']= will13.astype(float).sum()
will13
Out[77]:
GEO.id2
HD01_VD01
HD01_VD02
HD01_VD03
HD01_VD04
HD01_VD05
HD01_VD06
HD01_VD07
HD01_VD08
HD01_VD09
HD01_VD10
HD01_VD11
0
360470517001
261
0
15
32
22
11
61
29
24
31
36
1
360470517002
482
48
21
91
99
0
56
64
7
96
0
2
360470519001
666
40
85
145
45
36
111
2
23
179
0
3
360470519002
488
0
88
115
83
43
10
39
45
55
10
4
360470519003
469
40
10
21
76
84
37
0
11
190
0
5
360470523001
651
15
78
59
116
13
36
15
108
157
54
6
360470523002
719
0
98
140
15
65
57
13
121
210
0
7
360470523003
281
0
0
32
79
15
0
51
35
42
27
8
360470523004
163
0
16
16
35
16
0
35
0
45
0
9
360470523005
436
104
0
36
86
75
0
80
15
40
0
10
360470525001
769
25
68
123
80
121
134
3
38
166
11
11
360470549002
457
22
0
53
68
39
31
7
22
215
0
12
360470551001
285
63
17
15
26
26
35
35
39
29
0
13
360470551002
582
14
40
49
133
66
50
16
79
86
49
14
360470551003
584
72
90
75
75
34
23
0
14
187
14
15
360470551004
504
52
130
62
126
17
29
0
0
88
0
16
360470553001
357
0
86
59
11
12
35
16
65
55
18
17
360470553002
421
8
56
57
64
34
70
33
32
67
0
18
360470553003
200
12
0
35
17
15
45
16
0
53
7
19
360470555001
1379
74
105
112
257
207
170
70
73
239
72
20
360470557001
418
21
45
60
68
34
0
0
51
119
20
21
360470557002
373
38
52
48
55
51
51
8
31
39
0
Total
7.93035e+12
10945
648
1100
1435
1636
1014
1041
532
833
2388
318
In [ ]:
Content source: pichot/was-wburg-worth-it
Similar notebooks: