Wrangling Data

Author: Avikal Somvanshi


In [1]:
from __future__ import print_function, division
import pandas as pd
import pylab as pl
import os
import csv
from pandas.tools.plotting import scatter_matrix
import sys
reload(sys)
import zipfile
import geopandas as gpd
sys.setdefaultencoding('utf-8')
%pylab inline

In [2]:
#Unzippping downloaded median income data of 1999 in a dataframe
zipfile.ZipFile(os.path.join("data/DEC_00_SF3_H069.zip")).extractall(r"data/rent2000")

In [3]:
#Reading and saving downloaded median income data of 1999 in a dataframe
data_2000 = pd.read_csv('data/rent2000/DEC_00_SF3_H069_with_ann.csv')
data_2000.head()


Out[3]:
GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11
0 Id Id2 Geography Total: Less than 10 percent 10 to 14 percent 15 to 19 percent 20 to 24 percent 25 to 29 percent 30 to 34 percent 35 to 39 percent 40 to 49 percent 50 percent or more Not computed
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 497 53 106 60 49 27 28 22 67 64 21
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 550 85 108 78 68 67 62 12 0 59 11
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 118 14 19 15 9 11 2 4 5 30 9
4 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 129 6 11 15 29 6 5 12 16 19 10

In [4]:
#Unzipping downloaded median income data of 2013 in a dataframe
zipfile.ZipFile(os.path.join("data/ACS_13_5YR_B25070.zip")).extractall(r"data/rent2013")

In [5]:
#Reading and saving downloaded median income data of 2013 in a dataframe
data_2013 = pd.read_csv('data/rent2013/ACS_13_5YR_B25070_with_ann.csv')
data_2013.head()


Out[5]:
GEO.id GEO.id2 GEO.display-label HD01_VD01 HD02_VD01 HD01_VD02 HD02_VD02 HD01_VD03 HD02_VD03 HD01_VD04 ... HD01_VD07 HD02_VD07 HD01_VD08 HD02_VD08 HD01_VD09 HD02_VD09 HD01_VD10 HD02_VD10 HD01_VD11 HD02_VD11
0 Id Id2 Geography Estimate; Total: Margin of Error; Total: Estimate; Total: - Less than 10.0 percent Margin of Error; Total: - Less than 10.0 percent Estimate; Total: - 10.0 to 14.9 percent Margin of Error; Total: - 10.0 to 14.9 percent Estimate; Total: - 15.0 to 19.9 percent ... Estimate; Total: - 30.0 to 34.9 percent Margin of Error; Total: - 30.0 to 34.9 percent Estimate; Total: - 35.0 to 39.9 percent Margin of Error; Total: - 35.0 to 39.9 percent Estimate; Total: - 40.0 to 49.9 percent Margin of Error; Total: - 40.0 to 49.9 percent Estimate; Total: - 50.0 percent or more Margin of Error; Total: - 50.0 percent or more Estimate; Total: - Not computed Margin of Error; Total: - Not computed
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 182 74 0 12 41 66 31 ... 0 12 0 12 14 22 67 51 0 12
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 188 80 0 12 43 42 0 ... 0 12 40 64 18 29 61 46 0 12
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 502 122 71 63 33 31 31 ... 15 24 0 12 32 35 119 83 0 12
4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 195 96 28 34 14 23 14 ... 0 12 0 12 0 12 31 35 30 36

5 rows × 25 columns


In [6]:
data_2013.columns


Out[6]:
Index([u'GEO.id', u'GEO.id2', u'GEO.display-label', u'HD01_VD01', u'HD02_VD01',
       u'HD01_VD02', u'HD02_VD02', u'HD01_VD03', u'HD02_VD03', u'HD01_VD04',
       u'HD02_VD04', u'HD01_VD05', u'HD02_VD05', u'HD01_VD06', u'HD02_VD06',
       u'HD01_VD07', u'HD02_VD07', u'HD01_VD08', u'HD02_VD08', u'HD01_VD09',
       u'HD02_VD09', u'HD01_VD10', u'HD02_VD10', u'HD01_VD11', u'HD02_VD11'],
      dtype='object')

In [7]:
data_2013 = data_2013[[u'GEO.id', u'GEO.id2', u'GEO.display-label', u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', 
                       u'HD01_VD05', u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10', u'HD01_VD11']]

In [8]:
data_2013.head()


Out[8]:
GEO.id GEO.id2 GEO.display-label HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08 HD01_VD09 HD01_VD10 HD01_VD11
0 Id Id2 Geography Estimate; Total: Estimate; Total: - Less than 10.0 percent Estimate; Total: - 10.0 to 14.9 percent Estimate; Total: - 15.0 to 19.9 percent Estimate; Total: - 20.0 to 24.9 percent Estimate; Total: - 25.0 to 29.9 percent Estimate; Total: - 30.0 to 34.9 percent Estimate; Total: - 35.0 to 39.9 percent Estimate; Total: - 40.0 to 49.9 percent Estimate; Total: - 50.0 percent or more Estimate; Total: - Not computed
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 182 0 41 31 18 11 0 0 14 67 0
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 188 0 43 0 13 13 0 40 18 61 0
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 502 71 33 31 99 102 15 0 32 119 0
4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 195 28 14 14 17 61 0 0 0 31 30

In [9]:
#Unzipping downloaded nyc shapefile in a dataframe
zipfile.ZipFile(os.path.join("data/cb_2015_36_bg_500k.zip")).extractall(r"data/cb_2015_36_bg_500k")

In [10]:
# loading shape file for NYC
nyc_shape = gpd.read_file("data/cb_2015_36_bg_500k/cb_2015_36_bg_500k.shp")
nyc_shape.head()


Out[10]:
AFFGEOID ALAND AWATER BLKGRPCE COUNTYFP GEOID LSAD NAME STATEFP TRACTCE geometry
0 1500000US360610211000 0 307945 0 061 360610211000 BG 0 36 021100 POLYGON ((-73.968082 40.8207, -73.967982575439...
1 1500000US360290131014 557271 351509 4 029 360290131014 BG 4 36 013101 POLYGON ((-78.89711856256349 42.75125713600959...
2 1500000US360050516002 256313 0 2 005 360050516002 BG 2 36 051600 POLYGON ((-73.791504 40.855456, -73.7874279999...
3 1500000US360810183004 33973 0 4 081 360810183004 BG 4 36 018300 POLYGON ((-73.92145099999999 40.743634, -73.92...
4 1500000US360470276003 70988 0 3 047 360470276003 BG 3 36 027600 POLYGON ((-74.001302 40.611068, -74.0010729999...

In [11]:
nyc_shape.columns = [[u'GEO.id',    u'ALAND',   u'AWATER', u'BLKGRPCE', u'COUNTYFP',
          u'GEO.id2',     u'LSAD',     u'NAME',  u'STATEFP',  u'TRACTCE',
       u'geometry']]

In [12]:
nyc_shape.head()


Out[12]:
GEO.id ALAND AWATER BLKGRPCE COUNTYFP GEO.id2 LSAD NAME STATEFP TRACTCE geometry
0 1500000US360610211000 0 307945 0 061 360610211000 BG 0 36 021100 POLYGON ((-73.968082 40.8207, -73.967982575439...
1 1500000US360290131014 557271 351509 4 029 360290131014 BG 4 36 013101 POLYGON ((-78.89711856256349 42.75125713600959...
2 1500000US360050516002 256313 0 2 005 360050516002 BG 2 36 051600 POLYGON ((-73.791504 40.855456, -73.7874279999...
3 1500000US360810183004 33973 0 4 081 360810183004 BG 4 36 018300 POLYGON ((-73.92145099999999 40.743634, -73.92...
4 1500000US360470276003 70988 0 3 047 360470276003 BG 3 36 027600 POLYGON ((-74.001302 40.611068, -74.0010729999...

In [13]:
bky_shape = nyc_shape[[u'GEO.id', u'GEO.id2', u'geometry']]
bky_shape.head()


Out[13]:
GEO.id GEO.id2 geometry
0 1500000US360610211000 360610211000 POLYGON ((-73.968082 40.8207, -73.967982575439...
1 1500000US360290131014 360290131014 POLYGON ((-78.89711856256349 42.75125713600959...
2 1500000US360050516002 360050516002 POLYGON ((-73.791504 40.855456, -73.7874279999...
3 1500000US360810183004 360810183004 POLYGON ((-73.92145099999999 40.743634, -73.92...
4 1500000US360470276003 360470276003 POLYGON ((-74.001302 40.611068, -74.0010729999...

In [ ]:


In [14]:
#Merging 2000 the dataframes to a mother dataframe
rent_2000 = pd.merge(data_2000, bky_shape, how='left', on=['GEO.id', 'GEO.id2'])
rent_2000.head()


Out[14]:
GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11 geometry
0 Id Id2 Geography Total: Less than 10 percent 10 to 14 percent 15 to 19 percent 20 to 24 percent 25 to 29 percent 30 to 34 percent 35 to 39 percent 40 to 49 percent 50 percent or more Not computed NaN
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 497 53 106 60 49 27 28 22 67 64 21 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 550 85 108 78 68 67 62 12 0 59 11 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 118 14 19 15 9 11 2 4 5 30 9 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 129 6 11 15 29 6 5 12 16 19 10 POLYGON ((-74.012844 40.653016, -74.0150479999...

In [15]:
#Merging 2013 the dataframes to a mother dataframe
rent_2013 = pd.merge(data_2013, bky_shape, how='left', on=['GEO.id', 'GEO.id2'])
rent_2013.head()


Out[15]:
GEO.id GEO.id2 GEO.display-label HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08 HD01_VD09 HD01_VD10 HD01_VD11 geometry
0 Id Id2 Geography Estimate; Total: Estimate; Total: - Less than 10.0 percent Estimate; Total: - 10.0 to 14.9 percent Estimate; Total: - 15.0 to 19.9 percent Estimate; Total: - 20.0 to 24.9 percent Estimate; Total: - 25.0 to 29.9 percent Estimate; Total: - 30.0 to 34.9 percent Estimate; Total: - 35.0 to 39.9 percent Estimate; Total: - 40.0 to 49.9 percent Estimate; Total: - 50.0 percent or more Estimate; Total: - Not computed NaN
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 182 0 41 31 18 11 0 0 14 67 0 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 188 0 43 0 13 13 0 40 18 61 0 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 502 71 33 31 99 102 15 0 32 119 0 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 195 28 14 14 17 61 0 0 0 31 30 POLYGON ((-73.99271 40.698097, -73.991759 40.7...

In [ ]:


In [16]:
#Saving the dataset as csv
rent_2000.to_csv('rent_2000.csv')
rent_2013.to_csv('rent_2013.csv')

In [17]:
#Checking the CSV
data = pd.read_csv('rent_2013.csv')
data.head()


Out[17]:
Unnamed: 0 GEO.id GEO.id2 GEO.display-label HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08 HD01_VD09 HD01_VD10 HD01_VD11 geometry
0 0 Id Id2 Geography Estimate; Total: Estimate; Total: - Less than 10.0 percent Estimate; Total: - 10.0 to 14.9 percent Estimate; Total: - 15.0 to 19.9 percent Estimate; Total: - 20.0 to 24.9 percent Estimate; Total: - 25.0 to 29.9 percent Estimate; Total: - 30.0 to 34.9 percent Estimate; Total: - 35.0 to 39.9 percent Estimate; Total: - 40.0 to 49.9 percent Estimate; Total: - 50.0 percent or more Estimate; Total: - Not computed NaN
1 1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 182 0 41 31 18 11 0 0 14 67 0 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 188 0 43 0 13 13 0 40 18 61 0 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 502 71 33 31 99 102 15 0 32 119 0 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 195 28 14 14 17 61 0 0 0 31 30 POLYGON ((-73.99271 40.698097, -73.991759 40.7...

Good to go!


In [ ]:


In [53]:
#Unzipping downloaded nyc shapefile in a dataframe
zipfile.ZipFile(os.path.join("data/metr200.zip")).extractall(r"data/metr200")

In [54]:
# loading shape file for NYC
nyc_shape = gpd.read_file("data/metr200/metr200.shp")
nyc_shape


Out[54]:
GEO.disp_1 GEO.displa GEO.id GEO.id2 TotalTax20 TotalTax_1 VD01_x VD01_y VD02_x VD02_y ... VD12 VD13 VD14 VD15 VD16 VD17 field_1 field_1_2 geo.id_2 geometry
0 Block Group 2, Census Tract 33, Kings County, ... Block Group 2, Census Tract 33, Kings County, ... 1500000US360470033002 360470033002 6.794355e+06 2.751153e+06 676 486 144 38 ... 81 18 48 0 44 17 51 48 1500000US360470033002 (POLYGON ((-73.97870734680457 40.6868858825688...
1 Block Group 1, Census Tract 37, Kings County, ... Block Group 1, Census Tract 37, Kings County, ... 1500000US360470037001 360470037001 2.760843e+06 3.556554e+06 129 109 0 10 ... 8 0 0 7 0 0 60 58 1500000US360470037001 (POLYGON ((-73.97987536126175 40.6871320892793...
2 Block Group 2, Census Tract 9, Kings County, N... Block Group 2, Census Tract 9, Kings County, N... 1500000US360470009002 360470009002 9.948306e+06 4.825448e+06 366 289 40 59 ... 25 35 29 59 19 39 21 20 1500000US360470009002 (POLYGON ((-73.99247153124475 40.6905331370807...
3 Block Group 2, Census Tract 43, Kings County, ... Block Group 2, Census Tract 43, Kings County, ... 1500000US360470043002 360470043002 3.455649e+06 2.152118e+06 330 253 28 0 ... 53 0 34 0 16 56 74 68 1500000US360470043002 (POLYGON ((-73.98901204293372 40.6900246370137...
4 Block Group 1, Census Tract 43, Kings County, ... Block Group 1, Census Tract 43, Kings County, ... 1500000US360470043001 360470043001 0.000000e+00 4.121107e+06 0 0 0 0 ... 0 0 0 0 0 0 73 67 1500000US360470043001 POLYGON ((-73.98933584733204 40.68881232913714...
5 Block Group 4, Census Tract 43, Kings County, ... Block Group 4, Census Tract 43, Kings County, ... 1500000US360470043004 360470043004 5.156167e+06 2.133868e+06 310 288 50 25 ... 51 23 16 13 10 13 76 70 1500000US360470043004 (POLYGON ((-73.99111171049765 40.6888930639022...
6 Block Group 1, Census Tract 41, Kings County, ... Block Group 1, Census Tract 41, Kings County, ... 1500000US360470041001 360470041001 5.289765e+06 5.839860e+05 462 275 42 0 ... 49 40 51 8 13 23 68 64 1500000US360470041001 (POLYGON ((-73.98240685744892 40.6868788202442...
7 Block Group 3, Census Tract 43, Kings County, ... Block Group 3, Census Tract 43, Kings County, ... 1500000US360470043003 360470043003 2.804964e+06 3.004778e+06 430 282 25 9 ... 51 39 44 7 64 19 75 69 1500000US360470043003 POLYGON ((-73.98723838793184 40.68771396645536...
8 Block Group 2, Census Tract 41, Kings County, ... Block Group 2, Census Tract 41, Kings County, ... 1500000US360470041002 360470041002 3.246929e+06 8.994885e+05 323 157 8 36 ... 32 57 25 7 85 46 69 65 1500000US360470041002 POLYGON ((-73.98521659577254 40.68668959319085...
9 Block Group 1, Census Tract 13, Kings County, ... Block Group 1, Census Tract 13, Kings County, ... 1500000US360470013001 360470013001 7.166677e+06 1.468350e+05 1147 254 34 10 ... 159 145 127 60 34 34 23 22 1500000US360470013001 (POLYGON ((-73.9881549547864 40.69508114344289...
10 Block Group 1, Census Tract 11, Kings County, ... Block Group 1, Census Tract 11, Kings County, ... 1500000US360470011001 360470011001 2.523074e+06 2.588893e+06 75 83 0 41 ... 0 8 0 0 0 0 22 21 1500000US360470011001 (POLYGON ((-73.98682423503936 40.6938937250703...
11 Block Group 1, Census Tract 9, Kings County, N... Block Group 1, Census Tract 9, Kings County, N... 1500000US360470009001 360470009001 1.044992e+07 4.826333e+06 682 450 76 56 ... 62 125 44 50 52 83 20 19 1500000US360470009001 (POLYGON ((-73.99073068241701 40.6934277380062...
12 Block Group 1, Census Tract 11, Kings County, ... Block Group 1, Census Tract 9, Kings County, N... 1500000US360470009001 360470009001 2.523074e+06 2.588893e+06 682 450 76 56 ... 62 125 44 50 52 83 20 21 1500000US360470011001 POLYGON ((-73.98968108308657 40.69225601248485...
13 Block Group 3, Census Tract 15, Kings County, ... Block Group 1, Census Tract 31, Kings County, ... 1500000US360470031001 360470031001 8.143788e+06 2.965691e+05 0 0 0 0 ... 0 0 0 0 0 0 46 26 1500000US360470015003 POLYGON ((-73.98244284225586 40.69287526686779...
14 Block Group 1, Census Tract 31, Kings County, ... Block Group 1, Census Tract 31, Kings County, ... 1500000US360470031001 360470031001 4.260095e+06 1.822554e+06 0 0 0 0 ... 0 0 0 0 0 0 46 44 1500000US360470031001 (POLYGON ((-73.98244284225586 40.6928752668677...
15 Block Group 2, Census Tract 31, Kings County, ... Block Group 2, Census Tract 31, Kings County, ... 1500000US360470031002 360470031002 0.000000e+00 1.782760e+06 83 108 62 14 ... 0 0 0 0 0 0 47 45 1500000US360470031002 (POLYGON ((-73.98158772483383 40.6909854662658...
16 Block Group 1, Census Tract 37, Kings County, ... Block Group 1, Census Tract 39, Kings County, ... 1500000US360470039001 360470039001 2.760843e+06 3.556554e+06 158 154 13 17 ... 8 8 19 15 0 8 63 58 1500000US360470037001 POLYGON ((-73.97983016638672 40.68680535933928...
17 Block Group 1, Census Tract 39, Kings County, ... Block Group 1, Census Tract 39, Kings County, ... 1500000US360470039001 360470039001 5.301568e+06 1.133788e+05 158 154 13 17 ... 8 8 19 15 0 8 63 61 1500000US360470039001 (POLYGON ((-73.9786719698802 40.68522967010515...
18 Block Group 1, Census Tract 35, Kings County, ... Block Group 1, Census Tract 35, Kings County, ... 1500000US360470035001 360470035001 2.122820e+06 2.187168e+06 527 489 64 49 ... 69 49 10 18 0 41 55 53 1500000US360470035001 (POLYGON ((-73.97604749487012 40.6851941333957...
19 Block Group 3, Census Tract 41, Kings County, ... Block Group 3, Census Tract 41, Kings County, ... 1500000US360470041003 360470041003 4.171970e+06 1.114244e+06 365 269 13 14 ... 38 53 28 44 8 23 70 66 1500000US360470041003 POLYGON ((-73.98322167230421 40.68615376466381...
20 Block Group 2, Census Tract 39, Kings County, ... Block Group 2, Census Tract 39, Kings County, ... 1500000US360470039002 360470039002 1.537886e+06 4.512899e+06 148 128 8 0 ... 22 0 14 15 0 0 64 62 1500000US360470039002 POLYGON ((-73.98103731274125 40.68505412831944...
21 Block Group 3, Census Tract 39, Kings County, ... Block Group 3, Census Tract 39, Kings County, ... 1500000US360470039003 360470039003 1.840572e+06 7.748789e+05 353 305 149 22 ... 5 24 17 10 9 6 65 63 1500000US360470039003 POLYGON ((-73.97871150496528 40.6844054180393,...
22 Block Group 1, Census Tract 129.01, Kings Coun... Block Group 1, Census Tract 129.01, Kings Coun... 1500000US360470129011 360470129011 1.513746e+06 2.047302e+06 353 229 52 23 ... 11 26 27 0 12 7 255 259 1500000US360470129011 POLYGON ((-73.97617836053597 40.68321422955896...
23 Block Group 3, Census Tract 7, Kings County, N... Block Group 3, Census Tract 7, Kings County, N... 1500000US360470007003 360470007003 8.541489e+06 2.914260e+06 516 276 10 46 ... 52 75 47 16 40 120 19 18 1500000US360470007003 POLYGON ((-73.99347679990953 40.69230129739747...

24 rows × 38 columns


In [55]:
np.sum(nyc_shape.TotalTax20)


Out[55]:
102314497.07960001

In [56]:
np.sum(nyc_shape.TotalTax_1)


Out[56]:
55302022.024999999

In [57]:
np.sum(nyc_shape.VD01_x.astype(float))


Out[57]:
8273.0

In [58]:
np.sum(nyc_shape.VD01_y.astype(float))


Out[58]:
5488.0

In [59]:
#Unzipping downloaded nyc shapefile in a dataframe
zipfile.ZipFile(os.path.join("data/will200.zip")).extractall(r"data/will200")

In [60]:
# loading shape file for NYC
nyc_shape = gpd.read_file("data/will200/will200.shp")
nyc_shape


Out[60]:
CensusBloo CensusTrac GEO.id GEO.id2 MediamGros Median hou TotalTax20 VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11 geometry
0 3001 055100 1500000US360470551003 360470551003 925 54313 3.008752e+06 104 10 10 27 20 0 0 0 0 28 9 POLYGON ((994763.4347482463 199218.6635385387,...
1 3002 055100 1500000US360470551003 360470551003 925 54313 3.008752e+06 104 10 10 27 20 0 0 0 0 28 9 POLYGON ((994645.8196441368 198974.3437943111,...
2 3000 055100 1500000US360470551003 360470551003 925 54313 3.008752e+06 104 10 10 27 20 0 0 0 0 28 9 POLYGON ((994863.0397403392 199425.4936187313,...
3 2001 055100 1500000US360470551002 360470551002 563 30000 2.023738e+06 454 40 46 52 93 49 41 6 9 88 30 POLYGON ((995207.7795806602 199072.4836984025,...
4 2000 055100 1500000US360470551002 360470551002 563 30000 2.023738e+06 454 40 46 52 93 49 41 6 9 88 30 POLYGON ((995073.0296285347 199305.2936826193,...
5 2002 055100 1500000US360470551002 360470551002 563 30000 2.023738e+06 454 40 46 52 93 49 41 6 9 88 30 POLYGON ((995079.1495645404 198794.853746144, ...
6 3006 051900 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32 POLYGON ((996239.1595496207 199071.6936824018,...
7 3000 051900 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32 POLYGON ((996702.1496300519 199079.2436344088,...
8 3008 051900 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32 POLYGON ((996618.9795819744 198908.8236662501,...
9 3007 051900 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32 POLYGON ((995805.6030692169 199254.0337785716,...
10 3001 051900 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32 POLYGON ((996342.7397097172 199281.7037945974,...
11 3002 051900 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32 POLYGON ((995907.7995493121 199461.3937787648,...
12 3005 051900 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32 POLYGON ((996208.1195495918 199561.3136508578,...
13 3003 051900 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32 POLYGON ((996022.6895334191 199706.6837629932,...
14 1008 055100 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13 POLYGON ((993811.3996753597 198999.993714335, ...
15 1009 055100 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13 POLYGON ((993739.5796432928 199102.8038264308,...
16 1007 055100 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13 POLYGON ((994228.6796757483 199179.3936505021,...
17 1006 055100 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13 POLYGON ((994348.1198038596 199431.1038587365,...
18 1005 055100 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13 POLYGON ((993908.9098194505 199602.9136508965,...
19 1000 055100 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13 POLYGON ((994513.8196440139 199779.483763061, ...
20 1001 055100 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13 POLYGON ((993928.2897874685 199850.3437951269,...
21 1004 055100 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13 POLYGON ((993678.4396752359 199306.4436346204,...
22 4000 052300 1500000US360470523004 360470523004 533 35125 9.540152e+05 170 16 47 0 31 7 8 6 19 28 8 POLYGON ((995652.6297570744 199018.3637623521,...
23 4001 052300 1500000US360470523004 360470523004 533 35125 9.540152e+05 170 16 47 0 31 7 8 6 19 28 8 POLYGON ((995619.1495650433 198872.7138422165,...
24 5000 052300 1500000US360470523005 360470523005 461 20357 1.433303e+06 284 23 19 56 29 56 8 0 10 50 33 POLYGON ((996151.8295015394 198894.753650237, ...
25 5001 052300 1500000US360470523005 360470523005 461 20357 1.433303e+06 284 23 19 56 29 56 8 0 10 50 33 POLYGON ((996030.9695974268 198640.9437300006,...
26 2002 054900 1500000US360470549002 360470549002 633 36875 2.712518e+06 199 59 23 17 5 18 0 18 21 34 4 POLYGON ((994007.9598035428 198721.9436660761,...
27 2003 054900 1500000US360470549002 360470549002 633 36875 2.712518e+06 199 59 23 17 5 18 0 18 21 34 4 POLYGON ((993502.7297230721 198796.2238581452,...
28 2004 054900 1500000US360470549002 360470549002 633 36875 2.712518e+06 199 59 23 17 5 18 0 18 21 34 4 POLYGON ((993585.5797711494 198652.3137140112,...
29 2001 054900 1500000US360470549002 360470549002 633 36875 2.712518e+06 199 59 23 17 5 18 0 18 21 34 4 POLYGON ((994405.6895959132 198474.5936498457,...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
93 1009 055500 1500000US360470555001 360470555001 1224 56016 1.514461e+07 222 0 15 38 30 29 6 9 19 62 14 POLYGON ((995016.7697884822 200406.7336836451,...
94 1010 055500 1500000US360470555001 360470555001 1224 56016 1.514461e+07 222 0 15 38 30 29 6 9 19 62 14 POLYGON ((994601.3995480954 200709.4137479271,...
95 1005 055500 1500000US360470555001 360470555001 1224 56016 1.514461e+07 222 0 15 38 30 29 6 9 19 62 14 POLYGON ((994624.8197081173 201171.8237323577,...
96 1003 055300 1500000US360470553001 360470553001 720 37823 2.652623e+06 307 35 33 35 37 31 52 8 26 50 0 POLYGON ((995931.0696933337 200113.2836993719,...
97 1002 055300 1500000US360470553001 360470553001 720 37823 2.652623e+06 307 35 33 35 37 31 52 8 26 50 0 POLYGON ((996012.2895334094 200174.4136834288,...
98 1000 055300 1500000US360470553001 360470553001 720 37823 2.652623e+06 307 35 33 35 37 31 52 8 26 50 0 POLYGON ((996335.1096297101 200575.2436358021,...
99 1001 055300 1500000US360470553001 360470553001 720 37823 2.652623e+06 307 35 33 35 37 31 52 8 26 50 0 POLYGON ((996206.5095655903 200418.1336196557,...
100 3003 055300 1500000US360470553003 360470553003 669 31688 1.475774e+06 289 28 47 0 41 39 29 13 0 72 20 POLYGON ((995536.1697249659 200322.0437635663,...
101 3004 055300 1500000US360470553003 360470553003 669 31688 1.475774e+06 289 28 47 0 41 39 29 13 0 72 20 POLYGON ((995347.5796447904 200096.6536833564,...
102 3000 055300 1500000US360470553003 360470553003 669 31688 1.475774e+06 289 28 47 0 41 39 29 13 0 72 20 POLYGON ((996006.1795814037 200892.7937160978,...
103 3002 055300 1500000US360470553003 360470553003 669 31688 1.475774e+06 289 28 47 0 41 39 29 13 0 72 20 POLYGON ((995648.7695330709 200461.1935876959,...
104 3001 055300 1500000US360470553003 360470553003 669 31688 1.475774e+06 289 28 47 0 41 39 29 13 0 72 20 POLYGON ((995853.1996132613 200707.7937799256,...
105 1002 055700 1500000US360470557001 360470557001 669 37841 2.597763e+06 390 47 50 49 34 41 29 20 27 66 27 POLYGON ((995338.2696927816 201363.2436365359,...
106 1001 055700 1500000US360470557001 360470557001 669 37841 2.597763e+06 390 47 50 49 34 41 29 20 27 66 27 POLYGON ((995282.1096927293 201535.9036046968,...
107 1003 051700 1500000US360470517001 360470517001 864 61023 1.531597e+06 161 7 29 22 44 18 18 9 9 0 5 POLYGON ((996654.3495660074 201202.7736203865,...
108 1004 051700 1500000US360470517001 360470517001 864 61023 1.531597e+06 161 7 29 22 44 18 18 9 9 0 5 POLYGON ((996590.6055659479 200892.5835400976,...
109 1002 051700 1500000US360470517001 360470517001 864 61023 1.531597e+06 161 7 29 22 44 18 18 9 9 0 5 POLYGON ((996882.59967822 201239.3736844206, 9...
110 1001 051700 1500000US360470517001 360470517001 864 61023 1.531597e+06 161 7 29 22 44 18 18 9 9 0 5 POLYGON ((997030.6096623577 201580.5837967384,...
111 2004 051700 1500000US360470517002 360470517002 781 45469 4.429708e+06 432 41 38 76 106 57 32 16 14 44 8 POLYGON ((996146.8695015347 201070.6236042635,...
112 2002 051700 1500000US360470517002 360470517002 781 45469 4.429708e+06 432 41 38 76 106 57 32 16 14 44 8 POLYGON ((996478.0495338432 201468.513652634, ...
113 2003 051700 1500000US360470517002 360470517002 781 45469 4.429708e+06 432 41 38 76 106 57 32 16 14 44 8 POLYGON ((996314.4596456909 201268.9936524482,...
114 2001 051700 1500000US360470517002 360470517002 781 45469 4.429708e+06 432 41 38 76 106 57 32 16 14 44 8 POLYGON ((996644.179709998 201670.2137488219, ...
115 2014 055700 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0 POLYGON ((995791.6495332039 201363.1135885359,...
116 2012 055700 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0 POLYGON ((995959.6195813604 201564.9437327238,...
117 2010 055700 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0 POLYGON ((995400.0296928391 201679.1837328302,...
118 2013 055700 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0 POLYGON ((996055.3597414496 202041.1336851673,...
119 2011 055700 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0 POLYGON ((995594.7297250205 201916.6437010514,...
120 2006 055700 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0 POLYGON ((996316.2595816925 202005.2637331339,...
121 2008 055700 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0 POLYGON ((995727.369725144 202083.6437652069, ...
122 2007 055700 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0 POLYGON ((995963.7096933641 202374.2636694775,...

123 rows × 19 columns


In [61]:
nyc_shape.drop(['CensusBloo', 'CensusTrac', 'geometry'], axis =1, inplace=True)
nyc_shape


Out[61]:
GEO.id GEO.id2 MediamGros Median hou TotalTax20 VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11
0 1500000US360470551003 360470551003 925 54313 3.008752e+06 104 10 10 27 20 0 0 0 0 28 9
1 1500000US360470551003 360470551003 925 54313 3.008752e+06 104 10 10 27 20 0 0 0 0 28 9
2 1500000US360470551003 360470551003 925 54313 3.008752e+06 104 10 10 27 20 0 0 0 0 28 9
3 1500000US360470551002 360470551002 563 30000 2.023738e+06 454 40 46 52 93 49 41 6 9 88 30
4 1500000US360470551002 360470551002 563 30000 2.023738e+06 454 40 46 52 93 49 41 6 9 88 30
5 1500000US360470551002 360470551002 563 30000 2.023738e+06 454 40 46 52 93 49 41 6 9 88 30
6 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32
7 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32
8 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32
9 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32
10 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32
11 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32
12 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32
13 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32
14 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13
15 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13
16 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13
17 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13
18 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13
19 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13
20 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13
21 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13
22 1500000US360470523004 360470523004 533 35125 9.540152e+05 170 16 47 0 31 7 8 6 19 28 8
23 1500000US360470523004 360470523004 533 35125 9.540152e+05 170 16 47 0 31 7 8 6 19 28 8
24 1500000US360470523005 360470523005 461 20357 1.433303e+06 284 23 19 56 29 56 8 0 10 50 33
25 1500000US360470523005 360470523005 461 20357 1.433303e+06 284 23 19 56 29 56 8 0 10 50 33
26 1500000US360470549002 360470549002 633 36875 2.712518e+06 199 59 23 17 5 18 0 18 21 34 4
27 1500000US360470549002 360470549002 633 36875 2.712518e+06 199 59 23 17 5 18 0 18 21 34 4
28 1500000US360470549002 360470549002 633 36875 2.712518e+06 199 59 23 17 5 18 0 18 21 34 4
29 1500000US360470549002 360470549002 633 36875 2.712518e+06 199 59 23 17 5 18 0 18 21 34 4
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
93 1500000US360470555001 360470555001 1224 56016 1.514461e+07 222 0 15 38 30 29 6 9 19 62 14
94 1500000US360470555001 360470555001 1224 56016 1.514461e+07 222 0 15 38 30 29 6 9 19 62 14
95 1500000US360470555001 360470555001 1224 56016 1.514461e+07 222 0 15 38 30 29 6 9 19 62 14
96 1500000US360470553001 360470553001 720 37823 2.652623e+06 307 35 33 35 37 31 52 8 26 50 0
97 1500000US360470553001 360470553001 720 37823 2.652623e+06 307 35 33 35 37 31 52 8 26 50 0
98 1500000US360470553001 360470553001 720 37823 2.652623e+06 307 35 33 35 37 31 52 8 26 50 0
99 1500000US360470553001 360470553001 720 37823 2.652623e+06 307 35 33 35 37 31 52 8 26 50 0
100 1500000US360470553003 360470553003 669 31688 1.475774e+06 289 28 47 0 41 39 29 13 0 72 20
101 1500000US360470553003 360470553003 669 31688 1.475774e+06 289 28 47 0 41 39 29 13 0 72 20
102 1500000US360470553003 360470553003 669 31688 1.475774e+06 289 28 47 0 41 39 29 13 0 72 20
103 1500000US360470553003 360470553003 669 31688 1.475774e+06 289 28 47 0 41 39 29 13 0 72 20
104 1500000US360470553003 360470553003 669 31688 1.475774e+06 289 28 47 0 41 39 29 13 0 72 20
105 1500000US360470557001 360470557001 669 37841 2.597763e+06 390 47 50 49 34 41 29 20 27 66 27
106 1500000US360470557001 360470557001 669 37841 2.597763e+06 390 47 50 49 34 41 29 20 27 66 27
107 1500000US360470517001 360470517001 864 61023 1.531597e+06 161 7 29 22 44 18 18 9 9 0 5
108 1500000US360470517001 360470517001 864 61023 1.531597e+06 161 7 29 22 44 18 18 9 9 0 5
109 1500000US360470517001 360470517001 864 61023 1.531597e+06 161 7 29 22 44 18 18 9 9 0 5
110 1500000US360470517001 360470517001 864 61023 1.531597e+06 161 7 29 22 44 18 18 9 9 0 5
111 1500000US360470517002 360470517002 781 45469 4.429708e+06 432 41 38 76 106 57 32 16 14 44 8
112 1500000US360470517002 360470517002 781 45469 4.429708e+06 432 41 38 76 106 57 32 16 14 44 8
113 1500000US360470517002 360470517002 781 45469 4.429708e+06 432 41 38 76 106 57 32 16 14 44 8
114 1500000US360470517002 360470517002 781 45469 4.429708e+06 432 41 38 76 106 57 32 16 14 44 8
115 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0
116 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0
117 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0
118 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0
119 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0
120 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0
121 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0
122 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0

123 rows × 16 columns


In [62]:
nyc_shape.drop_duplicates(inplace=True)
nyc_shape


Out[62]:
GEO.id GEO.id2 MediamGros Median hou TotalTax20 VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11
0 1500000US360470551003 360470551003 925 54313 3.008752e+06 104 10 10 27 20 0 0 0 0 28 9
3 1500000US360470551002 360470551002 563 30000 2.023738e+06 454 40 46 52 93 49 41 6 9 88 30
6 1500000US360470519003 360470519003 755 35278 2.765746e+06 575 17 105 68 60 32 31 27 60 143 32
14 1500000US360470551001 360470551001 666 26212 2.816056e+06 424 12 67 57 40 50 40 0 43 102 13
22 1500000US360470523004 360470523004 533 35125 9.540152e+05 170 16 47 0 31 7 8 6 19 28 8
24 1500000US360470523005 360470523005 461 20357 1.433303e+06 284 23 19 56 29 56 8 0 10 50 33
26 1500000US360470549002 360470549002 633 36875 2.712518e+06 199 59 23 17 5 18 0 18 21 34 4
38 1500000US360470551004 360470551004 431 26429 3.405331e+06 119 6 10 29 14 0 0 27 7 19 7
40 1500000US360470523003 360470523003 567 36429 9.807110e+05 344 64 50 8 67 37 23 7 26 52 10
45 1500000US360470523001 360470523001 354 20129 2.034168e+06 535 89 73 15 80 54 46 42 40 57 39
49 1500000US360470523002 360470523002 679 16979 3.269767e+06 456 32 45 45 53 7 37 26 25 171 15
52 1500000US360470525001 360470525001 315 12750 1.613152e+06 733 53 84 73 92 97 86 59 52 107 30
56 1500000US360470519002 360470519002 775 34531 4.265974e+06 224 7 23 15 47 7 32 8 14 64 7
64 1500000US360470553002 360470553002 557 35463 3.284624e+06 376 73 48 46 37 65 14 30 13 43 7
71 1500000US360470519001 360470519001 695 32008 5.146695e+06 347 23 27 34 55 33 26 22 8 101 18
82 1500000US360470555001 360470555001 1224 56016 1.514461e+07 222 0 15 38 30 29 6 9 19 62 14
96 1500000US360470553001 360470553001 720 37823 2.652623e+06 307 35 33 35 37 31 52 8 26 50 0
100 1500000US360470553003 360470553003 669 31688 1.475774e+06 289 28 47 0 41 39 29 13 0 72 20
105 1500000US360470557001 360470557001 669 37841 2.597763e+06 390 47 50 49 34 41 29 20 27 66 27
107 1500000US360470517001 360470517001 864 61023 1.531597e+06 161 7 29 22 44 18 18 9 9 0 5
111 1500000US360470517002 360470517002 781 45469 4.429708e+06 432 41 38 76 106 57 32 16 14 44 8
115 1500000US360470557002 360470557002 638 45875 4.059491e+06 127 17 15 0 36 13 0 0 14 32 0

In [63]:
np.sum(nyc_shape.TotalTax20)


Out[63]:
71606118.739999995

In [64]:
np.sum(nyc_shape.VD01.astype(float))


Out[64]:
7272.0

In [65]:
#Unzipping downloaded nyc shapefile in a dataframe
zipfile.ZipFile(os.path.join("data/will2010.zip")).extractall(r"data/will2010")

In [66]:
# loading shape file for NYC
nyc_shape = gpd.read_file("data/will2010/will210.shp")
nyc_shape


Out[66]:
CensusBloc CensusTrac GEO.id GEO.id2 Margin o_1 Margin of MediamGros MedianHous TotalTax20 VD01 ... VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11 geometry
0 3003 055300 1500000US360470553003 360470553003 30680 843 1164 61875 1.451061e+06 289 ... 47 0 41 39 29 13 0 72 20 POLYGON ((995533.5128540844 200319.9265096784,...
1 3001 055300 1500000US360470553003 360470553003 30680 843 1164 61875 1.451061e+06 289 ... 47 0 41 39 29 13 0 72 20 POLYGON ((995853.0955482572 200707.734461084, ...
2 3000 055300 1500000US360470553003 360470553003 30680 843 1164 61875 1.451061e+06 289 ... 47 0 41 39 29 13 0 72 20 POLYGON ((996005.1483937502 200891.6530565023,...
3 3002 055300 1500000US360470553003 360470553003 30680 843 1164 61875 1.451061e+06 289 ... 47 0 41 39 29 13 0 72 20 POLYGON ((995647.4706314206 200459.5823983401,...
4 3004 055300 1500000US360470553003 360470553003 30680 843 1164 61875 1.451061e+06 289 ... 47 0 41 39 29 13 0 72 20 POLYGON ((995344.1913823336 200092.6454681754,...
5 3007 051900 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 ... 105 68 60 32 31 27 60 143 32 POLYGON ((995799.4480177611 199247.6071320027,...
6 3006 051900 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 ... 105 68 60 32 31 27 60 143 32 POLYGON ((996242.3155703396 199078.3377855122,...
7 3002 051900 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 ... 105 68 60 32 31 27 60 143 32 POLYGON ((995901.2942707539 199457.4886020124,...
8 3008 051900 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 ... 105 68 60 32 31 27 60 143 32 POLYGON ((996620.7295116782 198912.101569429, ...
9 3000 051900 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 ... 105 68 60 32 31 27 60 143 32 POLYGON ((996699.6548787504 199074.0789357573,...
10 3001 051900 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 ... 105 68 60 32 31 27 60 143 32 POLYGON ((996340.8009376675 199277.4732140005,...
11 3003 051900 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 ... 105 68 60 32 31 27 60 143 32 POLYGON ((996026.0443493426 199704.0540537536,...
12 3005 051900 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 ... 105 68 60 32 31 27 60 143 32 POLYGON ((996208.1197725832 199561.3135255873,...
13 2005 055300 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 ... 48 46 37 65 14 30 13 43 7 POLYGON ((995356.172985673 199527.3431210816, ...
14 2004 055300 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 ... 48 46 37 65 14 30 13 43 7 POLYGON ((995009.4830622524 199681.0140735954,...
15 2006 055300 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 ... 48 46 37 65 14 30 13 43 7 POLYGON ((995482.9922858477 199533.9904175103,...
16 2002 055300 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 ... 48 46 37 65 14 30 13 43 7 POLYGON ((995169.795405671 199950.6674056649, ...
17 2003 055300 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 ... 48 46 37 65 14 30 13 43 7 POLYGON ((995076.6492665112 199904.7813425958,...
18 2000 055300 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 ... 48 46 37 65 14 30 13 43 7 POLYGON ((995713.8261417598 199807.4751067609,...
19 2001 055300 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 ... 48 46 37 65 14 30 13 43 7 POLYGON ((995376.6020786762 199886.8092656732,...
20 1002 055700 1500000US360470557001 360470557001 18396 388 1712 53438 1.129687e+06 390 ... 50 49 34 41 29 20 27 66 27 POLYGON ((995633.658979252 201170.5120788366, ...
21 1001 055700 1500000US360470557001 360470557001 18396 388 1712 53438 1.129687e+06 390 ... 50 49 34 41 29 20 27 66 27 POLYGON ((995278.7830485851 201538.1438895017,...
22 1007 055100 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 ... 67 57 40 50 40 0 43 102 13 POLYGON ((994227.2192068398 199176.260161832, ...
23 1006 055100 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 ... 67 57 40 50 40 0 43 102 13 POLYGON ((994347.9138473421 199430.6566346735,...
24 1001 055100 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 ... 67 57 40 50 40 0 43 102 13 POLYGON ((993932.8346250057 199848.5130824149,...
25 1008 055100 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 ... 67 57 40 50 40 0 43 102 13 POLYGON ((993814.7603860945 198998.8589418381,...
26 1004 055100 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 ... 67 57 40 50 40 0 43 102 13 POLYGON ((993678.6031780839 199306.3875262588,...
27 1005 055100 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 ... 67 57 40 50 40 0 43 102 13 POLYGON ((993908.37010701 199601.3478943408, 9...
28 1009 055100 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 ... 67 57 40 50 40 0 43 102 13 POLYGON ((993739.699512586 199103.1251375079, ...
29 1000 055100 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 ... 67 57 40 50 40 0 43 102 13 POLYGON ((994513.8196835071 199779.4836929291,...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
93 1020 055500 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 ... 15 38 30 29 6 9 19 62 14 POLYGON ((994840.820998013 200195.1485439986, ...
94 1001 055500 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 ... 15 38 30 29 6 9 19 62 14 POLYGON ((995113.575045675 201337.3762464225, ...
95 1006 055500 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 ... 15 38 30 29 6 9 19 62 14 POLYGON ((995327.1261277497 200791.7759596705,...
96 1008 055500 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 ... 15 38 30 29 6 9 19 62 14 POLYGON ((995178.7031962574 200612.5788115114,...
97 1004 055500 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 ... 15 38 30 29 6 9 19 62 14 POLYGON ((994953.4670981765 201261.4754795879,...
98 1009 055500 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 ... 15 38 30 29 6 9 19 62 14 POLYGON ((995040.0079355836 200440.6378824264,...
99 1016 055500 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 ... 15 38 30 29 6 9 19 62 14 POLYGON ((994079.541352585 200591.4922395051, ...
100 1021 055500 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 ... 15 38 30 29 6 9 19 62 14 POLYGON ((994134.9447850883 200138.5695889294,...
101 3002 055100 1500000US360470551003 360470551003 24594 155 1104 57599 3.673678e+05 104 ... 10 27 20 0 0 0 0 28 9 POLYGON ((994636.245619759 198955.3688713461, ...
102 3001 055100 1500000US360470551003 360470551003 24594 155 1104 57599 3.673678e+05 104 ... 10 27 20 0 0 0 0 28 9 POLYGON ((994763.4336855859 199218.6606675833,...
103 3000 055100 1500000US360470551003 360470551003 24594 155 1104 57599 3.673678e+05 104 ... 10 27 20 0 0 0 0 28 9 POLYGON ((994863.0555335879 199425.526723668, ...
104 1006 052500 1500000US360470525001 360470525001 1366 51 325 15867 1.812191e+06 733 ... 84 73 92 97 86 59 52 107 30 POLYGON ((994781.3772192597 198040.8149261773,...
105 1005 052500 1500000US360470525001 360470525001 1366 51 325 15867 1.812191e+06 733 ... 84 73 92 97 86 59 52 107 30 POLYGON ((995121.7342827102 197775.7393795627,...
106 1003 052500 1500000US360470525001 360470525001 1366 51 325 15867 1.812191e+06 733 ... 84 73 92 97 86 59 52 107 30 POLYGON ((995623.9704738221 197801.7597871438,...
107 1002 052500 1500000US360470525001 360470525001 1366 51 325 15867 1.812191e+06 733 ... 84 73 92 97 86 59 52 107 30 POLYGON ((995729.7253417312 198018.6963564213,...
108 2002 055100 1500000US360470551002 360470551002 28122 272 1104 52656 5.806674e+05 454 ... 46 52 93 49 41 6 9 88 30 POLYGON ((995079.35070467 198795.2615800053, 9...
109 2001 055100 1500000US360470551002 360470551002 28122 272 1104 52656 5.806674e+05 454 ... 46 52 93 49 41 6 9 88 30 POLYGON ((995207.5853563398 199072.0713938326,...
110 2000 055100 1500000US360470551002 360470551002 28122 272 1104 52656 5.806674e+05 454 ... 46 52 93 49 41 6 9 88 30 POLYGON ((995283.7121725976 199224.5792909265,...
111 2010 055700 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 ... 15 0 36 13 0 0 14 32 0 POLYGON ((995397.984581843 201679.8765456676, ...
112 2013 055700 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 ... 15 0 36 13 0 0 14 32 0 POLYGON ((996055.7057073414 202040.8541701734,...
113 2014 055700 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 ... 15 0 36 13 0 0 14 32 0 POLYGON ((995789.0546658337 201360.0556630045,...
114 2011 055700 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 ... 15 0 36 13 0 0 14 32 0 POLYGON ((995589.0714700073 201919.8980472535,...
115 2012 055700 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 ... 15 0 36 13 0 0 14 32 0 POLYGON ((995955.0812366754 201560.1704202592,...
116 2008 055700 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 ... 15 0 36 13 0 0 14 32 0 POLYGON ((995726.6689638346 202082.8219339997,...
117 2006 055700 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 ... 15 0 36 13 0 0 14 32 0 POLYGON ((996072.4481279254 202103.6076535881,...
118 2007 055700 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 ... 15 0 36 13 0 0 14 32 0 POLYGON ((995963.3240023404 202373.7704990953,...
119 4001 052300 1500000US360470523004 360470523004 30817 620 641 44786 8.704884e+05 170 ... 47 0 31 7 8 6 19 28 8 POLYGON ((995619.7731802613 198874.0028923452,...
120 4000 052300 1500000US360470523004 360470523004 30817 620 641 44786 8.704884e+05 170 ... 47 0 31 7 8 6 19 28 8 POLYGON ((995652.9286257625 199019.8844903409,...
121 4001 055100 1500000US360470551004 360470551004 34606 394 1012 67697 2.040639e+05 119 ... 10 29 14 0 0 27 7 19 7 POLYGON ((994517.110031262 198702.0937873423, ...
122 4000 055100 1500000US360470551004 360470551004 34606 394 1012 67697 2.040639e+05 119 ... 10 29 14 0 0 27 7 19 7 POLYGON ((994940.2046575099 198501.0502261668,...

123 rows × 21 columns


In [67]:
nyc_shape.drop(['CensusBloc', 'CensusTrac', 'geometry'], axis =1, inplace=True)
nyc_shape


Out[67]:
GEO.id GEO.id2 Margin o_1 Margin of MediamGros MedianHous TotalTax20 VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11
0 1500000US360470553003 360470553003 30680 843 1164 61875 1.451061e+06 289 28 47 0 41 39 29 13 0 72 20
1 1500000US360470553003 360470553003 30680 843 1164 61875 1.451061e+06 289 28 47 0 41 39 29 13 0 72 20
2 1500000US360470553003 360470553003 30680 843 1164 61875 1.451061e+06 289 28 47 0 41 39 29 13 0 72 20
3 1500000US360470553003 360470553003 30680 843 1164 61875 1.451061e+06 289 28 47 0 41 39 29 13 0 72 20
4 1500000US360470553003 360470553003 30680 843 1164 61875 1.451061e+06 289 28 47 0 41 39 29 13 0 72 20
5 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 17 105 68 60 32 31 27 60 143 32
6 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 17 105 68 60 32 31 27 60 143 32
7 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 17 105 68 60 32 31 27 60 143 32
8 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 17 105 68 60 32 31 27 60 143 32
9 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 17 105 68 60 32 31 27 60 143 32
10 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 17 105 68 60 32 31 27 60 143 32
11 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 17 105 68 60 32 31 27 60 143 32
12 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 17 105 68 60 32 31 27 60 143 32
13 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 73 48 46 37 65 14 30 13 43 7
14 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 73 48 46 37 65 14 30 13 43 7
15 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 73 48 46 37 65 14 30 13 43 7
16 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 73 48 46 37 65 14 30 13 43 7
17 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 73 48 46 37 65 14 30 13 43 7
18 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 73 48 46 37 65 14 30 13 43 7
19 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 73 48 46 37 65 14 30 13 43 7
20 1500000US360470557001 360470557001 18396 388 1712 53438 1.129687e+06 390 47 50 49 34 41 29 20 27 66 27
21 1500000US360470557001 360470557001 18396 388 1712 53438 1.129687e+06 390 47 50 49 34 41 29 20 27 66 27
22 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 12 67 57 40 50 40 0 43 102 13
23 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 12 67 57 40 50 40 0 43 102 13
24 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 12 67 57 40 50 40 0 43 102 13
25 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 12 67 57 40 50 40 0 43 102 13
26 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 12 67 57 40 50 40 0 43 102 13
27 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 12 67 57 40 50 40 0 43 102 13
28 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 12 67 57 40 50 40 0 43 102 13
29 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 12 67 57 40 50 40 0 43 102 13
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
93 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 0 15 38 30 29 6 9 19 62 14
94 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 0 15 38 30 29 6 9 19 62 14
95 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 0 15 38 30 29 6 9 19 62 14
96 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 0 15 38 30 29 6 9 19 62 14
97 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 0 15 38 30 29 6 9 19 62 14
98 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 0 15 38 30 29 6 9 19 62 14
99 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 0 15 38 30 29 6 9 19 62 14
100 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 0 15 38 30 29 6 9 19 62 14
101 1500000US360470551003 360470551003 24594 155 1104 57599 3.673678e+05 104 10 10 27 20 0 0 0 0 28 9
102 1500000US360470551003 360470551003 24594 155 1104 57599 3.673678e+05 104 10 10 27 20 0 0 0 0 28 9
103 1500000US360470551003 360470551003 24594 155 1104 57599 3.673678e+05 104 10 10 27 20 0 0 0 0 28 9
104 1500000US360470525001 360470525001 1366 51 325 15867 1.812191e+06 733 53 84 73 92 97 86 59 52 107 30
105 1500000US360470525001 360470525001 1366 51 325 15867 1.812191e+06 733 53 84 73 92 97 86 59 52 107 30
106 1500000US360470525001 360470525001 1366 51 325 15867 1.812191e+06 733 53 84 73 92 97 86 59 52 107 30
107 1500000US360470525001 360470525001 1366 51 325 15867 1.812191e+06 733 53 84 73 92 97 86 59 52 107 30
108 1500000US360470551002 360470551002 28122 272 1104 52656 5.806674e+05 454 40 46 52 93 49 41 6 9 88 30
109 1500000US360470551002 360470551002 28122 272 1104 52656 5.806674e+05 454 40 46 52 93 49 41 6 9 88 30
110 1500000US360470551002 360470551002 28122 272 1104 52656 5.806674e+05 454 40 46 52 93 49 41 6 9 88 30
111 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 17 15 0 36 13 0 0 14 32 0
112 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 17 15 0 36 13 0 0 14 32 0
113 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 17 15 0 36 13 0 0 14 32 0
114 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 17 15 0 36 13 0 0 14 32 0
115 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 17 15 0 36 13 0 0 14 32 0
116 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 17 15 0 36 13 0 0 14 32 0
117 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 17 15 0 36 13 0 0 14 32 0
118 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 17 15 0 36 13 0 0 14 32 0
119 1500000US360470523004 360470523004 30817 620 641 44786 8.704884e+05 170 16 47 0 31 7 8 6 19 28 8
120 1500000US360470523004 360470523004 30817 620 641 44786 8.704884e+05 170 16 47 0 31 7 8 6 19 28 8
121 1500000US360470551004 360470551004 34606 394 1012 67697 2.040639e+05 119 6 10 29 14 0 0 27 7 19 7
122 1500000US360470551004 360470551004 34606 394 1012 67697 2.040639e+05 119 6 10 29 14 0 0 27 7 19 7

123 rows × 18 columns


In [68]:
nyc_shape.drop_duplicates(inplace=True)
nyc_shape


Out[68]:
GEO.id GEO.id2 Margin o_1 Margin of MediamGros MedianHous TotalTax20 VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11
0 1500000US360470553003 360470553003 30680 843 1164 61875 1.451061e+06 289 28 47 0 41 39 29 13 0 72 20
5 1500000US360470519003 360470519003 22896 222 1748 60134 1.214220e+06 575 17 105 68 60 32 31 27 60 143 32
13 1500000US360470553002 360470553002 39190 423 1701 73750 1.342436e+06 376 73 48 46 37 65 14 30 13 43 7
20 1500000US360470557001 360470557001 18396 388 1712 53438 1.129687e+06 390 47 50 49 34 41 29 20 27 66 27
22 1500000US360470551001 360470551001 50458 419 1640 65200 5.302822e+05 424 12 67 57 40 50 40 0 43 102 13
30 1500000US360470523003 360470523003 29650 480 1004 47775 1.444762e+06 344 64 50 8 67 37 23 7 26 52 10
35 1500000US360470523002 360470523002 52860 358 1138 39231 2.220642e+06 456 32 45 45 53 7 37 26 25 171 15
38 1500000US360470517002 360470517002 24219 439 1542 81827 1.703405e+06 432 41 38 76 106 57 32 16 14 44 8
42 1500000US360470553001 360470553001 34648 *** 2,000+ 83828 9.771189e+05 307 35 33 35 37 31 52 8 26 50 0
46 1500000US360470519001 360470519001 21572 *** 2,000+ 80733 1.516244e+06 347 23 27 34 55 33 26 22 8 101 18
57 1500000US360470523001 360470523001 13653 258 839 32981 1.048107e+06 535 89 73 15 80 54 46 42 40 57 39
61 1500000US360470519002 360470519002 13226 263 1750 90542 9.252660e+05 224 7 23 15 47 7 32 8 14 64 7
69 1500000US360470549002 360470549002 52323 *** 2,000+ 72019 1.294321e+06 199 59 23 17 5 18 0 18 21 34 4
81 1500000US360470517001 360470517001 26052 422 1433 60625 4.270027e+06 161 7 29 22 44 18 18 9 9 0 5
85 1500000US360470523005 360470523005 14807 120 426 29236 5.039769e+05 284 23 19 56 29 56 8 0 10 50 33
87 1500000US360470555001 360470555001 18454 206 1997 96350 6.779472e+05 222 0 15 38 30 29 6 9 19 62 14
101 1500000US360470551003 360470551003 24594 155 1104 57599 3.673678e+05 104 10 10 27 20 0 0 0 0 28 9
104 1500000US360470525001 360470525001 1366 51 325 15867 1.812191e+06 733 53 84 73 92 97 86 59 52 107 30
108 1500000US360470551002 360470551002 28122 272 1104 52656 5.806674e+05 454 40 46 52 93 49 41 6 9 88 30
111 1500000US360470557002 360470557002 34954 572 1888 85962 7.972190e+05 127 17 15 0 36 13 0 0 14 32 0
119 1500000US360470523004 360470523004 30817 620 641 44786 8.704884e+05 170 16 47 0 31 7 8 6 19 28 8
121 1500000US360470551004 360470551004 34606 394 1012 67697 2.040639e+05 119 6 10 29 14 0 0 27 7 19 7

In [69]:
np.sum(nyc_shape.TotalTax20)


Out[69]:
26881499.657499995

In [70]:
np.sum(nyc_shape.VD01.astype(float))


Out[70]:
7272.0

In [ ]: