Wrangling Data

Author: Avikal Somvanshi


In [1]:
from __future__ import print_function, division
import pandas as pd
import pylab as pl
import os
import csv
from pandas.tools.plotting import scatter_matrix
import sys
reload(sys)
import zipfile
import geopandas as gpd
sys.setdefaultencoding('utf-8')
%pylab inline

In [2]:
#Unzippping downloaded median income data of 1999 in a dataframe
zipfile.ZipFile(os.path.join("data/income_00.zip")).extractall(r"data/income_00")

In [3]:
#Reading and saving downloaded median income data of 1999 in a dataframe
data_2000 = pd.read_csv('data/income_00/DEC_00_SF3_P052_with_ann.csv')
data_2000.head()


Out[3]:
GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17
0 Id Id2 Geography Total: Less than $10,000 $10,000 to $14,999 $15,000 to $19,999 $20,000 to $24,999 $25,000 to $29,999 $30,000 to $34,999 $35,000 to $39,999 $40,000 to $44,999 $45,000 to $49,999 $50,000 to $59,999 $60,000 to $74,999 $75,000 to $99,999 $100,000 to $124,999 $125,000 to $149,999 $150,000 to $199,999 $200,000 or more
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 859 48 45 16 31 18 59 35 31 8 70 105 68 115 52 39 119
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 1303 63 0 66 44 22 75 54 94 86 147 174 57 151 131 87 52
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 246 16 6 10 23 10 13 8 13 12 16 31 23 24 8 15 18
4 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 118 6 10 15 0 12 9 16 3 10 15 0 15 7 0 0 0

In [4]:
#Unzipping downloaded median income data of 2013 in a dataframe
zipfile.ZipFile(os.path.join("data/income_13.zip")).extractall(r"data/income_13")

In [5]:
#Reading and saving downloaded median income data of 2013 in a dataframe
data_2013 = pd.read_csv('data/income_13/ACS_13_5YR_B19001_with_ann.csv')
data_2013.head()


Out[5]:
GEO.id GEO.id2 GEO.display-label HD01_VD01 HD02_VD01 HD01_VD02 HD02_VD02 HD01_VD03 HD02_VD03 HD01_VD04 ... HD01_VD13 HD02_VD13 HD01_VD14 HD02_VD14 HD01_VD15 HD02_VD15 HD01_VD16 HD02_VD16 HD01_VD17 HD02_VD17
0 Id Id2 Geography Estimate; Total: Margin of Error; Total: Estimate; Total: - Less than $10,000 Margin of Error; Total: - Less than $10,000 Estimate; Total: - $10,000 to $14,999 Margin of Error; Total: - $10,000 to $14,999 Estimate; Total: - $15,000 to $19,999 ... Estimate; Total: - $75,000 to $99,999 Margin of Error; Total: - $75,000 to $99,999 Estimate; Total: - $100,000 to $124,999 Margin of Error; Total: - $100,000 to $124,999 Estimate; Total: - $125,000 to $149,999 Margin of Error; Total: - $125,000 to $149,999 Estimate; Total: - $150,000 to $199,999 Margin of Error; Total: - $150,000 to $199,999 Estimate; Total: - $200,000 or more Margin of Error; Total: - $200,000 or more
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 308 73 54 48 14 22 0 ... 37 37 22 25 5 8 39 35 87 70
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 291 86 33 36 15 23 0 ... 0 12 13 22 0 12 0 12 119 63
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 710 114 33 39 51 80 0 ... 151 75 96 53 33 39 118 73 71 42
4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 797 126 47 43 30 33 21 ... 107 59 23 28 59 43 32 37 123 73

5 rows × 37 columns


In [6]:
data_2013.columns


Out[6]:
Index([u'GEO.id', u'GEO.id2', u'GEO.display-label', u'HD01_VD01', u'HD02_VD01',
       u'HD01_VD02', u'HD02_VD02', u'HD01_VD03', u'HD02_VD03', u'HD01_VD04',
       u'HD02_VD04', u'HD01_VD05', u'HD02_VD05', u'HD01_VD06', u'HD02_VD06',
       u'HD01_VD07', u'HD02_VD07', u'HD01_VD08', u'HD02_VD08', u'HD01_VD09',
       u'HD02_VD09', u'HD01_VD10', u'HD02_VD10', u'HD01_VD11', u'HD02_VD11',
       u'HD01_VD12', u'HD02_VD12', u'HD01_VD13', u'HD02_VD13', u'HD01_VD14',
       u'HD02_VD14', u'HD01_VD15', u'HD02_VD15', u'HD01_VD16', u'HD02_VD16',
       u'HD01_VD17', u'HD02_VD17'],
      dtype='object')

In [7]:
data_2013 = data_2013[[u'GEO.id', u'GEO.id2', u'GEO.display-label', u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', 
                       u'HD01_VD05', u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10', u'HD01_VD11', 
                       u'HD01_VD12', u'HD01_VD13', u'HD01_VD14', u'HD01_VD15', u'HD01_VD16', u'HD01_VD17']]

In [8]:
data_2013.head()


Out[8]:
GEO.id GEO.id2 GEO.display-label HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08 HD01_VD09 HD01_VD10 HD01_VD11 HD01_VD12 HD01_VD13 HD01_VD14 HD01_VD15 HD01_VD16 HD01_VD17
0 Id Id2 Geography Estimate; Total: Estimate; Total: - Less than $10,000 Estimate; Total: - $10,000 to $14,999 Estimate; Total: - $15,000 to $19,999 Estimate; Total: - $20,000 to $24,999 Estimate; Total: - $25,000 to $29,999 Estimate; Total: - $30,000 to $34,999 Estimate; Total: - $35,000 to $39,999 Estimate; Total: - $40,000 to $44,999 Estimate; Total: - $45,000 to $49,999 Estimate; Total: - $50,000 to $59,999 Estimate; Total: - $60,000 to $74,999 Estimate; Total: - $75,000 to $99,999 Estimate; Total: - $100,000 to $124,999 Estimate; Total: - $125,000 to $149,999 Estimate; Total: - $150,000 to $199,999 Estimate; Total: - $200,000 or more
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 308 54 14 0 0 0 0 5 5 6 13 21 37 22 5 39 87
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 291 33 15 0 0 0 46 40 12 0 0 13 0 13 0 0 119
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 710 33 51 0 27 13 58 0 0 16 14 29 151 96 33 118 71
4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 797 47 30 21 88 0 35 0 45 0 79 108 107 23 59 32 123

In [9]:
#Unzipping downloaded nyc shapefile in a dataframe
zipfile.ZipFile(os.path.join("data/cb_2015_36_bg_500k.zip")).extractall(r"data/cb_2015_36_bg_500k")

In [10]:
# loading shape file for NYC
nyc_shape = gpd.read_file("data/cb_2015_36_bg_500k/cb_2015_36_bg_500k.shp")
nyc_shape.head()


Out[10]:
AFFGEOID ALAND AWATER BLKGRPCE COUNTYFP GEOID LSAD NAME STATEFP TRACTCE geometry
0 1500000US360610211000 0 307945 0 061 360610211000 BG 0 36 021100 POLYGON ((-73.968082 40.8207, -73.967982575439...
1 1500000US360290131014 557271 351509 4 029 360290131014 BG 4 36 013101 POLYGON ((-78.89711856256349 42.75125713600959...
2 1500000US360050516002 256313 0 2 005 360050516002 BG 2 36 051600 POLYGON ((-73.791504 40.855456, -73.7874279999...
3 1500000US360810183004 33973 0 4 081 360810183004 BG 4 36 018300 POLYGON ((-73.92145099999999 40.743634, -73.92...
4 1500000US360470276003 70988 0 3 047 360470276003 BG 3 36 027600 POLYGON ((-74.001302 40.611068, -74.0010729999...

In [11]:
nyc_shape.columns = [[u'GEO.id',    u'ALAND',   u'AWATER', u'BLKGRPCE', u'COUNTYFP',
          u'GEO.id2',     u'LSAD',     u'NAME',  u'STATEFP',  u'TRACTCE',
       u'geometry']]

In [12]:
nyc_shape.head()


Out[12]:
GEO.id ALAND AWATER BLKGRPCE COUNTYFP GEO.id2 LSAD NAME STATEFP TRACTCE geometry
0 1500000US360610211000 0 307945 0 061 360610211000 BG 0 36 021100 POLYGON ((-73.968082 40.8207, -73.967982575439...
1 1500000US360290131014 557271 351509 4 029 360290131014 BG 4 36 013101 POLYGON ((-78.89711856256349 42.75125713600959...
2 1500000US360050516002 256313 0 2 005 360050516002 BG 2 36 051600 POLYGON ((-73.791504 40.855456, -73.7874279999...
3 1500000US360810183004 33973 0 4 081 360810183004 BG 4 36 018300 POLYGON ((-73.92145099999999 40.743634, -73.92...
4 1500000US360470276003 70988 0 3 047 360470276003 BG 3 36 027600 POLYGON ((-74.001302 40.611068, -74.0010729999...

In [13]:
bky_shape = nyc_shape[[u'GEO.id', u'GEO.id2', u'geometry']]
bky_shape.head()


Out[13]:
GEO.id GEO.id2 geometry
0 1500000US360610211000 360610211000 POLYGON ((-73.968082 40.8207, -73.967982575439...
1 1500000US360290131014 360290131014 POLYGON ((-78.89711856256349 42.75125713600959...
2 1500000US360050516002 360050516002 POLYGON ((-73.791504 40.855456, -73.7874279999...
3 1500000US360810183004 360810183004 POLYGON ((-73.92145099999999 40.743634, -73.92...
4 1500000US360470276003 360470276003 POLYGON ((-74.001302 40.611068, -74.0010729999...

In [ ]:


In [14]:
#Merging 2000 the dataframes to a mother dataframe
income_2000 = pd.merge(data_2000, bky_shape, how='left', on=['GEO.id', 'GEO.id2'])
income_2000.head()


Out[14]:
GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 VD07 ... VD09 VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17 geometry
0 Id Id2 Geography Total: Less than $10,000 $10,000 to $14,999 $15,000 to $19,999 $20,000 to $24,999 $25,000 to $29,999 $30,000 to $34,999 ... $40,000 to $44,999 $45,000 to $49,999 $50,000 to $59,999 $60,000 to $74,999 $75,000 to $99,999 $100,000 to $124,999 $125,000 to $149,999 $150,000 to $199,999 $200,000 or more NaN
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 859 48 45 16 31 18 59 ... 31 8 70 105 68 115 52 39 119 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 1303 63 0 66 44 22 75 ... 94 86 147 174 57 151 131 87 52 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 246 16 6 10 23 10 13 ... 13 12 16 31 23 24 8 15 18 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 118 6 10 15 0 12 9 ... 3 10 15 0 15 7 0 0 0 POLYGON ((-74.012844 40.653016, -74.0150479999...

5 rows × 21 columns


In [15]:
#Merging 2013 the dataframes to a mother dataframe
income_2013 = pd.merge(data_2013, bky_shape, how='left', on=['GEO.id', 'GEO.id2'])
income_2013.head()


Out[15]:
GEO.id GEO.id2 GEO.display-label HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 ... HD01_VD09 HD01_VD10 HD01_VD11 HD01_VD12 HD01_VD13 HD01_VD14 HD01_VD15 HD01_VD16 HD01_VD17 geometry
0 Id Id2 Geography Estimate; Total: Estimate; Total: - Less than $10,000 Estimate; Total: - $10,000 to $14,999 Estimate; Total: - $15,000 to $19,999 Estimate; Total: - $20,000 to $24,999 Estimate; Total: - $25,000 to $29,999 Estimate; Total: - $30,000 to $34,999 ... Estimate; Total: - $40,000 to $44,999 Estimate; Total: - $45,000 to $49,999 Estimate; Total: - $50,000 to $59,999 Estimate; Total: - $60,000 to $74,999 Estimate; Total: - $75,000 to $99,999 Estimate; Total: - $100,000 to $124,999 Estimate; Total: - $125,000 to $149,999 Estimate; Total: - $150,000 to $199,999 Estimate; Total: - $200,000 or more NaN
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 308 54 14 0 0 0 0 ... 5 6 13 21 37 22 5 39 87 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 291 33 15 0 0 0 46 ... 12 0 0 13 0 13 0 0 119 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 710 33 51 0 27 13 58 ... 0 16 14 29 151 96 33 118 71 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 797 47 30 21 88 0 35 ... 45 0 79 108 107 23 59 32 123 POLYGON ((-73.99271 40.698097, -73.991759 40.7...

5 rows × 21 columns


In [ ]:


In [16]:
#Saving the dataset as csv
income_2000.to_csv('income_2000.csv')
income_2013.to_csv('income_2013.csv')

In [17]:
#Checking the CSV
data = pd.read_csv('income_2013.csv')
data.head()


Out[17]:
Unnamed: 0 GEO.id GEO.id2 GEO.display-label HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 ... HD01_VD09 HD01_VD10 HD01_VD11 HD01_VD12 HD01_VD13 HD01_VD14 HD01_VD15 HD01_VD16 HD01_VD17 geometry
0 0 Id Id2 Geography Estimate; Total: Estimate; Total: - Less than $10,000 Estimate; Total: - $10,000 to $14,999 Estimate; Total: - $15,000 to $19,999 Estimate; Total: - $20,000 to $24,999 Estimate; Total: - $25,000 to $29,999 ... Estimate; Total: - $40,000 to $44,999 Estimate; Total: - $45,000 to $49,999 Estimate; Total: - $50,000 to $59,999 Estimate; Total: - $60,000 to $74,999 Estimate; Total: - $75,000 to $99,999 Estimate; Total: - $100,000 to $124,999 Estimate; Total: - $125,000 to $149,999 Estimate; Total: - $150,000 to $199,999 Estimate; Total: - $200,000 or more NaN
1 1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 308 54 14 0 0 0 ... 5 6 13 21 37 22 5 39 87 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 291 33 15 0 0 0 ... 12 0 0 13 0 13 0 0 119 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 710 33 51 0 27 13 ... 0 16 14 29 151 96 33 118 71 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 797 47 30 21 88 0 ... 45 0 79 108 107 23 59 32 123 POLYGON ((-73.99271 40.698097, -73.991759 40.7...

5 rows × 22 columns

Good to go!


In [18]:
data.columns


Out[18]:
Index([u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label',
       u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', u'HD01_VD05',
       u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10',
       u'HD01_VD11', u'HD01_VD12', u'HD01_VD13', u'HD01_VD14', u'HD01_VD15',
       u'HD01_VD16', u'HD01_VD17', u'geometry'],
      dtype='object')

In [19]:
data.columns = [[u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label',
       u'VD01', u'VD02', u'VD03', u'VD04', u'VD05',
       u'VD06', u'VD07', u'VD08', u'VD09', u'VD10',
       u'VD11', u'VD12', u'VD13', u'VD14', u'VD15',
       u'VD16', u'VD17', u'geometry']]

In [20]:
data.drop([0], inplace=True)
data.head()


Out[20]:
Unnamed: 0 GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 ... VD09 VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17 geometry
1 1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 308 54 14 0 0 0 ... 5 6 13 21 37 22 5 39 87 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 291 33 15 0 0 0 ... 12 0 0 13 0 13 0 0 119 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 710 33 51 0 27 13 ... 0 16 14 29 151 96 33 118 71 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 797 47 30 21 88 0 ... 45 0 79 108 107 23 59 32 123 POLYGON ((-73.99271 40.698097, -73.991759 40.7...
5 5 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 371 48 5 48 21 29 ... 5 19 47 23 16 33 0 5 0 POLYGON ((-74.012844 40.653016, -74.0150479999...

5 rows × 22 columns


In [21]:
intax_2013 = data[['GEO.id', 'GEO.display-label', 'geometry']]
intax_2013.head()


Out[21]:
GEO.id GEO.display-label geometry
1 1500000US360470001001 Block Group 1, Census Tract 1, Kings County, N... POLYGON ((-73.99669799999999 40.700877, -73.99...
2 1500000US360470001002 Block Group 2, Census Tract 1, Kings County, N... POLYGON ((-73.995379 40.700309, -73.993672 40....
3 1500000US360470001003 Block Group 3, Census Tract 1, Kings County, N... POLYGON ((-73.993672 40.699836, -73.9926119999...
4 1500000US360470001004 Block Group 4, Census Tract 1, Kings County, N... POLYGON ((-73.99271 40.698097, -73.991759 40.7...
5 1500000US360470002001 Block Group 1, Census Tract 2, Kings County, N... POLYGON ((-74.012844 40.653016, -74.0150479999...

In [22]:
intax_2013['B1'] = (data.VD02.astype(float) * 5000 * 0.04) + (data.VD03.astype(float)* 12500 * 0.04)
intax_2013['B2'] = (data.VD04.astype(float) * (((17500 - 17050) * 0.045) + 682)) + (data.VD05.astype(float)* (((22500 - 17050) * 0.045) + 682))
intax_2013['B3'] = (data.VD06.astype(float) * (((27500 - 23450) * 0.0525) + 970)) 
intax_2013['B4'] = (data.VD07.astype(float) * (((32500 - 27750) * 0.059) + 1195.75)) + (data.VD08.astype(float)* (((37500 - 27750) * 0.059) + 1195.75)) + (data.VD09.astype(float)* (((42500 - 27750) * 0.059) + 1195.75))
intax_2013['B5'] = (data.VD10.astype(float) * (((47500 - 42750) * 0.0645) + 2080.75)) + (data.VD11.astype(float) * (((55000 - 42750) * 0.0645) + 2080.75)) + (data.VD12.astype(float) * (((67500 - 42750) * 0.0645) + 2080.75)) + (data.VD13.astype(float) * (((87500 - 42750) * 0.0645) + 2080.75)) + (data.VD14.astype(float) * (((112500 - 42750) * 0.0645) + 2080.75)) + (data.VD15.astype(float) * (((137500 - 42750) * 0.0645) + 2080.75))
intax_2013['B6'] = (data.VD16.astype(float) * (((175000 - 160500) * 0.0665) + 9675.63)) + (data.VD17.astype(float) * (((319000 - 160500) * 0.0665) + 9675.63))
intax_2013.head()


Out[22]:
GEO.id GEO.display-label geometry B1 B2 B3 B4 B5 B6
1 1500000US360470001001 Block Group 1, Census Tract 1, Kings County, N... POLYGON ((-73.99669799999999 40.700877, -73.99... 17800.0 0.00 0.000 19185.0 498359.750 2173736.88
2 1500000US360470001002 Block Group 2, Census Tract 1, Kings County, N... POLYGON ((-73.995379 40.700309, -73.993672 40.... 14100.0 0.00 0.000 163528.0 133337.750 2405689.72
3 1500000US360470001003 Block Group 3, Census Tract 1, Kings County, N... POLYGON ((-73.993672 40.699836, -73.9926119999... 32100.0 25035.75 15374.125 85608.0 1837042.875 2690833.32
4 1500000US360470001004 Block Group 4, Census Tract 1, Kings County, N... POLYGON ((-73.99271 40.698097, -73.991759 40.7... 24400.0 96345.25 0.000 144630.0 1790077.750 2827029.40
5 1500000US360470002001 Block Group 1, Census Tract 2, Kings County, N... POLYGON ((-74.012844 40.653016, -74.0150479999... 12100.0 53180.25 34296.125 124567.0 561462.000 53199.40

In [23]:
intax_2013['TotalTax2013'] = intax_2013.B1 + intax_2013.B2 + intax_2013.B3 + intax_2013.B4 + intax_2013.B5 + intax_2013.B6
intax_2013.head()


Out[23]:
GEO.id GEO.display-label geometry B1 B2 B3 B4 B5 B6 TotalTax2013
1 1500000US360470001001 Block Group 1, Census Tract 1, Kings County, N... POLYGON ((-73.99669799999999 40.700877, -73.99... 17800.0 0.00 0.000 19185.0 498359.750 2173736.88 2709081.630
2 1500000US360470001002 Block Group 2, Census Tract 1, Kings County, N... POLYGON ((-73.995379 40.700309, -73.993672 40.... 14100.0 0.00 0.000 163528.0 133337.750 2405689.72 2716655.470
3 1500000US360470001003 Block Group 3, Census Tract 1, Kings County, N... POLYGON ((-73.993672 40.699836, -73.9926119999... 32100.0 25035.75 15374.125 85608.0 1837042.875 2690833.32 4685994.070
4 1500000US360470001004 Block Group 4, Census Tract 1, Kings County, N... POLYGON ((-73.99271 40.698097, -73.991759 40.7... 24400.0 96345.25 0.000 144630.0 1790077.750 2827029.40 4882482.400
5 1500000US360470002001 Block Group 1, Census Tract 2, Kings County, N... POLYGON ((-74.012844 40.653016, -74.0150479999... 12100.0 53180.25 34296.125 124567.0 561462.000 53199.40 838804.775

In [24]:
#Checking the CSV
data = pd.read_csv('income_2000.csv')
data.head()


Out[24]:
Unnamed: 0 GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 ... VD09 VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17 geometry
0 0 Id Id2 Geography Total: Less than $10,000 $10,000 to $14,999 $15,000 to $19,999 $20,000 to $24,999 $25,000 to $29,999 ... $40,000 to $44,999 $45,000 to $49,999 $50,000 to $59,999 $60,000 to $74,999 $75,000 to $99,999 $100,000 to $124,999 $125,000 to $149,999 $150,000 to $199,999 $200,000 or more NaN
1 1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 859 48 45 16 31 18 ... 31 8 70 105 68 115 52 39 119 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 1303 63 0 66 44 22 ... 94 86 147 174 57 151 131 87 52 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 246 16 6 10 23 10 ... 13 12 16 31 23 24 8 15 18 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 4 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 118 6 10 15 0 12 ... 3 10 15 0 15 7 0 0 0 POLYGON ((-74.012844 40.653016, -74.0150479999...

5 rows × 22 columns

Good to go!


In [25]:
data.columns


Out[25]:
Index([u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label', u'VD01',
       u'VD02', u'VD03', u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09',
       u'VD10', u'VD11', u'VD12', u'VD13', u'VD14', u'VD15', u'VD16', u'VD17',
       u'geometry'],
      dtype='object')

In [26]:
data.columns = [[u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label',
       u'VD01', u'VD02', u'VD03', u'VD04', u'VD05',
       u'VD06', u'VD07', u'VD08', u'VD09', u'VD10',
       u'VD11', u'VD12', u'VD13', u'VD14', u'VD15',
       u'VD16', u'VD17', u'geometry']]

In [27]:
data.drop([0], inplace=True)
data.head()


Out[27]:
Unnamed: 0 GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 ... VD09 VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17 geometry
1 1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 859 48 45 16 31 18 ... 31 8 70 105 68 115 52 39 119 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 1303 63 0 66 44 22 ... 94 86 147 174 57 151 131 87 52 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 246 16 6 10 23 10 ... 13 12 16 31 23 24 8 15 18 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 4 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 118 6 10 15 0 12 ... 3 10 15 0 15 7 0 0 0 POLYGON ((-74.012844 40.653016, -74.0150479999...
5 5 1500000US360470002002 360470002002 Block Group 2, Census Tract 2, Kings County, N... 232 46 29 18 25 19 ... 4 5 14 16 12 0 11 0 0 NaN

5 rows × 22 columns


In [28]:
intax_2000 = data[['GEO.id', 'GEO.display-label', 'geometry']]
intax_2000.head()


Out[28]:
GEO.id GEO.display-label geometry
1 1500000US360470001001 Block Group 1, Census Tract 1, Kings County, N... POLYGON ((-73.99669799999999 40.700877, -73.99...
2 1500000US360470001002 Block Group 2, Census Tract 1, Kings County, N... POLYGON ((-73.995379 40.700309, -73.993672 40....
3 1500000US360470001003 Block Group 3, Census Tract 1, Kings County, N... POLYGON ((-73.993672 40.699836, -73.9926119999...
4 1500000US360470002001 Block Group 1, Census Tract 2, Kings County, N... POLYGON ((-74.012844 40.653016, -74.0150479999...
5 1500000US360470002002 Block Group 2, Census Tract 2, Kings County, N... NaN

In [29]:
intax_2000['B1'] = (data.VD02.astype(float) * 5000 * 0.04) + (data.VD03.astype(float)* 12500 * 0.04)
intax_2000['B2'] = (data.VD04.astype(float) * (((17500 - 16000) * 0.045) + 640)) 
intax_2000['B3'] = (data.VD05.astype(float)* (((22500 - 22000) * 0.0525) + 910)) 
intax_2000['B4'] = (data.VD06.astype(float) * (((27500 - 26000) * 0.059) + 1120)) + (data.VD07.astype(float) * (((32500 - 26000) * 0.059) + 1120)) + (data.VD08.astype(float)* (((37500 - 26000) * 0.059) + 1120))
intax_2000['B5'] = (data.VD09.astype(float)* (((42500 - 40000) * 0.0685) + 1946)) + (data.VD10.astype(float) * (((47500 - 40000) * 0.0685) + 1946)) + (data.VD11.astype(float) * (((55000 - 40000) * 0.0685) + 1946)) + (data.VD12.astype(float) * (((67500 - 40000) * 0.0685) + 1946)) + (data.VD13.astype(float) * (((87500 - 40000) * 0.0685) + 1946)) + (data.VD14.astype(float) * (((112500 - 40000) * 0.0685) + 1946)) + (data.VD15.astype(float) * (((137500 - 40000) * 0.0685) + 1946)) + (data.VD16.astype(float) * (((175000 - 40000) * 0.0685) + 1946)) + (data.VD17.astype(float) * (((319000 - 40000) * 0.0685) + 1946))
intax_2000.head()


Out[29]:
GEO.id GEO.display-label geometry B1 B2 B3 B4 B5
1 1500000US360470001001 Block Group 1, Census Tract 1, Kings County, N... POLYGON ((-73.99669799999999 40.700877, -73.99... 32100.0 11320.0 29023.75 173407.0 5234949.25
2 1500000US360470001002 Block Group 2, Census Tract 1, Kings County, N... POLYGON ((-73.995379 40.700309, -73.993672 40.... 12600.0 46695.0 41195.00 236468.5 6052843.25
3 1500000US360470001003 Block Group 3, Census Tract 1, Kings County, N... POLYGON ((-73.993672 40.699836, -73.9926119999... 6200.0 7075.0 21533.75 46018.5 1124763.25
4 1500000US360470002001 Block Group 1, Census Tract 2, Kings County, N... POLYGON ((-74.012844 40.653016, -74.0150479999... 6200.0 10612.5 0.00 56809.5 201933.75
5 1500000US360470002002 Block Group 2, Census Tract 2, Kings County, N... NaN 23700.0 12735.0 23406.25 75527.0 280942.00

In [30]:
intax_2013['TotalTax2000'] = intax_2000.B1 + intax_2000.B2 + intax_2000.B3 + intax_2000.B4 + intax_2000.B5
intax_2013.head()


Out[30]:
GEO.id GEO.display-label geometry B1 B2 B3 B4 B5 B6 TotalTax2013 TotalTax2000
1 1500000US360470001001 Block Group 1, Census Tract 1, Kings County, N... POLYGON ((-73.99669799999999 40.700877, -73.99... 17800.0 0.00 0.000 19185.0 498359.750 2173736.88 2709081.630 5480800.00
2 1500000US360470001002 Block Group 2, Census Tract 1, Kings County, N... POLYGON ((-73.995379 40.700309, -73.993672 40.... 14100.0 0.00 0.000 163528.0 133337.750 2405689.72 2716655.470 6389801.75
3 1500000US360470001003 Block Group 3, Census Tract 1, Kings County, N... POLYGON ((-73.993672 40.699836, -73.9926119999... 32100.0 25035.75 15374.125 85608.0 1837042.875 2690833.32 4685994.070 1205590.50
4 1500000US360470001004 Block Group 4, Census Tract 1, Kings County, N... POLYGON ((-73.99271 40.698097, -73.991759 40.7... 24400.0 96345.25 0.000 144630.0 1790077.750 2827029.40 4882482.400 275555.75
5 1500000US360470002001 Block Group 1, Census Tract 2, Kings County, N... POLYGON ((-74.012844 40.653016, -74.0150479999... 12100.0 53180.25 34296.125 124567.0 561462.000 53199.40 838804.775 416310.25

In [31]:
intax_2013.columns


Out[31]:
Index([           u'GEO.id', u'GEO.display-label',          u'geometry',
                      u'B1',                u'B2',                u'B3',
                      u'B4',                u'B5',                u'B6',
            u'TotalTax2013',      u'TotalTax2000'],
      dtype='object')

In [32]:
taxes = intax_2013[[u'GEO.id', u'GEO.display-label', u'geometry', u'TotalTax2013', u'TotalTax2000']]

In [33]:
taxes.TotalTax2000 = taxes.TotalTax2000 * 1.13
taxes.TotalTax2013 = taxes.TotalTax2013 * 1.04
taxes.head()


Out[33]:
GEO.id GEO.display-label geometry TotalTax2013 TotalTax2000
1 1500000US360470001001 Block Group 1, Census Tract 1, Kings County, N... POLYGON ((-73.99669799999999 40.700877, -73.99... 2.817445e+06 6.193304e+06
2 1500000US360470001002 Block Group 2, Census Tract 1, Kings County, N... POLYGON ((-73.995379 40.700309, -73.993672 40.... 2.825322e+06 7.220476e+06
3 1500000US360470001003 Block Group 3, Census Tract 1, Kings County, N... POLYGON ((-73.993672 40.699836, -73.9926119999... 4.873434e+06 1.362317e+06
4 1500000US360470001004 Block Group 4, Census Tract 1, Kings County, N... POLYGON ((-73.99271 40.698097, -73.991759 40.7... 5.077782e+06 3.113780e+05
5 1500000US360470002001 Block Group 1, Census Tract 2, Kings County, N... POLYGON ((-74.012844 40.653016, -74.0150479999... 8.723570e+05 4.704306e+05

In [34]:
#Saving the dataset as csv
taxes.to_csv('taxes.csv')

In [35]:
dataw = pd.read_csv ('taxes.csv')
dataw.head()


Out[35]:
Unnamed: 0 GEO.id GEO.display-label geometry TotalTax2013 TotalTax2000
0 1 1500000US360470001001 Block Group 1, Census Tract 1, Kings County, N... POLYGON ((-73.99669799999999 40.700877, -73.99... 2.817445e+06 6.193304e+06
1 2 1500000US360470001002 Block Group 2, Census Tract 1, Kings County, N... POLYGON ((-73.995379 40.700309, -73.993672 40.... 2.825322e+06 7.220476e+06
2 3 1500000US360470001003 Block Group 3, Census Tract 1, Kings County, N... POLYGON ((-73.993672 40.699836, -73.9926119999... 4.873434e+06 1.362317e+06
3 4 1500000US360470001004 Block Group 4, Census Tract 1, Kings County, N... POLYGON ((-73.99271 40.698097, -73.991759 40.7... 5.077782e+06 3.113780e+05
4 5 1500000US360470002001 Block Group 1, Census Tract 2, Kings County, N... POLYGON ((-74.012844 40.653016, -74.0150479999... 8.723570e+05 4.704306e+05

In [36]:
sum(taxes.TotalTax2000)


Out[36]:
2569863909.6524997

In [37]:
sum(taxes.TotalTax2013)


Out[37]:
3639857681.1219997

In [38]:
#https://www.tax-brackets.org/newyorktaxtable/2002

In [39]:
rent_2000 = pd.read_csv('rent_2000.csv')
rent_2013 = pd.read_csv('rent_2013.csv')

In [40]:
data1 = pd.read_csv('metrotech.csv')
data1.head()


Out[40]:
Unnamed: 0 GEO.id
0 0 1500000US360470033002
1 1 1500000US360470037001
2 2 1500000US360470009002
3 3 1500000US360470043002
4 4 1500000US360470043001

In [41]:
met = pd.merge(taxes, data1,how='right')
met.head()


Out[41]:
GEO.id GEO.display-label geometry TotalTax2013 TotalTax2000 Unnamed: 0
0 1500000US360470007003 Block Group 3, Census Tract 7, Kings County, N... POLYGON ((-73.995914 40.691416, -73.99508 40.6... 8.541489e+06 2.914260e+06 23
1 1500000US360470009001 Block Group 1, Census Tract 9, Kings County, N... POLYGON ((-73.99310299999999 40.692846, -73.99... 1.044992e+07 4.826333e+06 11
2 1500000US360470009001 Block Group 1, Census Tract 9, Kings County, N... POLYGON ((-73.99310299999999 40.692846, -73.99... 1.044992e+07 4.826333e+06 12
3 1500000US360470009002 Block Group 2, Census Tract 9, Kings County, N... POLYGON ((-73.993736 40.69153499999999, -73.98... 9.948306e+06 4.825448e+06 2
4 1500000US360470011001 Block Group 1, Census Tract 11, Kings County, ... POLYGON ((-73.990447 40.693735, -73.990663 40.... 2.523074e+06 2.588893e+06 10

In [42]:
sum(met.TotalTax2000)


Out[42]:
55622271.087499999

In [45]:
sum(met.TotalTax2013)


Out[45]:
108898373.5944

In [46]:
rent_2000.head()


Out[46]:
Unnamed: 0 GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11 geometry
0 0 Id Id2 Geography Total: Less than 10 percent 10 to 14 percent 15 to 19 percent 20 to 24 percent 25 to 29 percent 30 to 34 percent 35 to 39 percent 40 to 49 percent 50 percent or more Not computed NaN
1 1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 497 53 106 60 49 27 28 22 67 64 21 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 550 85 108 78 68 67 62 12 0 59 11 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 118 14 19 15 9 11 2 4 5 30 9 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 4 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 129 6 11 15 29 6 5 12 16 19 10 POLYGON ((-74.012844 40.653016, -74.0150479999...

In [103]:
bky2000 = rent_2000[[u'GEO.id2', u'VD01', u'VD02', u'VD03',
       u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09', u'VD10', u'VD11']]
bky2000.describe()


Out[103]:
GEO.id2 VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11
count 2062 2062 2062 2062 2062 2062 2062 2062 2062 2062 2062 2062
unique 2062 661 141 157 158 143 142 113 86 103 253 110
top 360470566002 0 0 0 0 0 0 0 0 0 0 0
freq 1 36 399 228 207 258 344 436 594 406 95 439

In [104]:
bky2000.drop([0])


Out[104]:
GEO.id2 VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11
1 360470001001 497 53 106 60 49 27 28 22 67 64 21
2 360470001002 550 85 108 78 68 67 62 12 0 59 11
3 360470001003 118 14 19 15 9 11 2 4 5 30 9
4 360470002001 129 6 11 15 29 6 5 12 16 19 10
5 360470002002 199 23 28 6 25 30 0 15 12 60 0
6 360470003011 316 54 58 41 40 21 27 30 0 30 15
7 360470003012 362 66 60 19 59 46 26 0 23 36 27
8 360470003013 199 26 12 13 0 26 25 9 14 52 22
9 360470003014 159 24 9 12 19 37 0 22 0 36 0
10 360470003015 305 59 13 58 11 35 11 35 17 66 0
11 360470003021 22 11 0 0 11 0 0 0 0 0 0
12 360470005001 547 23 129 162 24 47 34 11 47 58 12
13 360470005002 587 108 91 101 127 11 58 12 12 46 21
14 360470005003 279 14 95 43 43 14 14 14 14 28 0
15 360470005004 450 51 60 60 96 72 31 10 30 20 20
16 360470005005 439 34 61 45 105 26 38 0 36 69 25
17 360470007001 540 49 93 28 101 46 47 26 49 82 19
18 360470007002 453 41 9 58 98 41 48 36 30 83 9
19 360470007003 276 46 36 39 64 19 19 0 10 33 10
20 360470009001 450 56 72 65 58 38 23 9 9 91 29
21 360470009002 289 59 72 32 16 8 27 0 28 21 26
22 360470011001 83 41 0 0 0 42 0 0 0 0 0
23 360470013001 254 10 30 86 8 0 0 46 32 16 26
24 360470013002 14 0 0 0 0 7 0 0 0 7 0
25 360470018001 2 2 0 0 0 0 0 0 0 0 0
26 360470020001 101 5 4 5 16 5 0 26 6 21 13
27 360470020002 124 0 13 5 7 0 12 14 6 54 13
28 360470020003 142 16 0 5 45 18 9 0 9 30 10
29 360470021001 105 0 16 26 0 19 0 0 7 18 19
30 360470021002 120 13 0 0 30 12 13 13 0 39 0
... ... ... ... ... ... ... ... ... ... ... ... ...
2032 360471184001 261 10 11 26 29 20 0 20 22 102 21
2033 360471184002 301 0 30 67 38 41 39 11 11 64 0
2034 360471184003 318 22 0 17 51 52 27 0 29 96 24
2035 360471186001 201 11 10 9 22 22 49 18 0 60 0
2036 360471186002 286 20 0 48 49 0 26 0 36 107 0
2037 360471188001 225 6 20 0 0 20 23 33 6 111 6
2038 360471188002 247 6 27 27 19 15 12 0 25 98 18
2039 360471188003 410 6 15 44 0 29 9 26 45 177 59
2040 360471190001 417 30 50 25 13 31 23 14 9 205 17
2041 360471192001 144 7 6 9 18 0 29 0 6 54 15
2042 360471192002 167 21 21 0 27 15 0 14 15 46 8
2043 360471192003 158 10 29 25 9 0 0 8 14 29 34
2044 360471194001 183 0 7 21 23 20 8 0 9 78 17
2045 360471194002 241 11 45 64 16 8 9 0 39 49 0
2046 360471194003 280 26 28 25 17 0 22 23 17 86 36
2047 360471196001 269 28 21 9 26 28 6 8 20 114 9
2048 360471196002 207 16 41 14 21 7 13 9 13 52 21
2049 360471196003 383 6 40 65 41 45 8 18 16 122 22
2050 360471196004 314 0 34 36 41 27 45 12 7 99 13
2051 360471200001 224 0 34 12 19 21 4 0 35 72 27
2052 360471200002 111 0 25 15 11 0 12 12 12 24 0
2053 360471202001 135 22 0 30 9 12 14 17 7 24 0
2054 360471202002 138 8 7 23 12 0 9 0 5 50 24
2055 360471208001 97 0 25 13 0 35 0 11 0 13 0
2056 360471208002 1840 125 294 267 210 194 136 116 135 305 58
2057 360471208003 354 0 30 0 40 40 32 33 14 165 0
2058 360471210001 1365 192 181 133 129 174 87 64 55 213 137
2059 360471214001 1479 208 83 138 167 178 81 57 76 261 230
2060 360471220001 627 24 64 69 83 43 33 30 56 161 64
2061 360471220002 95 10 0 25 25 0 14 8 0 13 0

2061 rows × 12 columns


In [109]:
bky2000.VD01.sum()


Out[109]:
'Total:4975501181291993163621991593052254758727945043954045327645028983254142101124142105120871025835444914926218058107105521818218322801083610289486210345203489108304795283109723471541283052797327515726927529102532822884352502232281561521791033243222611912321821943001013889544189588467622461644161123140284435891981816514735825018035054788310377291272506223253284231231331342360390590204215233321191315568187423225308244275426317152205110348629231719938230626128432330035228538733334141738828340827828333634934635135529128234425027680763330272358513421417400307280408250330411363228292321251399412360342277420221303423429446296437365245439396180249537317329456317240421422301249213306276257208257265206438378262107019736122118620549337511053413822933217618643716728154922042033629129442022424731137024824323729636833129327317726924340525331633743033729330941324917121928726518325929840942941427435457258400309272802723315014527031723226126518710622122733221023525223820902252773042309432334536229026927339028433522550646730831522730332128838511428657635846824935329435234028743211013524752617541616815820325137512327219024524954156154025736002862656721751262893911471544133525812434433263462831631751700179702081903202832313052338734519721732326124835662221673243222639835741029736425730715629543639115620540847837324964299210342420175701671841256420240927729686805451327117204165380297321469267256432868515209165307388138922112231313842901632383902695293071982712365453482823912523892763835441303622772591369324225428722544718037219929163117149269202352196337384284270201231300134305145175169906240537437165117246265503433290148173331522273297438230745727272174164361373435406180194225263448329132175130175119290376225415293459449211145111233579282202510256445225329147207226314165321733087205109303723944434571016265207266291202333282166357935562312933742892683082171892433412333113353242893011712972932572613893163050352158264261335294249122613170469215352241324269333374401272166353521376256321254246273794743364119617443327049126837138338136297103232017513409115845870237345309126275148385355349528311261156401301209397207233473239177335260483492332302262361254353297278133189325540302152142712113472842021033064121514925089535362131019142527513418431431335329574441881120355761144448751738037540831038436046432111388287541564542286360464191021767768936456912543127029213258115464464599395984622485586186390179295245891252621822712831623483064643003304283141615710226236328924369937547101312152110215829133325956733821340534407530279113338344563404635675185326511276642548529104718882767814173096023175998419511127331245403670452250296174196621372743232791506968216753248341469144237387277259396177941640289250195164197126496261240295421614456739351565196215231962002472142902252071391082069511362141673171326306222114394145394151429218247270197303392283165165911953251951769964182273442245017827022520915324139723114324918321452323122931629232729712525213425336221729922322728723320313124127017627501232452453282723041082302442961982883591233022941991113121942603451491833171701942083543043504552411749123521820329013837922522822771193139252448236270143182301380267405445285731802723383214248463443763134103303413414263016311951641301513353013842083082813423284773774362263923202173653632723132222531112225144355582652804363394092731853582753563144192002502972551851042651453762164813823198000143583281421478257423317701511861367104413718826492126132225243245130177291341397339272286509370210276490366210286648282006483933014252910581018852742764311361862854173693071346746822782703218994052775164824044235882862932862793923233132473102944864851742543501844712703063773101141532593343013013014114556470520307597434268446560125044916867461703877573952307555187519761586951512482290317269363389161464720636773258954360947716143235337031334722457595506253379100535456344170284285190733376727326388280195272244366369280153162234367265331313255188452182772022842223113882783263132131893442041592861702264664473050113568440125947140530303962791073788585873881174124191313236254453491993502315244244541041193874514622173073762896165413232221623173951573901272683395417650731316156223402982842521474414526342419213348328452154313251865732797943340339937311171050266293285172117474471421311307200536251383012933154637820317241932373119212646549019629829534926637940142740737536131712419923033755040015132015091403721683972761822294672483845532029241838883660555841647735643762616313217142538229914494892623415324561674446172823831721656189219251435306703589711213478163031010918612934826823020081001311431091597943888114487133698721826719591422124922014010228630014718555170603791912195162251034421972252362182134233392153581996199118306202602785851171581011924410364406301461563390359425650479325317344301741927682316026781211721976022135847231821033856127232328224445922021186986387890074824320713791943204274515014203892133833163113260284317238428948251375178724874043402102544212555592973412654722421391251140158549111219311110211414614312101851331469489094131250537353143293194258345221251379110231202239323252188462233348222422203104345970223112295050112722230783249718745050248837723743957043746429039843548518559628865143995565284821122313013318254843934178301663042972673051843727113950540816848534873142251497741849516961315941110175591821833514423307110523412531513225650375344196339155162192803522707313511925226912194724084171419827033111901261011287381295161592321576355246219185447234218283557640440147179176152179172171771541321534421354262854431949914546229013511411439975511356811241467146851163252453531895273982922132122884073291922323553171736217237513015148671433532010517250161721023882283732182722431061651427922225321231117839644020632111610872343104152581211371291512201431752132273147230027637335430640219033523034136602662082663332613013182012862252474104171441671581832412802692073833142241111351389718403541365147962795'

In [96]:
bky2000.loc['Total']= bky2000.astype(float).sum()
bky2000.tail()



ValueErrorTraceback (most recent call last)
<ipython-input-96-2bc6fdf9cb97> in <module>()
----> 1 bky2000.loc['Total']= bky2000.astype(float).sum()
      2 bky2000.tail()

/opt/rh/anaconda/root/envs/PUI2016_Python2/lib/python2.7/site-packages/pandas/core/generic.pyc in astype(self, dtype, copy, raise_on_error, **kwargs)
   2948 
   2949         mgr = self._data.astype(dtype=dtype, copy=copy,
-> 2950                                 raise_on_error=raise_on_error, **kwargs)
   2951         return self._constructor(mgr).__finalize__(self)
   2952 

/opt/rh/anaconda/root/envs/PUI2016_Python2/lib/python2.7/site-packages/pandas/core/internals.pyc in astype(self, dtype, **kwargs)
   2936 
   2937     def astype(self, dtype, **kwargs):
-> 2938         return self.apply('astype', dtype=dtype, **kwargs)
   2939 
   2940     def convert(self, **kwargs):

/opt/rh/anaconda/root/envs/PUI2016_Python2/lib/python2.7/site-packages/pandas/core/internals.pyc in apply(self, f, axes, filter, do_integrity_check, consolidate, raw, **kwargs)
   2888 
   2889             kwargs['mgr'] = self
-> 2890             applied = getattr(b, f)(**kwargs)
   2891             result_blocks = _extend_blocks(applied, result_blocks)
   2892 

/opt/rh/anaconda/root/envs/PUI2016_Python2/lib/python2.7/site-packages/pandas/core/internals.pyc in astype(self, dtype, copy, raise_on_error, values, **kwargs)
    432                **kwargs):
    433         return self._astype(dtype, copy=copy, raise_on_error=raise_on_error,
--> 434                             values=values, **kwargs)
    435 
    436     def _astype(self, dtype, copy=False, raise_on_error=True, values=None,

/opt/rh/anaconda/root/envs/PUI2016_Python2/lib/python2.7/site-packages/pandas/core/internals.pyc in _astype(self, dtype, copy, raise_on_error, values, klass, mgr, **kwargs)
    475 
    476                 # _astype_nansafe works fine with 1-d only
--> 477                 values = com._astype_nansafe(values.ravel(), dtype, copy=True)
    478                 values = values.reshape(self.shape)
    479 

/opt/rh/anaconda/root/envs/PUI2016_Python2/lib/python2.7/site-packages/pandas/core/common.pyc in _astype_nansafe(arr, dtype, copy)
   1918 
   1919     if copy:
-> 1920         return arr.astype(dtype)
   1921     return arr.view(dtype)
   1922 

ValueError: could not convert string to float: Not computed21119100152722000122102025199102926026001313101900072124112520903093518506350203303314400223380032008100001635001896507600000179020008028161802500331613017002821151890810148181070131951600

In [ ]:


In [47]:
metr00 = pd.merge(rent_2000, data1,on='GEO.id', how='right')
metr00.head()


Out[47]:
Unnamed: 0_x GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11 geometry Unnamed: 0_y
0 19 1500000US360470007003 360470007003 Block Group 3, Census Tract 7, Kings County, N... 276 46 36 39 64 19 19 0 10 33 10 POLYGON ((-73.995914 40.691416, -73.99508 40.6... 23
1 20 1500000US360470009001 360470009001 Block Group 1, Census Tract 9, Kings County, N... 450 56 72 65 58 38 23 9 9 91 29 POLYGON ((-73.99310299999999 40.692846, -73.99... 11
2 20 1500000US360470009001 360470009001 Block Group 1, Census Tract 9, Kings County, N... 450 56 72 65 58 38 23 9 9 91 29 POLYGON ((-73.99310299999999 40.692846, -73.99... 12
3 21 1500000US360470009002 360470009002 Block Group 2, Census Tract 9, Kings County, N... 289 59 72 32 16 8 27 0 28 21 26 POLYGON ((-73.993736 40.69153499999999, -73.98... 2
4 22 1500000US360470011001 360470011001 Block Group 1, Census Tract 11, Kings County, ... 83 41 0 0 0 42 0 0 0 0 0 POLYGON ((-73.990447 40.693735, -73.990663 40.... 10

In [48]:
sum(metr00.VD01.astype(float))


Out[48]:
5488.0

In [49]:
sum(metr00.VD10.astype(float))


Out[49]:
920.0

In [50]:
sum(metr00.VD10.astype(float))/sum(metr00.VD01.astype(float))


Out[50]:
0.16763848396501457

In [86]:
metr00.columns


Out[86]:
Index([u'Unnamed: 0_x', u'GEO.id', u'GEO.id2', u'GEO.display-label', u'VD01',
       u'VD02', u'VD03', u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09',
       u'VD10', u'VD11', u'geometry', u'Unnamed: 0_y'],
      dtype='object')

In [87]:
met00 = metr00[[u'GEO.id2',u'VD01',
       u'VD02', u'VD03', u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09',
       u'VD10', u'VD11']]
met00.head()


Out[87]:
GEO.id2 VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11
0 360470007003 276 46 36 39 64 19 19 0 10 33 10
1 360470009001 450 56 72 65 58 38 23 9 9 91 29
2 360470009001 450 56 72 65 58 38 23 9 9 91 29
3 360470009002 289 59 72 32 16 8 27 0 28 21 26
4 360470011001 83 41 0 0 0 42 0 0 0 0 0

In [88]:
met00.loc['Total']= met00.astype(float).sum()
met00


Out[88]:
GEO.id2 VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11
0 360470007003 276 46 36 39 64 19 19 0 10 33 10
1 360470009001 450 56 72 65 58 38 23 9 9 91 29
2 360470009001 450 56 72 65 58 38 23 9 9 91 29
3 360470009002 289 59 72 32 16 8 27 0 28 21 26
4 360470011001 83 41 0 0 0 42 0 0 0 0 0
5 360470013001 254 10 30 86 8 0 0 46 32 16 26
6 360470031001 0 0 0 0 0 0 0 0 0 0 0
7 360470031001 0 0 0 0 0 0 0 0 0 0 0
8 360470031002 108 14 0 19 12 28 0 7 0 22 6
9 360470033002 486 38 39 48 71 48 33 23 62 91 33
10 360470035001 489 49 43 87 62 38 56 21 35 58 40
11 360470037001 109 10 0 31 26 0 21 0 0 21 0
12 360470039001 154 17 16 13 11 22 0 29 16 30 0
13 360470039001 154 17 16 13 11 22 0 29 16 30 0
14 360470039002 128 0 0 66 10 25 0 0 14 13 0
15 360470039003 305 22 37 17 38 59 34 19 13 58 8
16 360470041001 275 0 27 40 27 79 9 27 5 61 0
17 360470041002 157 36 16 51 22 0 12 10 0 10 0
18 360470041003 269 14 43 42 27 28 14 11 37 37 16
19 360470043001 0 0 0 0 0 0 0 0 0 0 0
20 360470043002 253 0 10 32 62 33 34 0 17 47 18
21 360470043003 282 9 23 46 82 30 9 18 8 48 9
22 360470043004 288 25 18 32 60 53 9 9 5 71 6
23 360470129011 229 23 35 8 9 19 31 9 24 71 0
Total 8.65128e+12 5488 542 605 832 734 629 354 276 340 920 256

In [51]:
metr13 = pd.merge(rent_2013, data1,on='GEO.id', how='right')
metr13.head()


Out[51]:
Unnamed: 0_x GEO.id GEO.id2 GEO.display-label HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08 HD01_VD09 HD01_VD10 HD01_VD11 geometry Unnamed: 0_y
0 18 1500000US360470007003 360470007003 Block Group 3, Census Tract 7, Kings County, N... 407 31 48 36 84 74 11 9 35 79 0 POLYGON ((-73.995914 40.691416, -73.99508 40.6... 23
1 19 1500000US360470009001 360470009001 Block Group 1, Census Tract 9, Kings County, N... 640 46 62 117 83 41 20 19 66 150 36 POLYGON ((-73.99310299999999 40.692846, -73.99... 11
2 19 1500000US360470009001 360470009001 Block Group 1, Census Tract 9, Kings County, N... 640 46 62 117 83 41 20 19 66 150 36 POLYGON ((-73.99310299999999 40.692846, -73.99... 12
3 20 1500000US360470009002 360470009002 Block Group 2, Census Tract 9, Kings County, N... 782 65 119 139 94 132 24 35 0 80 94 POLYGON ((-73.993736 40.69153499999999, -73.98... 2
4 21 1500000US360470011001 360470011001 Block Group 1, Census Tract 11, Kings County, ... 313 22 6 44 88 31 32 21 31 38 0 POLYGON ((-73.990447 40.693735, -73.990663 40.... 10

In [52]:
sum(metr13.HD01_VD01.astype(float))


Out[52]:
8469.0

In [53]:
sum(metr13.HD01_VD10.astype(float))


Out[53]:
1674.0

In [54]:
sum(metr13.HD01_VD10.astype(float))/sum(metr13.HD01_VD01.astype(float))


Out[54]:
0.19766206163655686

In [82]:
metr13.columns


Out[82]:
Index([u'Unnamed: 0_x', u'GEO.id', u'GEO.id2', u'GEO.display-label',
       u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', u'HD01_VD05',
       u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10',
       u'HD01_VD11', u'geometry', u'Unnamed: 0_y'],
      dtype='object')

In [84]:
met13 = metr13[[u'GEO.id2',
       u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', u'HD01_VD05',
       u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10',
       u'HD01_VD11']]
met13.head()


Out[84]:
GEO.id2 HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08 HD01_VD09 HD01_VD10 HD01_VD11
0 360470007003 407 31 48 36 84 74 11 9 35 79 0
1 360470009001 640 46 62 117 83 41 20 19 66 150 36
2 360470009001 640 46 62 117 83 41 20 19 66 150 36
3 360470009002 782 65 119 139 94 132 24 35 0 80 94
4 360470011001 313 22 6 44 88 31 32 21 31 38 0

In [85]:
met13.loc['Total']= met13.astype(float).sum()
met13


Out[85]:
GEO.id2 HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08 HD01_VD09 HD01_VD10 HD01_VD11
0 360470007003 407 31 48 36 84 74 11 9 35 79 0
1 360470009001 640 46 62 117 83 41 20 19 66 150 36
2 360470009001 640 46 62 117 83 41 20 19 66 150 36
3 360470009002 782 65 119 139 94 132 24 35 0 80 94
4 360470011001 313 22 6 44 88 31 32 21 31 38 0
5 360470013001 134 0 24 8 52 27 5 0 13 0 5
6 360470031001 364 11 33 28 95 63 5 16 14 88 11
7 360470031001 364 11 33 28 95 63 5 16 14 88 11
8 360470031002 0 0 0 0 0 0 0 0 0 0 0
9 360470033002 897 33 76 75 153 138 60 55 49 258 0
10 360470035001 142 4 7 15 24 21 20 7 17 27 0
11 360470037001 314 0 63 37 60 35 12 17 54 32 4
12 360470039001 390 29 39 62 52 41 20 17 52 47 31
13 360470039001 390 29 39 62 52 41 20 17 52 47 31
14 360470039002 284 0 0 19 6 67 8 31 0 153 0
15 360470039003 144 40 10 30 0 21 0 0 7 36 0
16 360470041001 562 21 80 81 74 56 40 44 89 41 36
17 360470041002 281 9 19 16 7 81 9 10 30 100 0
18 360470041003 201 0 35 77 9 11 7 0 10 52 0
19 360470043001 0 0 0 0 0 0 0 0 0 0 0
20 360470043002 259 11 23 48 14 47 24 42 32 10 8
21 360470043003 344 0 26 27 67 68 0 0 75 56 25
22 360470043004 387 7 33 25 123 77 12 19 36 55 0
23 360470129011 230 0 0 25 49 0 17 38 0 87 14
Total 8.65128e+12 8469 415 837 1116 1364 1176 371 432 742 1674 342

In [55]:
data1 = pd.read_csv('willy.csv')
data1.head()


Out[55]:
Unnamed: 0 GEO.id
0 0 1500000US360470551003
1 3 1500000US360470551002
2 6 1500000US360470519003
3 14 1500000US360470551001
4 22 1500000US360470523004

In [56]:
met = pd.merge(taxes, data1,how='right')
met.head()


Out[56]:
GEO.id GEO.display-label geometry TotalTax2013 TotalTax2000 Unnamed: 0
0 1500000US360470517001 Block Group 1, Census Tract 517, Kings County,... POLYGON ((-73.95713099999999 40.718388, -73.95... 1.531597e+06 4.270027e+06 107
1 1500000US360470517002 Block Group 2, Census Tract 517, Kings County,... POLYGON ((-73.958416 40.719189, -73.955443 40.... 4.429708e+06 1.703405e+06 111
2 1500000US360470519001 Block Group 1, Census Tract 519, Kings County,... POLYGON ((-73.955844 40.717589, -73.9552509999... 5.146695e+06 1.516244e+06 71
3 1500000US360470519002 Block Group 2, Census Tract 519, Kings County,... POLYGON ((-73.956439 40.717032, -73.952754 40.... 4.265974e+06 9.252660e+05 56
4 1500000US360470519003 Block Group 3, Census Tract 519, Kings County,... POLYGON ((-73.958811 40.714824, -73.9582229999... 2.765746e+06 1.214220e+06 6

In [57]:
sum(met.TotalTax2000)


Out[57]:
26881499.657499991

In [58]:
sum(met.TotalTax2013)


Out[58]:
71606118.74000001

In [ ]:


In [59]:
willy00 = pd.merge(rent_2000, data1,on='GEO.id', how='right')
willy00.head()


Out[59]:
Unnamed: 0_x GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11 geometry Unnamed: 0_y
0 1307 1500000US360470517001 360470517001 Block Group 1, Census Tract 517, Kings County,... 161 7 29 22 44 18 18 9 9 0 5 POLYGON ((-73.95713099999999 40.718388, -73.95... 107
1 1308 1500000US360470517002 360470517002 Block Group 2, Census Tract 517, Kings County,... 432 41 38 76 106 57 32 16 14 44 8 POLYGON ((-73.958416 40.719189, -73.955443 40.... 111
2 1312 1500000US360470519001 360470519001 Block Group 1, Census Tract 519, Kings County,... 347 23 27 34 55 33 26 22 8 101 18 POLYGON ((-73.955844 40.717589, -73.9552509999... 71
3 1313 1500000US360470519002 360470519002 Block Group 2, Census Tract 519, Kings County,... 224 7 23 15 47 7 32 8 14 64 7 POLYGON ((-73.956439 40.717032, -73.952754 40.... 56
4 1314 1500000US360470519003 360470519003 Block Group 3, Census Tract 519, Kings County,... 575 17 105 68 60 32 31 27 60 143 32 POLYGON ((-73.958811 40.714824, -73.9582229999... 6

In [60]:
sum(willy00.VD01.astype(float))


Out[60]:
7272.0

In [61]:
sum(willy00.VD10.astype(float))


Out[61]:
1413.0

In [62]:
sum(willy00.VD10.astype(float)) / sum(willy00.VD01.astype(float))


Out[62]:
0.19430693069306931

In [79]:
willy00.columns


Out[79]:
Index([u'Unnamed: 0_x', u'GEO.id', u'GEO.id2', u'GEO.display-label', u'VD01',
       u'VD02', u'VD03', u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09',
       u'VD10', u'VD11', u'geometry', u'Unnamed: 0_y'],
      dtype='object')

In [80]:
will00 = willy00[[u'GEO.id2',u'VD01',
       u'VD02', u'VD03', u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09',
       u'VD10', u'VD11']]
will00.head()


Out[80]:
GEO.id2 VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11
0 360470517001 161 7 29 22 44 18 18 9 9 0 5
1 360470517002 432 41 38 76 106 57 32 16 14 44 8
2 360470519001 347 23 27 34 55 33 26 22 8 101 18
3 360470519002 224 7 23 15 47 7 32 8 14 64 7
4 360470519003 575 17 105 68 60 32 31 27 60 143 32

In [81]:
will00.loc['Total']= will00.astype(float).sum()
will00


Out[81]:
GEO.id2 VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11
0 360470517001 161 7 29 22 44 18 18 9 9 0 5
1 360470517002 432 41 38 76 106 57 32 16 14 44 8
2 360470519001 347 23 27 34 55 33 26 22 8 101 18
3 360470519002 224 7 23 15 47 7 32 8 14 64 7
4 360470519003 575 17 105 68 60 32 31 27 60 143 32
5 360470523001 535 89 73 15 80 54 46 42 40 57 39
6 360470523002 456 32 45 45 53 7 37 26 25 171 15
7 360470523003 344 64 50 8 67 37 23 7 26 52 10
8 360470523004 170 16 47 0 31 7 8 6 19 28 8
9 360470523005 284 23 19 56 29 56 8 0 10 50 33
10 360470525001 733 53 84 73 92 97 86 59 52 107 30
11 360470549002 199 59 23 17 5 18 0 18 21 34 4
12 360470551001 424 12 67 57 40 50 40 0 43 102 13
13 360470551002 454 40 46 52 93 49 41 6 9 88 30
14 360470551003 104 10 10 27 20 0 0 0 0 28 9
15 360470551004 119 6 10 29 14 0 0 27 7 19 7
16 360470553001 307 35 33 35 37 31 52 8 26 50 0
17 360470553002 376 73 48 46 37 65 14 30 13 43 7
18 360470553003 289 28 47 0 41 39 29 13 0 72 20
19 360470555001 222 0 15 38 30 29 6 9 19 62 14
20 360470557001 390 47 50 49 34 41 29 20 27 66 27
21 360470557002 127 17 15 0 36 13 0 0 14 32 0
Total 7.93035e+12 7272 699 904 762 1051 740 558 353 456 1413 336

In [63]:
willy13 = pd.merge(rent_2013, data1,on='GEO.id', how='right')


Out[63]:
Unnamed: 0_x GEO.id GEO.id2 GEO.display-label HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08 HD01_VD09 HD01_VD10 HD01_VD11 geometry Unnamed: 0_y
0 1298 1500000US360470517001 360470517001 Block Group 1, Census Tract 517, Kings County,... 261 0 15 32 22 11 61 29 24 31 36 POLYGON ((-73.95713099999999 40.718388, -73.95... 107
1 1299 1500000US360470517002 360470517002 Block Group 2, Census Tract 517, Kings County,... 482 48 21 91 99 0 56 64 7 96 0 POLYGON ((-73.958416 40.719189, -73.955443 40.... 111
2 1303 1500000US360470519001 360470519001 Block Group 1, Census Tract 519, Kings County,... 666 40 85 145 45 36 111 2 23 179 0 POLYGON ((-73.955844 40.717589, -73.9552509999... 71
3 1304 1500000US360470519002 360470519002 Block Group 2, Census Tract 519, Kings County,... 488 0 88 115 83 43 10 39 45 55 10 POLYGON ((-73.956439 40.717032, -73.952754 40.... 56
4 1305 1500000US360470519003 360470519003 Block Group 3, Census Tract 519, Kings County,... 469 40 10 21 76 84 37 0 11 190 0 POLYGON ((-73.958811 40.714824, -73.9582229999... 6
5 1309 1500000US360470523001 360470523001 Block Group 1, Census Tract 523, Kings County,... 651 15 78 59 116 13 36 15 108 157 54 POLYGON ((-73.95753499999999 40.711609, -73.95... 45
6 1310 1500000US360470523002 360470523002 Block Group 2, Census Tract 523, Kings County,... 719 0 98 140 15 65 57 13 121 210 0 POLYGON ((-73.960195 40.710355, -73.959064 40.... 49
7 1311 1500000US360470523003 360470523003 Block Group 3, Census Tract 523, Kings County,... 281 0 0 32 79 15 0 51 35 42 27 POLYGON ((-73.961421 40.711349, -73.961029 40.... 40
8 1312 1500000US360470523004 360470523004 Block Group 4, Census Tract 523, Kings County,... 163 0 16 16 35 16 0 35 0 45 0 POLYGON ((-73.95979699999999 40.713887, -73.95... 22
9 1313 1500000US360470523005 360470523005 Block Group 5, Census Tract 523, Kings County,... 436 104 0 36 86 75 0 80 15 40 0 POLYGON ((-73.958342 40.713306, -73.9568099999... 24
10 1314 1500000US360470525001 360470525001 Block Group 1, Census Tract 525, Kings County,... 769 25 68 123 80 121 134 3 38 166 11 POLYGON ((-73.96229199999999 40.709921, -73.96... 52
11 1383 1500000US360470549002 360470549002 Block Group 2, Census Tract 549, Kings County,... 457 22 0 53 68 39 31 7 22 215 0 POLYGON ((-73.96905 40.712482, -73.96886099999... 26
12 1387 1500000US360470551001 360470551001 Block Group 1, Census Tract 551, Kings County,... 285 63 17 15 26 26 35 35 39 29 0 POLYGON ((-73.96836999999999 40.714127, -73.96... 14
13 1388 1500000US360470551002 360470551002 Block Group 2, Census Tract 551, Kings County,... 582 14 40 49 133 66 50 16 79 86 49 POLYGON ((-73.961287 40.714502, -73.9597969999... 3
14 1389 1500000US360470551003 360470551003 Block Group 3, Census Tract 551, Kings County,... 584 72 90 75 75 34 23 0 14 187 14 POLYGON ((-73.962777 40.715106, -73.961287 40.... 0
15 1390 1500000US360470551004 360470551004 Block Group 4, Census Tract 551, Kings County,... 504 52 130 62 126 17 29 0 0 88 0 POLYGON ((-73.96407099999999 40.713079, -73.96... 38
16 1394 1500000US360470553001 360470553001 Block Group 1, Census Tract 553, Kings County,... 357 0 86 59 11 12 35 16 65 55 18 POLYGON ((-73.958913 40.716728, -73.957725 40.... 96
17 1395 1500000US360470553002 360470553002 Block Group 2, Census Tract 553, Kings County,... 421 8 56 57 64 34 70 33 32 67 0 POLYGON ((-73.962289 40.71558, -73.961912 40.7... 64
18 1396 1500000US360470553003 360470553003 Block Group 3, Census Tract 553, Kings County,... 200 12 0 35 17 15 45 16 0 53 7 POLYGON ((-73.960196 40.717528, -73.9590089999... 100
19 1400 1500000US360470555001 360470555001 Block Group 1, Census Tract 555, Kings County,... 1379 74 105 112 257 207 170 70 73 239 72 POLYGON ((-73.967578 40.716496, -73.966511 40.... 82
20 1404 1500000US360470557001 360470557001 Block Group 1, Census Tract 557, Kings County,... 418 21 45 60 68 34 0 0 51 119 20 POLYGON ((-73.96278699999999 40.722562, -73.96... 105
21 1405 1500000US360470557002 360470557002 Block Group 2, Census Tract 557, Kings County,... 373 38 52 48 55 51 51 8 31 39 0 POLYGON ((-73.96264499999999 40.722747, -73.96... 115

In [64]:
sum(willy13.HD01_VD01.astype(float))


Out[64]:
10945.0

In [65]:
sum(willy13.HD01_VD10.astype(float))


Out[65]:
2388.0

In [66]:
sum(willy13.HD01_VD10.astype(float)) / sum(willy13.HD01_VD01.astype(float))


Out[66]:
0.21818181818181817

In [69]:
willy13.columns


Out[69]:
Index([u'Unnamed: 0_x', u'GEO.id', u'GEO.id2', u'GEO.display-label',
       u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', u'HD01_VD05',
       u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10',
       u'HD01_VD11', u'geometry', u'Unnamed: 0_y'],
      dtype='object')

In [78]:
will13 = willy13[[u'GEO.id2',
       u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', u'HD01_VD05',
       u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10',
       u'HD01_VD11']]
will13.head()


Out[78]:
GEO.id2 HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08 HD01_VD09 HD01_VD10 HD01_VD11
0 360470517001 261 0 15 32 22 11 61 29 24 31 36
1 360470517002 482 48 21 91 99 0 56 64 7 96 0
2 360470519001 666 40 85 145 45 36 111 2 23 179 0
3 360470519002 488 0 88 115 83 43 10 39 45 55 10
4 360470519003 469 40 10 21 76 84 37 0 11 190 0

In [77]:
will13.loc['Total']= will13.astype(float).sum()
will13


Out[77]:
GEO.id2 HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08 HD01_VD09 HD01_VD10 HD01_VD11
0 360470517001 261 0 15 32 22 11 61 29 24 31 36
1 360470517002 482 48 21 91 99 0 56 64 7 96 0
2 360470519001 666 40 85 145 45 36 111 2 23 179 0
3 360470519002 488 0 88 115 83 43 10 39 45 55 10
4 360470519003 469 40 10 21 76 84 37 0 11 190 0
5 360470523001 651 15 78 59 116 13 36 15 108 157 54
6 360470523002 719 0 98 140 15 65 57 13 121 210 0
7 360470523003 281 0 0 32 79 15 0 51 35 42 27
8 360470523004 163 0 16 16 35 16 0 35 0 45 0
9 360470523005 436 104 0 36 86 75 0 80 15 40 0
10 360470525001 769 25 68 123 80 121 134 3 38 166 11
11 360470549002 457 22 0 53 68 39 31 7 22 215 0
12 360470551001 285 63 17 15 26 26 35 35 39 29 0
13 360470551002 582 14 40 49 133 66 50 16 79 86 49
14 360470551003 584 72 90 75 75 34 23 0 14 187 14
15 360470551004 504 52 130 62 126 17 29 0 0 88 0
16 360470553001 357 0 86 59 11 12 35 16 65 55 18
17 360470553002 421 8 56 57 64 34 70 33 32 67 0
18 360470553003 200 12 0 35 17 15 45 16 0 53 7
19 360470555001 1379 74 105 112 257 207 170 70 73 239 72
20 360470557001 418 21 45 60 68 34 0 0 51 119 20
21 360470557002 373 38 52 48 55 51 51 8 31 39 0
Total 7.93035e+12 10945 648 1100 1435 1636 1014 1041 532 833 2388 318

In [ ]: