Wrangling Data

Author: Avikal Somvanshi


In [1]:
from __future__ import print_function, division
import pandas as pd
import pylab as pl
import os
import csv
from pandas.tools.plotting import scatter_matrix
import sys
reload(sys)
import zipfile
import geopandas as gpd
sys.setdefaultencoding('utf-8')
%pylab inline

In [2]:
#Unzippping downloaded median income data of 1999 in a dataframe
zipfile.ZipFile(os.path.join("data/income_00.zip")).extractall(r"data/income_00")

In [3]:
#Reading and saving downloaded median income data of 1999 in a dataframe
data_2000 = pd.read_csv('data/income_00/DEC_00_SF3_P052_with_ann.csv')
data_2000.head()


Out[3]:
GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 VD07 VD08 VD09 VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17
0 Id Id2 Geography Total: Less than $10,000 $10,000 to $14,999 $15,000 to $19,999 $20,000 to $24,999 $25,000 to $29,999 $30,000 to $34,999 $35,000 to $39,999 $40,000 to $44,999 $45,000 to $49,999 $50,000 to $59,999 $60,000 to $74,999 $75,000 to $99,999 $100,000 to $124,999 $125,000 to $149,999 $150,000 to $199,999 $200,000 or more
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 859 48 45 16 31 18 59 35 31 8 70 105 68 115 52 39 119
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 1303 63 0 66 44 22 75 54 94 86 147 174 57 151 131 87 52
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 246 16 6 10 23 10 13 8 13 12 16 31 23 24 8 15 18
4 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 118 6 10 15 0 12 9 16 3 10 15 0 15 7 0 0 0

In [4]:
#Unzipping downloaded median income data of 2013 in a dataframe
zipfile.ZipFile(os.path.join("data/income_13.zip")).extractall(r"data/income_13")

In [5]:
#Reading and saving downloaded median income data of 2013 in a dataframe
data_2013 = pd.read_csv('data/income_13/ACS_13_5YR_B19001_with_ann.csv')
data_2013.head()


Out[5]:
GEO.id GEO.id2 GEO.display-label HD01_VD01 HD02_VD01 HD01_VD02 HD02_VD02 HD01_VD03 HD02_VD03 HD01_VD04 ... HD01_VD13 HD02_VD13 HD01_VD14 HD02_VD14 HD01_VD15 HD02_VD15 HD01_VD16 HD02_VD16 HD01_VD17 HD02_VD17
0 Id Id2 Geography Estimate; Total: Margin of Error; Total: Estimate; Total: - Less than $10,000 Margin of Error; Total: - Less than $10,000 Estimate; Total: - $10,000 to $14,999 Margin of Error; Total: - $10,000 to $14,999 Estimate; Total: - $15,000 to $19,999 ... Estimate; Total: - $75,000 to $99,999 Margin of Error; Total: - $75,000 to $99,999 Estimate; Total: - $100,000 to $124,999 Margin of Error; Total: - $100,000 to $124,999 Estimate; Total: - $125,000 to $149,999 Margin of Error; Total: - $125,000 to $149,999 Estimate; Total: - $150,000 to $199,999 Margin of Error; Total: - $150,000 to $199,999 Estimate; Total: - $200,000 or more Margin of Error; Total: - $200,000 or more
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 308 73 54 48 14 22 0 ... 37 37 22 25 5 8 39 35 87 70
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 291 86 33 36 15 23 0 ... 0 12 13 22 0 12 0 12 119 63
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 710 114 33 39 51 80 0 ... 151 75 96 53 33 39 118 73 71 42
4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 797 126 47 43 30 33 21 ... 107 59 23 28 59 43 32 37 123 73

5 rows × 37 columns


In [6]:
data_2013.columns


Out[6]:
Index([u'GEO.id', u'GEO.id2', u'GEO.display-label', u'HD01_VD01', u'HD02_VD01',
       u'HD01_VD02', u'HD02_VD02', u'HD01_VD03', u'HD02_VD03', u'HD01_VD04',
       u'HD02_VD04', u'HD01_VD05', u'HD02_VD05', u'HD01_VD06', u'HD02_VD06',
       u'HD01_VD07', u'HD02_VD07', u'HD01_VD08', u'HD02_VD08', u'HD01_VD09',
       u'HD02_VD09', u'HD01_VD10', u'HD02_VD10', u'HD01_VD11', u'HD02_VD11',
       u'HD01_VD12', u'HD02_VD12', u'HD01_VD13', u'HD02_VD13', u'HD01_VD14',
       u'HD02_VD14', u'HD01_VD15', u'HD02_VD15', u'HD01_VD16', u'HD02_VD16',
       u'HD01_VD17', u'HD02_VD17'],
      dtype='object')

In [7]:
data_2013 = data_2013[[u'GEO.id', u'GEO.id2', u'GEO.display-label', u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', 
                       u'HD01_VD05', u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10', u'HD01_VD11', 
                       u'HD01_VD12', u'HD01_VD13', u'HD01_VD14', u'HD01_VD15', u'HD01_VD16', u'HD01_VD17']]

In [8]:
data_2013.head()


Out[8]:
GEO.id GEO.id2 GEO.display-label HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08 HD01_VD09 HD01_VD10 HD01_VD11 HD01_VD12 HD01_VD13 HD01_VD14 HD01_VD15 HD01_VD16 HD01_VD17
0 Id Id2 Geography Estimate; Total: Estimate; Total: - Less than $10,000 Estimate; Total: - $10,000 to $14,999 Estimate; Total: - $15,000 to $19,999 Estimate; Total: - $20,000 to $24,999 Estimate; Total: - $25,000 to $29,999 Estimate; Total: - $30,000 to $34,999 Estimate; Total: - $35,000 to $39,999 Estimate; Total: - $40,000 to $44,999 Estimate; Total: - $45,000 to $49,999 Estimate; Total: - $50,000 to $59,999 Estimate; Total: - $60,000 to $74,999 Estimate; Total: - $75,000 to $99,999 Estimate; Total: - $100,000 to $124,999 Estimate; Total: - $125,000 to $149,999 Estimate; Total: - $150,000 to $199,999 Estimate; Total: - $200,000 or more
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 308 54 14 0 0 0 0 5 5 6 13 21 37 22 5 39 87
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 291 33 15 0 0 0 46 40 12 0 0 13 0 13 0 0 119
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 710 33 51 0 27 13 58 0 0 16 14 29 151 96 33 118 71
4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 797 47 30 21 88 0 35 0 45 0 79 108 107 23 59 32 123

In [9]:
#Unzipping downloaded nyc shapefile in a dataframe
zipfile.ZipFile(os.path.join("data/cb_2015_36_bg_500k.zip")).extractall(r"data/cb_2015_36_bg_500k")

In [10]:
# loading shape file for NYC
nyc_shape = gpd.read_file("data/cb_2015_36_bg_500k/cb_2015_36_bg_500k.shp")
nyc_shape.head()


Out[10]:
AFFGEOID ALAND AWATER BLKGRPCE COUNTYFP GEOID LSAD NAME STATEFP TRACTCE geometry
0 1500000US360610211000 0 307945 0 061 360610211000 BG 0 36 021100 POLYGON ((-73.968082 40.8207, -73.967982575439...
1 1500000US360290131014 557271 351509 4 029 360290131014 BG 4 36 013101 POLYGON ((-78.89711856256349 42.75125713600959...
2 1500000US360050516002 256313 0 2 005 360050516002 BG 2 36 051600 POLYGON ((-73.791504 40.855456, -73.7874279999...
3 1500000US360810183004 33973 0 4 081 360810183004 BG 4 36 018300 POLYGON ((-73.92145099999999 40.743634, -73.92...
4 1500000US360470276003 70988 0 3 047 360470276003 BG 3 36 027600 POLYGON ((-74.001302 40.611068, -74.0010729999...

In [11]:
nyc_shape.columns = [[u'GEO.id',    u'ALAND',   u'AWATER', u'BLKGRPCE', u'COUNTYFP',
          u'GEO.id2',     u'LSAD',     u'NAME',  u'STATEFP',  u'TRACTCE',
       u'geometry']]

In [12]:
nyc_shape.head()


Out[12]:
GEO.id ALAND AWATER BLKGRPCE COUNTYFP GEO.id2 LSAD NAME STATEFP TRACTCE geometry
0 1500000US360610211000 0 307945 0 061 360610211000 BG 0 36 021100 POLYGON ((-73.968082 40.8207, -73.967982575439...
1 1500000US360290131014 557271 351509 4 029 360290131014 BG 4 36 013101 POLYGON ((-78.89711856256349 42.75125713600959...
2 1500000US360050516002 256313 0 2 005 360050516002 BG 2 36 051600 POLYGON ((-73.791504 40.855456, -73.7874279999...
3 1500000US360810183004 33973 0 4 081 360810183004 BG 4 36 018300 POLYGON ((-73.92145099999999 40.743634, -73.92...
4 1500000US360470276003 70988 0 3 047 360470276003 BG 3 36 027600 POLYGON ((-74.001302 40.611068, -74.0010729999...

In [13]:
bky_shape = nyc_shape[[u'GEO.id', u'GEO.id2', u'geometry']]
bky_shape.head()


Out[13]:
GEO.id GEO.id2 geometry
0 1500000US360610211000 360610211000 POLYGON ((-73.968082 40.8207, -73.967982575439...
1 1500000US360290131014 360290131014 POLYGON ((-78.89711856256349 42.75125713600959...
2 1500000US360050516002 360050516002 POLYGON ((-73.791504 40.855456, -73.7874279999...
3 1500000US360810183004 360810183004 POLYGON ((-73.92145099999999 40.743634, -73.92...
4 1500000US360470276003 360470276003 POLYGON ((-74.001302 40.611068, -74.0010729999...

In [ ]:


In [14]:
#Merging 2000 the dataframes to a mother dataframe
income_2000 = pd.merge(data_2000, bky_shape, how='left', on=['GEO.id', 'GEO.id2'])
income_2000.head()


Out[14]:
GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 VD07 ... VD09 VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17 geometry
0 Id Id2 Geography Total: Less than $10,000 $10,000 to $14,999 $15,000 to $19,999 $20,000 to $24,999 $25,000 to $29,999 $30,000 to $34,999 ... $40,000 to $44,999 $45,000 to $49,999 $50,000 to $59,999 $60,000 to $74,999 $75,000 to $99,999 $100,000 to $124,999 $125,000 to $149,999 $150,000 to $199,999 $200,000 or more NaN
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 859 48 45 16 31 18 59 ... 31 8 70 105 68 115 52 39 119 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 1303 63 0 66 44 22 75 ... 94 86 147 174 57 151 131 87 52 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 246 16 6 10 23 10 13 ... 13 12 16 31 23 24 8 15 18 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 118 6 10 15 0 12 9 ... 3 10 15 0 15 7 0 0 0 POLYGON ((-74.012844 40.653016, -74.0150479999...

5 rows × 21 columns


In [15]:
#Merging 2013 the dataframes to a mother dataframe
income_2013 = pd.merge(data_2013, bky_shape, how='left', on=['GEO.id', 'GEO.id2'])
income_2013.head()


Out[15]:
GEO.id GEO.id2 GEO.display-label HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 ... HD01_VD09 HD01_VD10 HD01_VD11 HD01_VD12 HD01_VD13 HD01_VD14 HD01_VD15 HD01_VD16 HD01_VD17 geometry
0 Id Id2 Geography Estimate; Total: Estimate; Total: - Less than $10,000 Estimate; Total: - $10,000 to $14,999 Estimate; Total: - $15,000 to $19,999 Estimate; Total: - $20,000 to $24,999 Estimate; Total: - $25,000 to $29,999 Estimate; Total: - $30,000 to $34,999 ... Estimate; Total: - $40,000 to $44,999 Estimate; Total: - $45,000 to $49,999 Estimate; Total: - $50,000 to $59,999 Estimate; Total: - $60,000 to $74,999 Estimate; Total: - $75,000 to $99,999 Estimate; Total: - $100,000 to $124,999 Estimate; Total: - $125,000 to $149,999 Estimate; Total: - $150,000 to $199,999 Estimate; Total: - $200,000 or more NaN
1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 308 54 14 0 0 0 0 ... 5 6 13 21 37 22 5 39 87 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 291 33 15 0 0 0 46 ... 12 0 0 13 0 13 0 0 119 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 710 33 51 0 27 13 58 ... 0 16 14 29 151 96 33 118 71 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 797 47 30 21 88 0 35 ... 45 0 79 108 107 23 59 32 123 POLYGON ((-73.99271 40.698097, -73.991759 40.7...

5 rows × 21 columns


In [ ]:


In [16]:
#Saving the dataset as csv
income_2000.to_csv('income_2000.csv')
income_2013.to_csv('income_2013.csv')

In [17]:
#Checking the CSV
data2013 = pd.read_csv('income_2013.csv')
data2013.head()


Out[17]:
Unnamed: 0 GEO.id GEO.id2 GEO.display-label HD01_VD01 HD01_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 ... HD01_VD09 HD01_VD10 HD01_VD11 HD01_VD12 HD01_VD13 HD01_VD14 HD01_VD15 HD01_VD16 HD01_VD17 geometry
0 0 Id Id2 Geography Estimate; Total: Estimate; Total: - Less than $10,000 Estimate; Total: - $10,000 to $14,999 Estimate; Total: - $15,000 to $19,999 Estimate; Total: - $20,000 to $24,999 Estimate; Total: - $25,000 to $29,999 ... Estimate; Total: - $40,000 to $44,999 Estimate; Total: - $45,000 to $49,999 Estimate; Total: - $50,000 to $59,999 Estimate; Total: - $60,000 to $74,999 Estimate; Total: - $75,000 to $99,999 Estimate; Total: - $100,000 to $124,999 Estimate; Total: - $125,000 to $149,999 Estimate; Total: - $150,000 to $199,999 Estimate; Total: - $200,000 or more NaN
1 1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 308 54 14 0 0 0 ... 5 6 13 21 37 22 5 39 87 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 291 33 15 0 0 0 ... 12 0 0 13 0 13 0 0 119 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 710 33 51 0 27 13 ... 0 16 14 29 151 96 33 118 71 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 797 47 30 21 88 0 ... 45 0 79 108 107 23 59 32 123 POLYGON ((-73.99271 40.698097, -73.991759 40.7...

5 rows × 22 columns

Good to go!


In [18]:
data2013.columns


Out[18]:
Index([u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label',
       u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04', u'HD01_VD05',
       u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10',
       u'HD01_VD11', u'HD01_VD12', u'HD01_VD13', u'HD01_VD14', u'HD01_VD15',
       u'HD01_VD16', u'HD01_VD17', u'geometry'],
      dtype='object')

In [19]:
data2013.columns = [[u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label',
       u'VD01', u'VD02', u'VD03', u'VD04', u'VD05',
       u'VD06', u'VD07', u'VD08', u'VD09', u'VD10',
       u'VD11', u'VD12', u'VD13', u'VD14', u'VD15',
       u'VD16', u'VD17', u'geometry']]

In [20]:
data2013.drop([0], inplace=True)
data2013.head()


Out[20]:
Unnamed: 0 GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 ... VD09 VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17 geometry
1 1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 308 54 14 0 0 0 ... 5 6 13 21 37 22 5 39 87 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 291 33 15 0 0 0 ... 12 0 0 13 0 13 0 0 119 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 710 33 51 0 27 13 ... 0 16 14 29 151 96 33 118 71 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 4 1500000US360470001004 360470001004 Block Group 4, Census Tract 1, Kings County, N... 797 47 30 21 88 0 ... 45 0 79 108 107 23 59 32 123 POLYGON ((-73.99271 40.698097, -73.991759 40.7...
5 5 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 371 48 5 48 21 29 ... 5 19 47 23 16 33 0 5 0 POLYGON ((-74.012844 40.653016, -74.0150479999...

5 rows × 22 columns


In [21]:
#Checking the CSV
data2000 = pd.read_csv('income_2000.csv')
data2000.head()


Out[21]:
Unnamed: 0 GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 ... VD09 VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17 geometry
0 0 Id Id2 Geography Total: Less than $10,000 $10,000 to $14,999 $15,000 to $19,999 $20,000 to $24,999 $25,000 to $29,999 ... $40,000 to $44,999 $45,000 to $49,999 $50,000 to $59,999 $60,000 to $74,999 $75,000 to $99,999 $100,000 to $124,999 $125,000 to $149,999 $150,000 to $199,999 $200,000 or more NaN
1 1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 859 48 45 16 31 18 ... 31 8 70 105 68 115 52 39 119 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 1303 63 0 66 44 22 ... 94 86 147 174 57 151 131 87 52 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 246 16 6 10 23 10 ... 13 12 16 31 23 24 8 15 18 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 4 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 118 6 10 15 0 12 ... 3 10 15 0 15 7 0 0 0 POLYGON ((-74.012844 40.653016, -74.0150479999...

5 rows × 22 columns

Good to go!


In [22]:
data2000.columns


Out[22]:
Index([u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label', u'VD01',
       u'VD02', u'VD03', u'VD04', u'VD05', u'VD06', u'VD07', u'VD08', u'VD09',
       u'VD10', u'VD11', u'VD12', u'VD13', u'VD14', u'VD15', u'VD16', u'VD17',
       u'geometry'],
      dtype='object')

In [23]:
data2000.columns = [[u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label',
       u'VD01', u'VD02', u'VD03', u'VD04', u'VD05',
       u'VD06', u'VD07', u'VD08', u'VD09', u'VD10',
       u'VD11', u'VD12', u'VD13', u'VD14', u'VD15',
       u'VD16', u'VD17', u'geometry']]

In [24]:
data2000.drop([0], inplace=True)
data2000.head()


Out[24]:
Unnamed: 0 GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 ... VD09 VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17 geometry
1 1 1500000US360470001001 360470001001 Block Group 1, Census Tract 1, Kings County, N... 859 48 45 16 31 18 ... 31 8 70 105 68 115 52 39 119 POLYGON ((-73.99669799999999 40.700877, -73.99...
2 2 1500000US360470001002 360470001002 Block Group 2, Census Tract 1, Kings County, N... 1303 63 0 66 44 22 ... 94 86 147 174 57 151 131 87 52 POLYGON ((-73.995379 40.700309, -73.993672 40....
3 3 1500000US360470001003 360470001003 Block Group 3, Census Tract 1, Kings County, N... 246 16 6 10 23 10 ... 13 12 16 31 23 24 8 15 18 POLYGON ((-73.993672 40.699836, -73.9926119999...
4 4 1500000US360470002001 360470002001 Block Group 1, Census Tract 2, Kings County, N... 118 6 10 15 0 12 ... 3 10 15 0 15 7 0 0 0 POLYGON ((-74.012844 40.653016, -74.0150479999...
5 5 1500000US360470002002 360470002002 Block Group 2, Census Tract 2, Kings County, N... 232 46 29 18 25 19 ... 4 5 14 16 12 0 11 0 0 NaN

5 rows × 22 columns


In [25]:
data1 = pd.read_csv('metrotech.csv')
data1.head()


Out[25]:
Unnamed: 0 GEO.id
0 0 1500000US360470033002
1 1 1500000US360470037001
2 2 1500000US360470009002
3 3 1500000US360470043002
4 4 1500000US360470043001

In [28]:
met2000 = pd.merge(data2000, data1,on='GEO.id', how='right')
met2000.head()


Out[28]:
Unnamed: 0_x GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 ... VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17 geometry Unnamed: 0_y
0 19 1500000US360470007003 360470007003 Block Group 3, Census Tract 7, Kings County, N... 516 10 0 25 40 8 ... 9 22 52 75 47 16 40 120 POLYGON ((-73.995914 40.691416, -73.99508 40.6... 23
1 20 1500000US360470009001 360470009001 Block Group 1, Census Tract 9, Kings County, N... 682 76 29 19 33 7 ... 33 21 62 125 44 50 52 83 POLYGON ((-73.99310299999999 40.692846, -73.99... 11
2 20 1500000US360470009001 360470009001 Block Group 1, Census Tract 9, Kings County, N... 682 76 29 19 33 7 ... 33 21 62 125 44 50 52 83 POLYGON ((-73.99310299999999 40.692846, -73.99... 12
3 21 1500000US360470009002 360470009002 Block Group 2, Census Tract 9, Kings County, N... 366 40 9 0 10 7 ... 19 45 25 35 29 59 19 39 POLYGON ((-73.993736 40.69153499999999, -73.98... 2
4 22 1500000US360470011001 360470011001 Block Group 1, Census Tract 11, Kings County, ... 75 0 0 0 0 42 ... 0 0 0 8 0 0 0 0 POLYGON ((-73.990447 40.693735, -73.990663 40.... 10

5 rows × 23 columns


In [29]:
#Saving the dataset as csv
met2000.to_csv('met_2000.csv')

In [30]:
met2013 = pd.merge(data2013, data1,on='GEO.id', how='right')
met2013.head()


Out[30]:
Unnamed: 0_x GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 ... VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17 geometry Unnamed: 0_y
0 18 1500000US360470007003 360470007003 Block Group 3, Census Tract 7, Kings County, N... 761 40 0 0 9 18 ... 0 35 14 65 63 92 71 279 POLYGON ((-73.995914 40.691416, -73.99508 40.6... 23
1 19 1500000US360470009001 360470009001 Block Group 1, Census Tract 9, Kings County, N... 1116 74 17 0 0 25 ... 57 47 138 110 145 48 96 307 POLYGON ((-73.99310299999999 40.692846, -73.99... 11
2 19 1500000US360470009001 360470009001 Block Group 1, Census Tract 9, Kings County, N... 1116 74 17 0 0 25 ... 57 47 138 110 145 48 96 307 POLYGON ((-73.99310299999999 40.692846, -73.99... 12
3 20 1500000US360470009002 360470009002 Block Group 2, Census Tract 9, Kings County, N... 934 69 0 8 45 24 ... 0 49 12 45 85 30 187 304 POLYGON ((-73.993736 40.69153499999999, -73.98... 2
4 21 1500000US360470011001 360470011001 Block Group 1, Census Tract 11, Kings County, ... 313 0 7 0 4 0 ... 0 21 30 40 48 40 90 21 POLYGON ((-73.990447 40.693735, -73.990663 40.... 10

5 rows × 23 columns


In [31]:
#Saving the dataset as csv
met2013.to_csv('met_2013.csv')

In [32]:
data1 = pd.read_csv('willy.csv')
data1.head()


Out[32]:
Unnamed: 0 GEO.id
0 0 1500000US360470551003
1 3 1500000US360470551002
2 6 1500000US360470519003
3 14 1500000US360470551001
4 22 1500000US360470523004

In [33]:
willy2000 = pd.merge(data2000, data1,on='GEO.id', how='right')
willy2000.head()


Out[33]:
Unnamed: 0_x GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 ... VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17 geometry Unnamed: 0_y
0 1307 1500000US360470517001 360470517001 Block Group 1, Census Tract 517, Kings County,... 191 19 14 11 0 16 ... 0 7 53 38 0 0 0 9 POLYGON ((-73.95713099999999 40.718388, -73.95... 107
1 1308 1500000US360470517002 360470517002 Block Group 2, Census Tract 517, Kings County,... 525 64 32 23 22 45 ... 20 88 71 46 28 11 0 0 POLYGON ((-73.958416 40.719189, -73.955443 40.... 111
2 1312 1500000US360470519001 360470519001 Block Group 1, Census Tract 519, Kings County,... 405 87 9 21 38 21 ... 39 20 10 22 9 10 0 0 POLYGON ((-73.955844 40.717589, -73.9552509999... 71
3 1313 1500000US360470519002 360470519002 Block Group 2, Census Tract 519, Kings County,... 248 21 27 7 22 20 ... 9 33 20 7 0 0 0 0 POLYGON ((-73.956439 40.717032, -73.952754 40.... 56
4 1314 1500000US360470519003 360470519003 Block Group 3, Census Tract 519, Kings County,... 678 117 37 36 24 67 ... 43 50 65 49 30 0 9 5 POLYGON ((-73.958811 40.714824, -73.9582229999... 6

5 rows × 23 columns


In [34]:
#Saving the dataset as csv
willy2000.to_csv('wil_2000.csv')

In [35]:
willy2013 = pd.merge(data2013, data1,on='GEO.id', how='right')
willy2013.head()


Out[35]:
Unnamed: 0_x GEO.id GEO.id2 GEO.display-label VD01 VD02 VD03 VD04 VD05 VD06 ... VD10 VD11 VD12 VD13 VD14 VD15 VD16 VD17 geometry Unnamed: 0_y
0 1298 1500000US360470517001 360470517001 Block Group 1, Census Tract 517, Kings County,... 293 36 10 14 0 7 ... 7 30 21 53 18 0 37 20 POLYGON ((-73.95713099999999 40.718388, -73.95... 107
1 1299 1500000US360470517002 360470517002 Block Group 2, Census Tract 517, Kings County,... 621 22 19 28 31 0 ... 89 17 55 106 71 35 43 98 POLYGON ((-73.958416 40.719189, -73.955443 40.... 111
2 1303 1500000US360470519001 360470519001 Block Group 1, Census Tract 519, Kings County,... 788 52 28 18 14 13 ... 76 26 31 140 72 39 91 94 POLYGON ((-73.955844 40.717589, -73.9552509999... 71
3 1304 1500000US360470519002 360470519002 Block Group 2, Census Tract 519, Kings County,... 571 29 10 0 26 30 ... 0 12 20 202 9 28 111 70 POLYGON ((-73.956439 40.717032, -73.952754 40.... 56
4 1305 1500000US360470519003 360470519003 Block Group 3, Census Tract 519, Kings County,... 563 13 38 72 0 0 ... 19 28 98 95 20 0 10 60 POLYGON ((-73.958811 40.714824, -73.9582229999... 6

5 rows × 23 columns


In [36]:
#Saving the dataset as csv
willy2013.to_csv('data/will_2013.csv')