In [1]:
from __future__ import print_function, division
import pandas as pd
import pylab as pl
import os
import csv
from pandas.tools.plotting import scatter_matrix
import sys
reload(sys)
import zipfile
import geopandas as gpd
sys.setdefaultencoding('utf-8')
%pylab inline
In [2]:
#Unzippping downloaded median income data of 1999 in a dataframe
zipfile.ZipFile(os.path.join("data/income_00.zip")).extractall(r"data/income_00")
In [3]:
#Reading and saving downloaded median income data of 1999 in a dataframe
data_2000 = pd.read_csv('data/income_00/DEC_00_SF3_P052_with_ann.csv')
data_2000.head()
Out[3]:
In [4]:
#Unzipping downloaded median income data of 2013 in a dataframe
zipfile.ZipFile(os.path.join("data/income_13.zip")).extractall(r"data/income_13")
In [5]:
#Reading and saving downloaded median income data of 2013 in a dataframe
data_2013 = pd.read_csv('data/income_13/ACS_13_5YR_B19001_with_ann.csv')
data_2013.head()
Out[5]:
In [6]:
data_2013.columns
Out[6]:
In [7]:
data_2013 = data_2013[[u'GEO.id', u'GEO.id2', u'GEO.display-label', u'HD01_VD01', u'HD01_VD02', u'HD01_VD03', u'HD01_VD04',
u'HD01_VD05', u'HD01_VD06', u'HD01_VD07', u'HD01_VD08', u'HD01_VD09', u'HD01_VD10', u'HD01_VD11',
u'HD01_VD12', u'HD01_VD13', u'HD01_VD14', u'HD01_VD15', u'HD01_VD16', u'HD01_VD17']]
In [8]:
data_2013.head()
Out[8]:
In [9]:
#Unzipping downloaded nyc shapefile in a dataframe
zipfile.ZipFile(os.path.join("data/cb_2015_36_bg_500k.zip")).extractall(r"data/cb_2015_36_bg_500k")
In [10]:
# loading shape file for NYC
nyc_shape = gpd.read_file("data/cb_2015_36_bg_500k/cb_2015_36_bg_500k.shp")
nyc_shape.head()
Out[10]:
In [11]:
nyc_shape.columns = [[u'GEO.id', u'ALAND', u'AWATER', u'BLKGRPCE', u'COUNTYFP',
u'GEO.id2', u'LSAD', u'NAME', u'STATEFP', u'TRACTCE',
u'geometry']]
In [12]:
nyc_shape.head()
Out[12]:
In [13]:
bky_shape = nyc_shape[[u'GEO.id', u'GEO.id2', u'geometry']]
bky_shape.head()
Out[13]:
In [ ]:
In [14]:
#Merging 2000 the dataframes to a mother dataframe
income_2000 = pd.merge(data_2000, bky_shape, how='left', on=['GEO.id', 'GEO.id2'])
income_2000.head()
Out[14]:
In [15]:
#Merging 2013 the dataframes to a mother dataframe
income_2013 = pd.merge(data_2013, bky_shape, how='left', on=['GEO.id', 'GEO.id2'])
income_2013.head()
Out[15]:
In [ ]:
In [16]:
#Saving the dataset as csv
income_2000.to_csv('income_2000.csv')
income_2013.to_csv('income_2013.csv')
In [17]:
#Checking the CSV
data2013 = pd.read_csv('income_2013.csv')
data2013.head()
Out[17]:
In [18]:
data2013.columns
Out[18]:
In [19]:
data2013.columns = [[u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label',
u'VD01', u'VD02', u'VD03', u'VD04', u'VD05',
u'VD06', u'VD07', u'VD08', u'VD09', u'VD10',
u'VD11', u'VD12', u'VD13', u'VD14', u'VD15',
u'VD16', u'VD17', u'geometry']]
In [20]:
data2013.drop([0], inplace=True)
data2013.head()
Out[20]:
In [21]:
#Checking the CSV
data2000 = pd.read_csv('income_2000.csv')
data2000.head()
Out[21]:
In [22]:
data2000.columns
Out[22]:
In [23]:
data2000.columns = [[u'Unnamed: 0', u'GEO.id', u'GEO.id2', u'GEO.display-label',
u'VD01', u'VD02', u'VD03', u'VD04', u'VD05',
u'VD06', u'VD07', u'VD08', u'VD09', u'VD10',
u'VD11', u'VD12', u'VD13', u'VD14', u'VD15',
u'VD16', u'VD17', u'geometry']]
In [24]:
data2000.drop([0], inplace=True)
data2000.head()
Out[24]:
In [25]:
data1 = pd.read_csv('metrotech.csv')
data1.head()
Out[25]:
In [28]:
met2000 = pd.merge(data2000, data1,on='GEO.id', how='right')
met2000.head()
Out[28]:
In [29]:
#Saving the dataset as csv
met2000.to_csv('met_2000.csv')
In [30]:
met2013 = pd.merge(data2013, data1,on='GEO.id', how='right')
met2013.head()
Out[30]:
In [31]:
#Saving the dataset as csv
met2013.to_csv('met_2013.csv')
In [32]:
data1 = pd.read_csv('willy.csv')
data1.head()
Out[32]:
In [33]:
willy2000 = pd.merge(data2000, data1,on='GEO.id', how='right')
willy2000.head()
Out[33]:
In [34]:
#Saving the dataset as csv
willy2000.to_csv('wil_2000.csv')
In [35]:
willy2013 = pd.merge(data2013, data1,on='GEO.id', how='right')
willy2013.head()
Out[35]:
In [36]:
#Saving the dataset as csv
willy2013.to_csv('will_2013.csv')