(c) Karen Belita
Team Neighborhood Change
Last Updated 9/7/2016
To create a table that has the CBSA code for City and State abbreviation that belongs to that MSA that. MSA = Metropolitan Statistical Area
CBSA-CITY_STATE.csv --- for ing_wr_walkscore and ing_wr_FTA
In [6]:
import os
import requests
import pandas as pd
import csv
import urllib2
import openpyxl
import csv
import xlrd
import numpy as np
In [8]:
xls_file = os.path.join(os.getcwd(), 'MSA_principal.xls')
data_xls = pd.read_excel(xls_file, 'List 2', index_col=None)
data_xls.to_csv('principal.csv', encoding='utf-8')
In [9]:
df = pd.read_csv('principal.csv')
In [10]:
df.columns = ["DELETE", "CBSA", "CBSA_TITLE", "CBSA TYPE", "PRINCIPAL", "FIPS_STATE", "FIPS_PLACE"]
In [14]:
df.head(10)
Out[14]:
In [15]:
df = df.drop([0,1]) ## drop rows
In [16]:
df.head()
Out[16]:
In [17]:
df.drop(df.columns[[0]], axis=1, inplace=True) ## remove column 0
In [18]:
df["FIPS"] = df["FIPS_STATE"].map(str) + df["FIPS_PLACE"].map(str) ## creates new wrong column called MSA
In [19]:
df.head()
Out[19]:
In [24]:
df_ABB = df.drop(df.columns[[2,5,6]], axis=1) ## remove FIPS and FIPS_PLACE
In [25]:
df_ABB.drop(df_ABB.tail(4).index, inplace=True) ## remove the bottom
In [26]:
df_ABB.tail()
Out[26]:
In [23]:
df_ABB.head()
Out[23]:
In [27]:
xls_state = os.path.join(os.getcwd(), 'MSA_STATE.xls')
data_state= pd.read_excel(xls_state, 'cqr_universe_fixedwidth_all', index_col=None)
data_state.to_csv('state.csv', encoding='utf-8')
In [28]:
dfs_x= pd.read_csv('state.csv')
In [29]:
dfs_x.drop(dfs_x.columns[[0,3,4,5,6,7]], axis=1, inplace=True) ## remove column 0
In [30]:
dfs_x.columns = ["STATE_ABB", "FIPS_STATE"]
In [31]:
dfs_x["FIPS_STATE"] = dfs_x.FIPS_STATE.apply(lambda x: str(int(x)).zfill(2))
In [32]:
dfs_x.head()
Out[32]:
In [65]:
df_c_abb = pd.merge(df_ABB, dfs_x, how = "left", on = "FIPS_STATE")
In [66]:
df_c_abb = df_c_abb.drop_duplicates(take_last=True) ## REMOVE DUPLICATES
In [67]:
df_c_abb.head()
Out[67]:
In [68]:
df_c_abb.reset_index(level=0, inplace=True) ## reset
In [69]:
df_c_abb.head()
Out[69]:
In [70]:
df_c_abb.drop(df_c_abb.columns[[0,2,4]], axis=1, inplace=True) ## remove column 0
In [71]:
df_c_abb.head()
Out[71]:
In [72]:
df_c_abb.columns = ["CBSA", "CITY", "STATE"]
In [73]:
df_c_abb.head()
Out[73]:
In [40]:
df_c_abb.to_csv("CBSA-CITY-ST.csv")
In [ ]: