In [10]:
import pandas as pd
In [11]:
df = pd.read_csv('MedianValueSqFt_by_zip.csv', converters={'RegionName': lambda x: str(x)})
In [12]:
df2 = pd.read_csv('ACS.csv', converters={'zip.code.tabulation.area.': lambda x: str(x)})
In [13]:
df2.head()
Out[13]:
In [14]:
df2['RegionName'] = df2['zip.code.tabulation.area.']
In [15]:
res_df = df.merge(df2, how='right', on='RegionName')
In [16]:
res_df.reset_index(drop=True).head()
Out[16]:
In [8]:
res_df['RegionName'] = res_df['RegionName'].astype(str)
In [17]:
res_df.head()
Out[17]:
In [18]:
res_df.count()
Out[18]:
In [19]:
res_df.to_csv('right_joined_census.csv')
In [20]:
trunc_csv = res_df[['RegionID', 'RegionName', 'City', 'State', 'Metro', 'CountyName', '2015-01', '2016-01', '2016-02', 'B01001_011E', 'B01001_035E', 'B02001_002E', 'B02001_003E', 'B02001_004E', 'B02001_005E', 'B02001_006E', 'B02001_007E', 'B02001_008E', 'B03001_003E', 'B19013_001E', 'B19113_001E', 'B25001_001E', 'B25002_002E', 'B25003_002E', 'B25075_023E', 'B25075_024E', 'B25075_025E', 'B25002_003E', 'B25077_001E', 'B25064_001E'
]]
In [21]:
trunc_csv.count()
Out[21]:
In [23]:
trunc_csv.to_csv('truncated_csv.csv',index=False)
In [ ]: