In [1]:
import pandas as pd
import numpy as np
atlas_df = pd.read_csv('food_atlas_local.csv')
fruit_df = pd.read_csv('fruit.csv')
veg_df = pd.read_csv('vegetables.csv')
berry_df = pd.read_csv('berries.csv')
rural_df = pd.read_csv('ruralurbancodes2013.csv')
In [2]:
#rename atlas_df column to match other files
atlas_df.rename(columns={'id':'FIPS'}, inplace=True)
#rename
atlas_df.rename(columns={'DIRSALES_FARMS07':'Direct Sale Farms'}, inplace=True)
In [3]:
atlas_df.head()
Out[3]:
In [4]:
print len(fruit_df)
print len(veg_df)
print len(berry_df)
print len(atlas_df)
print len(rural_df)
In [5]:
master_df = pd.merge(atlas_df, rural_df, on='FIPS', how='outer')
len(master_df)
Out[5]:
In [6]:
master_df = pd.merge(master_df, veg_df, on='FIPS', how='outer')
len(master_df)
Out[6]:
In [7]:
master_df = pd.merge(master_df, fruit_df, on='FIPS', how='outer')
len(master_df)
Out[7]:
In [8]:
master_df = pd.merge(master_df, berry_df, on='FIPS', how='outer')
len(master_df)
Out[8]:
In [9]:
master_df.FIPS.astype(int)
master_df = master_df.set_index('FIPS')
In [10]:
# remove duplicate state column resulting from merge
master_df = master_df.drop('State_y', 1)
In [11]:
master_df.rename(columns={'State_x':'State'}, inplace=True)
In [12]:
master_df.head()
Out[12]:
In [13]:
master_df.to_csv('master_data.csv')
In [14]:
len(master_df)
Out[14]:
In [15]:
# needed to drop one duplicate row for json convert to work on unique rows
master_df2 = master_df.drop_duplicates()
len(master_df2)
Out[15]:
In [17]:
master_df2.to_json('master_data.json', orient='columns', double_precision=0)