In [1]:
import pandas as pd
import numpy as np

atlas_df = pd.read_csv('food_atlas_local.csv')
fruit_df = pd.read_csv('fruit.csv') 
veg_df = pd.read_csv('vegetables.csv')
berry_df = pd.read_csv('berries.csv')
rural_df = pd.read_csv('ruralurbancodes2013.csv')

In [2]:
#rename atlas_df column to match other files
atlas_df.rename(columns={'id':'FIPS'}, inplace=True)

#rename 
atlas_df.rename(columns={'DIRSALES_FARMS07':'Direct Sale Farms'}, inplace=True)

In [3]:
atlas_df.head()


Out[3]:
FIPS State County Direct Sale Farms PCT_LOCLFARM07 PCT_LOCLSALE07 DIRSALES07 PC_DIRSALES07 FMRKT09 FMRKT13 PCH_FMRKT_09_13 FMRKTPTH09 FMRKTPTH13 PCH_FMRKTPTH_09_13 FMRKT_SNAP13 PCT_FMRKT_SNAP13 FMRKT_WIC13 PCT_FMRKT_WIC13 FMRKT_WICCASH13 PCT_FMRKT_WICCASH13
0 1001 AL Autauga 25 6.0 0.6 100 2.01 2 1 -50 0.039404 0.018013 -54.285405 0 0 0 0 0 0 ...
1 1003 AL Baldwin 80 7.0 0.7 715 4.16 4 4 0 0.022237 0.020965 -5.719377 0 0 0 0 0 0 ...
2 1005 AL Barbour 18 2.9 0.0 11 0.37 2 3 50 0.067256 0.110290 63.984780 0 0 0 0 0 0 ...
3 1007 AL Bibb 12 5.7 NaN 46 2.14 1 1 0 0.046324 0.044254 -4.469620 0 0 0 0 0 0 ...
4 1009 AL Blount 84 5.9 0.3 429 7.59 1 1 0 0.017139 0.017293 0.897520 0 0 1 100 0 0 ...

5 rows × 49 columns


In [4]:
print len(fruit_df)
print len(veg_df)
print len(berry_df)
print len(atlas_df)
print len(rural_df)


2634
2788
2339
3178
3221

In [5]:
master_df = pd.merge(atlas_df, rural_df, on='FIPS', how='outer')
len(master_df)


Out[5]:
3256

In [6]:
master_df = pd.merge(master_df, veg_df, on='FIPS', how='outer')
len(master_df)


Out[6]:
3256

In [7]:
master_df = pd.merge(master_df, fruit_df, on='FIPS', how='outer')
len(master_df)


Out[7]:
3256

In [8]:
master_df = pd.merge(master_df, berry_df, on='FIPS', how='outer')
len(master_df)


Out[8]:
3256

In [9]:
master_df.FIPS.astype(int)
master_df = master_df.set_index('FIPS')

In [10]:
# remove duplicate state column resulting from merge
master_df = master_df.drop('State_y', 1)

In [11]:
master_df.rename(columns={'State_x':'State'}, inplace=True)

In [12]:
master_df.head()


Out[12]:
State County Direct Sale Farms PCT_LOCLFARM07 PCT_LOCLSALE07 DIRSALES07 PC_DIRSALES07 FMRKT09 FMRKT13 PCH_FMRKT_09_13 FMRKTPTH09 FMRKTPTH13 PCH_FMRKTPTH_09_13 FMRKT_SNAP13 PCT_FMRKT_SNAP13 FMRKT_WIC13 PCT_FMRKT_WIC13 FMRKT_WICCASH13 PCT_FMRKT_WICCASH13 FMRKT_SFMNP13
FIPS
1001 AL Autauga 25 6.0 0.6 100 2.01 2 1 -50 0.039404 0.018013 -54.285405 0 0 0 0 0 0 0 ...
1003 AL Baldwin 80 7.0 0.7 715 4.16 4 4 0 0.022237 0.020965 -5.719377 0 0 0 0 0 0 3 ...
1005 AL Barbour 18 2.9 0.0 11 0.37 2 3 50 0.067256 0.110290 63.984780 0 0 0 0 0 0 0 ...
1007 AL Bibb 12 5.7 NaN 46 2.14 1 1 0 0.046324 0.044254 -4.469620 0 0 0 0 0 0 0 ...
1009 AL Blount 84 5.9 0.3 429 7.59 1 1 0 0.017139 0.017293 0.897520 0 0 1 100 0 0 1 ...

5 rows × 147 columns


In [13]:
master_df.to_csv('master_data.csv')

In [14]:
len(master_df)


Out[14]:
3256

In [15]:
# needed to drop one duplicate row for json convert to work on unique rows
master_df2 = master_df.drop_duplicates()
len(master_df2)


Out[15]:
3255

In [17]:
master_df2.to_json('master_data.json', orient='columns', double_precision=0)