In [24]:
import json
import pandas as pd

In [2]:
active_pensions = "active_pensions.csv"

In [9]:
apframe = pd.read_csv(active_pensions)

In [10]:
apframe.MEMBER_MI=apframe.MEMBER_MI.fillna(value='none')
apframe.MEMBER_LAST_NAME = apframe.MEMBER_LAST_NAME.fillna(value='none')
apframe.MEMBER_FIRST_NAME = apframe.MEMBER_FIRST_NAME.fillna(value='none')
apframe.ALL_EMPLOYERS_SALARY_AMT=apframe.ALL_EMPLOYERS_SALARY_AMT.apply(lambda x: float(x[1:]))

In [11]:
deduped = apframe.drop_duplicates(['MEMBER_LAST_NAME', 'MEMBER_FIRST_NAME', 'MEMBER_MI', 'BIRTH_YEAR'])

In [14]:
statewide = deduped[deduped.LOCATION_NAME=='STATEWIDE']

In [15]:
state_median = statewide.ALL_EMPLOYERS_SALARY_AMT.median()

In [16]:
rest = apframe[apframe.LOCATION_NAME!="STATEWIDE"]

In [17]:
counties = rest.LOCATION_NAME.unique()
data_list = {}

In [18]:
county_meds = "county_median_sals.csv"
overall_medians = pd.read_csv(county_meds, dtype={'FIPS':'str'})

In [19]:
combined=pd.merge(rest, overall_medians, left_on="LOCATION_NAME", right_on="County Name", how="left")

In [20]:
for county in counties:
    if county=='REGIONAL':
        pass
    else:
        FIPS = combined.FIPS[combined.LOCATION_NAME==county].unique()[0]
        county_dict={}
        this_county = combined[combined.LOCATION_NAME==county]
        teachers = this_county[this_county.PENSION_FUND_ID==1]
        public = this_county[this_county.PENSION_FUND_ID==2]
        copsfire = this_county[this_county.PENSION_FUND_ID==3]
        teach_med = teachers.ALL_EMPLOYERS_SALARY_AMT.median()
        public_med = public.ALL_EMPLOYERS_SALARY_AMT.median()
        cops_med = copsfire[copsfire.PENSION_GROUP_ID==1].ALL_EMPLOYERS_SALARY_AMT.median()
        fire_med = copsfire[copsfire.PENSION_GROUP_ID==2].ALL_EMPLOYERS_SALARY_AMT.median()
        overall=this_county.A_MEDIAN.unique()
        county_dict['OVERALL']=overall[0]
        county_dict['COUNTY']=county
        county_dict['STATEWIDE']=state_median
        county_dict['PUBLIC_EMPLOYEES']=public_med
        county_dict['TEACHERS']=teach_med
        county_dict['POLICE']=cops_med
        county_dict['FIRE']=fire_med
        data_list[FIPS]=county_dict

In [169]:
my_json="data.json"

with open(my_json, "wb") as j:
    json.dump(data_list, j)