In [180]:
import pandas as pd

In [181]:
salary_data = "oesdata14/oesm14ma/MSA_M2014_dl.xlsx"

In [182]:
salframe = pd.read_excel(salary_data)

In [183]:
mystatesframe = salframe[(salframe.PRIM_STATE=='NJ')|(salframe.PRIM_STATE=='PA')|(salframe.PRIM_STATE=='DE')|(salframe.PRIM_STATE=='NY')]

In [184]:
myalloccframe = mystatesframe[njframe.OCC_TITLE == 'All Occupations']

In [185]:
njalloccframe=myalloccframe[myallframe['AREA_NAME'].str.contains('NJ')]

In [186]:
njourframe = njallframe.ix[:,['AREA', 'AREA_NAME', 'TOT_EMP', 'A_MEDIAN']]

In [187]:
njmsds=njourframe[njourframe.AREA_NAME.str.contains('Division')]
njmsa=njourframe[njourframe['AREA_NAME'].str.endswith('NJ')]

In [188]:
msa_data = "msacounties.csv"
msd_data = "msdcounties.csv"

In [189]:
msaframe = pd.read_csv(msa_data, names = ['ID', 'MSA Name', 'FIPS', 'County Name'], skipfooter=2)
msdframe = pd.read_csv(msd_data, names = ['ID', 'MSA Name', 'FIPS', 'County Name'], skipfooter=2)

In [190]:
msdframe.ID[msdframe.ID==35614]=35644

In [191]:
joinedmsaframe = pd.merge(njmsa, msaframe, left_on='AREA', right_on='ID', how='inner')

In [192]:
joinedmsdframe = pd.merge(njmsds, msdframe, left_on='AREA', right_on='ID', how='inner')

In [193]:
njmsds = joinedmsdframe[joinedmsdframe['County Name'].str.contains('NJ')]

In [194]:
njmsas= joinedmsaframe[joinedmsaframe['County Name'].str.contains('NJ')]

In [195]:
mycounties = njmsds.append(njmsas)

In [220]:
mediansals = mycounties.ix[:, ['FIPS', 'County Name', 'A_MEDIAN']]
mediansals['County Name']=mediansals['County Name'].map(str.strip)

In [221]:
mediansals['County Name'] = mediansals['County Name'].map(str.upper)

In [232]:
mediansals['County Name'] = mediansals['County Name'].apply(lambda x: x.split(',')[0]+ ' COUNTY')

In [233]:
mediansals.to_csv('county_median_sals.csv', index=False)

In [ ]: