In [1]:
import pandas as pd
In [2]:
df = pd.read_csv('files/all_claims_files.csv')
In [4]:
#df.columns
In [5]:
df['year'] = df.file_name.map(lambda x: x[6:10])
In [6]:
df.year.value_counts()
Out[6]:
In [7]:
# Re-index the Data Frame to have a two-dimensional index
df = df.set_index(['DESYNPUF_ID', 'year'])
In [9]:
# Split the Data Frame
gb = df.groupby(level=[0, 1])
In [10]:
# Define a trend assumption by service type
trend = {}
trend['IP'] = 1.06
trend['CAR'] = 1.03
trend['OP'] = 1.05
In [11]:
# The apply function is passed the group as a Data Frame.
# x.name is the index (a tuple if the index is more than one dimension)
def calculate_premium(x):
member_name = x.name[0] # use the x.name attribute to access the first element of the index tuple
year = x.name[1] # use the x.name attribute to access the second element of the index tuple
member = x.loc[member_name] # use the loc method to access the member
member_year = member.loc[year] # use the loc method to access the year
# calculate the premium
if year == '2008':
weight = 0.1
trend_exp = 2
elif year == '2009':
weight = 0.3
trend_exp = 1
else:
weight = 0.6
trend_exp = 0
exposure = member_year['PLAN_CVRG_MOS_NUM']
claims_amt = member_year['MEDREIMB_IP'] * trend['IP'] ** trend_exp + \
member_year['MEDREIMB_OP'] * trend['OP'] ** trend_exp + \
member_year['MEDREIMB_CAR'] * trend['CAR'] ** trend_exp
if exposure == 0:
premium = 0
else:
premium = weight * (claims_amt / exposure) * 12
return premium
In [12]:
premiums = gb.apply(calculate_premium)
In [14]:
# Combine step
# Find average premium PMPM
premiums.mean()
Out[14]:
In [ ]: