In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('files/all_claims_files.csv')

In [4]:
#df.columns

In [5]:
df['year'] = df.file_name.map(lambda x: x[6:10])

In [6]:
df.year.value_counts()


Out[6]:
2008    2000
2009    2000
2010    2000
Name: year, dtype: int64

In [7]:
# Re-index the Data Frame to have a two-dimensional index
df = df.set_index(['DESYNPUF_ID', 'year'])

In [9]:
# Split the Data Frame
gb = df.groupby(level=[0, 1])

In [10]:
# Define a trend assumption by service type
trend = {}
trend['IP'] = 1.06
trend['CAR'] = 1.03
trend['OP'] = 1.05

In [11]:
# The apply function is passed the group as a Data Frame.
# x.name is the index (a tuple if the index is more than one dimension)
def calculate_premium(x):
    member_name = x.name[0] # use the x.name attribute to access the first element of the index tuple
    year = x.name[1] # use the x.name attribute to access the second element of the index tuple
    member = x.loc[member_name] # use the loc method to access the member
    member_year = member.loc[year] # use the loc method to access the year
    # calculate the premium
    if year == '2008':
        weight = 0.1
        trend_exp = 2
    elif year == '2009':
        weight = 0.3
        trend_exp = 1
    else:
        weight = 0.6
        trend_exp = 0
    exposure = member_year['PLAN_CVRG_MOS_NUM']
    claims_amt = member_year['MEDREIMB_IP'] * trend['IP'] ** trend_exp + \
        member_year['MEDREIMB_OP'] * trend['OP'] ** trend_exp + \
        member_year['MEDREIMB_CAR'] * trend['CAR'] ** trend_exp
    if exposure == 0:
        premium = 0
    else:
        premium = weight * (claims_amt / exposure) * 12
    return premium

In [12]:
premiums = gb.apply(calculate_premium)

In [14]:
# Combine step
# Find average premium PMPM
premiums.mean()


Out[14]:
1381.783704071277

In [ ]: