notebook.community

Edit and run



In [1]:

    
import pandas as pd



In [2]:

    
df = pd.read_csv('files/all_claims_files.csv')



In [4]:

    
#df.columns



In [5]:

    
df['year'] = df.file_name.map(lambda x: x[6:10])



In [6]:

    
df.year.value_counts()









    Out[6]:





2008    2000
2009    2000
2010    2000
Name: year, dtype: int64



In [7]:

    
# Re-index the Data Frame to have a two-dimensional index
df = df.set_index(['DESYNPUF_ID', 'year'])



In [9]:

    
# Split the Data Frame
gb = df.groupby(level=[0, 1])



In [10]:

    
# Define a trend assumption by service type
trend = {}
trend['IP'] = 1.06
trend['CAR'] = 1.03
trend['OP'] = 1.05



In [11]:

    
# The apply function is passed the group as a Data Frame.
# x.name is the index (a tuple if the index is more than one dimension)
def calculate_premium(x):
    member_name = x.name[0] # use the x.name attribute to access the first element of the index tuple
    year = x.name[1] # use the x.name attribute to access the second element of the index tuple
    member = x.loc[member_name] # use the loc method to access the member
    member_year = member.loc[year] # use the loc method to access the year
    # calculate the premium
    if year == '2008':
        weight = 0.1
        trend_exp = 2
    elif year == '2009':
        weight = 0.3
        trend_exp = 1
    else:
        weight = 0.6
        trend_exp = 0
    exposure = member_year['PLAN_CVRG_MOS_NUM']
    claims_amt = member_year['MEDREIMB_IP'] * trend['IP'] ** trend_exp + \
        member_year['MEDREIMB_OP'] * trend['OP'] ** trend_exp + \
        member_year['MEDREIMB_CAR'] * trend['CAR'] ** trend_exp
    if exposure == 0:
        premium = 0
    else:
        premium = weight * (claims_amt / exposure) * 12
    return premium



In [12]:

    
premiums = gb.apply(calculate_premium)



In [14]:

    
# Combine step
# Find average premium PMPM
premiums.mean()









    Out[14]:





1381.783704071277



In [ ]: