In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('files/all_claims_files.csv')

In [3]:
# Filtering: Find all patients that are still alive
# Uses the boolean Series method of filtering
# Filters patients that have an empty date of death i.e. are alive
df = df[df['BENE_DEATH_DT'].isnull()]

In [4]:
# De-duplication: Find all unique combinations of 11 disease states
df.columns


Out[4]:
Index(['DESYNPUF_ID', 'BENE_BIRTH_DT', 'BENE_DEATH_DT', 'BENE_SEX_IDENT_CD',
       'BENE_RACE_CD', 'BENE_ESRD_IND', 'SP_STATE_CODE', 'BENE_COUNTY_CD',
       'BENE_HI_CVRAGE_TOT_MONS', 'BENE_SMI_CVRAGE_TOT_MONS',
       'BENE_HMO_CVRAGE_TOT_MONS', 'PLAN_CVRG_MOS_NUM', 'SP_ALZHDMTA',
       'SP_CHF', 'SP_CHRNKIDN', 'SP_CNCR', 'SP_COPD', 'SP_DEPRESSN',
       'SP_DIABETES', 'SP_ISCHMCHT', 'SP_OSTEOPRS', 'SP_RA_OA', 'SP_STRKETIA',
       'MEDREIMB_IP', 'BENRES_IP', 'PPPYMT_IP', 'MEDREIMB_OP', 'BENRES_OP',
       'PPPYMT_OP', 'MEDREIMB_CAR', 'BENRES_CAR', 'PPPYMT_CAR', 'file_name'],
      dtype='object')

In [5]:
disease_cols = ['SP_ALZHDMTA',
       'SP_CHF', 'SP_CHRNKIDN', 'SP_CNCR', 'SP_COPD', 'SP_DEPRESSN',
       'SP_DIABETES', 'SP_ISCHMCHT', 'SP_OSTEOPRS', 'SP_RA_OA', 'SP_STRKETIA']

In [6]:
2 ** (len(disease_cols)) # Number of theoretically possible disease combinations


Out[6]:
2048

In [7]:
df[disease_cols].drop_duplicates().shape # 684 different disease combinations!


Out[7]:
(684, 11)

In [8]:
# Sorting: Add total Medicare payments and sort by payment
df['PMT_TOT'] = df['MEDREIMB_IP'] + df['MEDREIMB_OP'] + df['MEDREIMB_CAR']

In [9]:
df['PMT_TOT'].describe()


Out[9]:
count      5903.000000
mean       3937.745214
std        9348.617070
min           0.000000
25%          90.000000
50%        1110.000000
75%        3330.000000
max      149810.000000
Name: PMT_TOT, dtype: float64

In [10]:
df = df.sort_values(by='PMT_TOT', ascending=False)

In [12]:
df.pivot_table(index=disease_cols, values='PMT_TOT', aggfunc='mean')


Out[12]:
PMT_TOT
SP_ALZHDMTA SP_CHF SP_CHRNKIDN SP_CNCR SP_COPD SP_DEPRESSN SP_DIABETES SP_ISCHMCHT SP_OSTEOPRS SP_RA_OA SP_STRKETIA
1 1 1 1 1 1 1 1 1 1 1 40640.000000
2 39370.000000
2 1 54453.333333
2 21310.000000
2 1 1 36255.000000
2 19440.000000
2 1 44115.000000
2 10672.500000
2 1 1 1 1 13100.000000
2 2 2 2630.000000
2 1 1 1 1 2 13580.000000
2 2 10240.000000
2 1 1 18865.000000
2 39420.000000
2 1 57810.000000
2 85460.000000
2 1 2 2 2 22950.000000
2 1 1 1 1 1 2 6572.000000
2 1 88780.000000
2 9162.000000
2 1 2 8620.000000
2 2 9613.333333
2 1 2 2 2510.000000
2 1 2 34900.000000
2 2 3600.000000
2 1 2 1 2 10530.000000
2 1 1 1 1 2 24165.000000
2 1 16160.000000
2 6150.000000
2 1 2 18570.000000
... ... ... ... ... ... ... ... ... ... ... ...
2 2 2 2 2 1 2 2 2 1 2 3234.000000
2 1 990.000000
2 1127.978723
2 1 1 1 1 1 10070.000000
2 7090.666667
2 1 1610.000000
2 1961.666667
2 1 1 2320.000000
2 4155.200000
2 1 2182.500000
2 2393.486239
2 1 1 2 2541.666667
2 1 1690.000000
2 2260.869565
2 1 2 2235.333333
2 1 840.000000
2 1752.923077
2 1 1 1 2 5412.000000
2 1 6620.000000
2 1425.609756
2 1 2 3746.060606
2 1 862.500000
2 1245.336538
2 1 1 2 1870.000000
2 1 1085.000000
2 1024.698795
2 1 1 7103.333333
2 1041.142857
2 1 2065.714286
2 248.166352

684 rows × 1 columns


In [ ]: