In [1]:
import pandas as pd
In [2]:
df = pd.read_csv('files/all_claims_files.csv')
In [3]:
# Filtering: Find all patients that are still alive
# Uses the boolean Series method of filtering
# Filters patients that have an empty date of death i.e. are alive
df = df[df['BENE_DEATH_DT'].isnull()]
In [4]:
# De-duplication: Find all unique combinations of 11 disease states
df.columns
Out[4]:
In [5]:
disease_cols = ['SP_ALZHDMTA',
'SP_CHF', 'SP_CHRNKIDN', 'SP_CNCR', 'SP_COPD', 'SP_DEPRESSN',
'SP_DIABETES', 'SP_ISCHMCHT', 'SP_OSTEOPRS', 'SP_RA_OA', 'SP_STRKETIA']
In [6]:
2 ** (len(disease_cols)) # Number of theoretically possible disease combinations
Out[6]:
In [7]:
df[disease_cols].drop_duplicates().shape # 684 different disease combinations!
Out[7]:
In [8]:
# Sorting: Add total Medicare payments and sort by payment
df['PMT_TOT'] = df['MEDREIMB_IP'] + df['MEDREIMB_OP'] + df['MEDREIMB_CAR']
In [9]:
df['PMT_TOT'].describe()
Out[9]:
In [10]:
df = df.sort_values(by='PMT_TOT', ascending=False)
In [12]:
df.pivot_table(index=disease_cols, values='PMT_TOT', aggfunc='mean')
Out[12]:
In [ ]: