notebook.community

Edit and run



In [1]:

    
import pandas as pd



In [2]:

    
df = pd.read_csv('files/all_claims_files.csv') # read consolidated claims file



In [3]:

    
# Add column for the file year since each row is a unique patient/year combination
df['year'] = df.file_name.map(lambda x: int(x[6:10]))



In [4]:

    
all_cols = df.columns # all columns of the Data Frame



In [5]:

    
payment_cols = ['MEDREIMB_IP', 'BENRES_IP', 'PPPYMT_IP', 'MEDREIMB_OP', 'BENRES_OP',
       'PPPYMT_OP', 'MEDREIMB_CAR', 'BENRES_CAR', 'PPPYMT_CAR'] # Only select the payment-related columns



In [6]:

    
id_vars = ['DESYNPUF_ID', 'year'] # Use the DESYNPUF_ID/year as a unique key for the row
# Alternatively, just use the DESYNPUF_ID as a unique key for the patient
# Use list comprehension to select all non-payment related columns
# Alternative: id_vars = [x for x in all_cols if x not in payment_cols]



In [8]:

    
df.shape









    Out[8]:





(6000, 34)



In [7]:

    
df[id_vars].drop_duplicates().shape # Verify uniqueness









    Out[7]:





(6000, 2)



In [9]:

    
df.head()









    Out[9]:







  
    
      
      DESYNPUF_ID
      BENE_BIRTH_DT
      BENE_DEATH_DT
      BENE_SEX_IDENT_CD
      BENE_RACE_CD
      BENE_ESRD_IND
      SP_STATE_CODE
      BENE_COUNTY_CD
      BENE_HI_CVRAGE_TOT_MONS
      BENE_SMI_CVRAGE_TOT_MONS
      ...
      BENRES_IP
      PPPYMT_IP
      MEDREIMB_OP
      BENRES_OP
      PPPYMT_OP
      MEDREIMB_CAR
      BENRES_CAR
      PPPYMT_CAR
      file_name
      year
    
  
  
    
      0
      00013D2EFD8E45D1
      19230501
      NaN
      1
      1
      0
      26
      950
      12
      12
      ...
      0
      0
      50
      10
      0
      0
      0
      0
      DE1_0_2008_Beneficiary_Summary_File_Sample_1
      2008
    
    
      1
      00016F745862898F
      19430101
      NaN
      1
      1
      0
      39
      230
      12
      12
      ...
      0
      0
      0
      0
      0
      700
      240
      0
      DE1_0_2008_Beneficiary_Summary_File_Sample_1
      2008
    
    
      2
      0001FDD721E223DC
      19360901
      NaN
      2
      1
      0
      39
      280
      12
      12
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      DE1_0_2008_Beneficiary_Summary_File_Sample_1
      2008
    
    
      3
      00021CA6FF03E670
      19410601
      NaN
      1
      5
      0
      6
      290
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      DE1_0_2008_Beneficiary_Summary_File_Sample_1
      2008
    
    
      4
      00024B3D2352D2D0
      19360801
      NaN
      1
      1
      0
      52
      590
      12
      12
      ...
      0
      0
      30
      40
      0
      220
      80
      0
      DE1_0_2008_Beneficiary_Summary_File_Sample_1
      2008
    
  

5 rows × 34 columns



In [10]:

    
df_mlt = pd.melt(df, id_vars=id_vars, value_vars=payment_cols) # Melt the data!



In [12]:

    
df_mlt.shape









    Out[12]:





(54000, 4)



In [13]:

    
# Split variable into two parts
df_mlt['payer'] = df_mlt['variable'].map(lambda x: x.split('_')[0])
df_mlt['service'] = df_mlt['variable'].map(lambda x: x.split('_')[1])



In [14]:

    
# Now can easily pivot in different ways



In [17]:

    
# format to show percentages
# Find all available options here: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.set_option.html
pd.set_option('display.float_format', '{:.2f}'.format)



In [18]:

    
pd.crosstab(df_mlt.year, df_mlt.service, values=df_mlt.value, aggfunc='sum', normalize=True)



In [19]:

    
pd.set_option('display.float_format', '${:,.2f}'.format)



In [20]:

    
df_mlt.pivot_table(index='year', columns='service', values='value', aggfunc='sum', margins=True).applymap(float)









    Out[20]:







  
    
      service
      CAR
      IP
      OP
      All
    
    
      year
      
      
      
      
    
  
  
    
      2008
      $3,231,130.00
      $5,799,978.00
      $1,956,540.00
      $10,987,648.00
    
    
      2009
      $3,627,740.00
      $5,633,954.00
      $2,318,570.00
      $11,580,264.00
    
    
      2010
      $2,238,090.00
      $2,631,430.00
      $1,223,250.00
      $6,092,770.00
    
    
      All
      $9,096,960.00
      $14,065,362.00
      $5,498,360.00
      $28,660,682.00



In [ ]:

service	CAR	IP	OP
year
2008	0.11	0.20	0.07
2009	0.13	0.20	0.08
2010	0.08	0.09	0.04

	DESYNPUF_ID	BENE_BIRTH_DT	BENE_DEATH_DT	BENE_SEX_IDENT_CD	BENE_RACE_CD	SP_STATE_CODE	BENE_COUNTY_CD	BENE_HI_CVRAGE_TOT_MONS	BENE_SMI_CVRAGE_TOT_MONS	...	MEDREIMB_OP	BENRES_OP	MEDREIMB_CAR	BENRES_CAR	file_name	year
0	00013D2EFD8E45D1	19230501	NaN	1	1	26	950	12	12	...	50	10	0	0	DE1_0_2008_Beneficiary_Summary_File_Sample_1	2008
1	00016F745862898F	19430101	NaN	1	1	39	230	12	12	...	0	0	700	240	DE1_0_2008_Beneficiary_Summary_File_Sample_1	2008
2	0001FDD721E223DC	19360901	NaN	2	1	39	280	12	12	...	0	0	0	0	DE1_0_2008_Beneficiary_Summary_File_Sample_1	2008
3	00021CA6FF03E670	19410601	NaN	1	5	6	290	0	0	...	0	0	0	0	DE1_0_2008_Beneficiary_Summary_File_Sample_1	2008
4	00024B3D2352D2D0	19360801	NaN	1	1	52	590	12	12	...	30	40	220	80	DE1_0_2008_Beneficiary_Summary_File_Sample_1	2008

service	CAR	IP	OP	All
year
2008	$3,231,130.00	$5,799,978.00	$1,956,540.00	$10,987,648.00
2009	$3,627,740.00	$5,633,954.00	$2,318,570.00	$11,580,264.00
2010	$2,238,090.00	$2,631,430.00	$1,223,250.00	$6,092,770.00
All	$9,096,960.00	$14,065,362.00	$5,498,360.00	$28,660,682.00