In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import scale
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

import matplotlib.pyplot as plt

import seaborn as sns; sns.set()

In [2]:
# Load the pickled object stored in 'claims_df' (used below as a DataFrame).
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
df = pd.read_pickle('claims_df')

In [3]:
# Select the eleven SP_* columns of df that feed the first PCA.
pca_df = df[[
    'SP_ALZHDMTA',
    'SP_CHF',
    'SP_CHRNKIDN',
    'SP_CNCR',
    'SP_COPD',
    'SP_DEPRESSN',
    'SP_DIABETES',
    'SP_ISCHMCHT',
    'SP_OSTEOPRS',
    'SP_RA_OA',
    'SP_STRKETIA',
]]

In [4]:
# Standardise every selected column before PCA.
# scale() hands back a bare ndarray, so the row index and column names of
# pca_df are passed to the constructor to keep the scaled frame labelled
# exactly like its source.
pca_vals = pd.DataFrame(scale(pca_df), index=pca_df.index, columns=pca_df.columns)

In [5]:
# Carry the column names of pca_df over to the scaled frame.
pca_vals.columns = pca_df.columns

In [6]:
# Unfitted PCA estimator; n_components=None is the default shown in the
# estimator repr, i.e. no cap on the number of components.
pca = PCA(n_components=None)

In [7]:
# Fit the PCA on the scaled columns; the fitted estimator is the cell output.
pca.fit(X=pca_vals)


Out[7]:
PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [8]:
# Cumulative fraction of variance explained as components are added.
plt.plot(pca.explained_variance_ratio_.cumsum());



In [9]:
# Heatmap of the PCA loadings: one row per principal component, one column
# per input feature. The x-axis is labelled with the feature names so each
# loading can be attributed to its variable rather than to an anonymous
# column position.
sns.heatmap(pca.components_, annot=True, xticklabels=list(pca_df.columns));


Try a different method: linear discriminant analysis (LDA)


In [10]:
# Unfitted LDA estimator; 'svd' is the default solver shown in the estimator repr.
lda = LinearDiscriminantAnalysis(solver='svd')

In [11]:
# Fit LDA on the scaled columns using TOTAL_PAID as the target.
# NOTE(review): LDA is a classifier and treats each distinct target value as
# its own class; TOTAL_PAID looks like a payment amount -- confirm it is meant
# to be used as a categorical label (or bin it first).
lda.fit(X=pca_vals, y=df['TOTAL_PAID'])


Out[11]:
LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
              solver='svd', store_covariance=False, tol=0.0001)

In [12]:
# Cumulative fraction of variance explained by the LDA components.
plt.plot(lda.explained_variance_ratio_.cumsum());


Repeat the PCA on the payment columns


In [13]:
# Select the nine payment columns of df: MEDREIMB / BENRES / PPPYMT for each
# of the IP, OP and CAR suffixes.
pmt = df[[
    'MEDREIMB_IP',
    'BENRES_IP',
    'PPPYMT_IP',
    'MEDREIMB_OP',
    'BENRES_OP',
    'PPPYMT_OP',
    'MEDREIMB_CAR',
    'BENRES_CAR',
    'PPPYMT_CAR',
]]

In [14]:
# Standardise the payment columns before PCA.
# scale() hands back a bare ndarray, so the row index and column names of pmt
# are passed to the constructor to keep the scaled frame labelled exactly like
# its source.
pmt_norm = pd.DataFrame(scale(pmt), index=pmt.index, columns=pmt.columns)

In [15]:
# Carry the column names of pmt over to the scaled frame.
pmt_norm.columns = pmt.columns

In [16]:
# Unfitted PCA for the payment columns with no cap on the number of components
# (an earlier experiment limited this to n_components=5).
pca_pmt = PCA(n_components=None)

In [17]:
# Fit the payment PCA on the scaled payment columns; the fitted estimator is
# the cell output.
pca_pmt.fit(X=pmt_norm)


Out[17]:
PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [18]:
# Project the scaled payment data onto the fitted components and display the
# resulting scores (one column per component).
# NOTE(review): this renders the whole frame; consider .head() when sharing.
pd.DataFrame(data=pca_pmt.transform(X=pmt_norm))


Out[18]:
0 1 2 3 4 5 6 7 8
0 -1.454393 -0.026290 -0.435435 0.003521 -0.030699 0.306334 -0.048370 -0.014291 0.013755
1 -1.005317 -0.004915 -0.062115 -0.009197 0.025482 -0.087318 -0.056058 -0.013069 -0.053407
2 -1.289040 0.001058 -0.328708 -0.006301 -0.010279 0.190308 -0.050272 0.023283 -0.015365
3 0.868003 0.475059 0.849093 -0.165368 0.257797 -1.137621 -0.068813 -0.055916 -0.003474
4 -0.626859 0.110672 0.472050 0.034377 0.016455 0.435550 -0.042275 0.104021 -0.136976
5 -0.431140 0.042438 0.370587 -0.032695 0.098685 -0.553368 -0.062282 -0.021333 0.238189
6 -1.020830 0.006602 0.050803 0.018786 0.002091 0.207689 -0.050389 0.002872 -0.204887
7 2.242458 1.357826 0.657125 -0.412399 0.422868 -1.120638 -0.061414 -0.872239 -0.518744
8 7.914219 1.119525 7.737192 0.401429 -0.449528 7.375464 -0.030329 1.894723 -0.414372
9 -0.927689 0.030590 -0.054103 -0.020420 0.035528 -0.104869 -0.054964 -0.024833 0.081399
10 3.963378 0.983276 1.068174 -0.383002 0.003874 0.796459 0.537263 -0.301588 0.288215
11 17.424730 9.965999 1.904774 9.741459 0.037796 -5.522384 0.098970 -1.237285 -0.122496
12 -0.856474 0.007312 0.050472 -0.015217 0.044415 -0.208410 -0.057751 -0.015109 0.011458
13 0.658964 -2.520879 -1.123663 0.081196 -0.951111 0.394906 -1.546589 0.014998 -0.154474
14 -0.536959 0.163491 0.085930 -0.061306 0.083311 -0.276345 -0.056510 0.022230 0.025770
15 -0.605462 0.314387 -0.225066 -0.098010 0.073646 0.014310 -0.050177 -0.289259 -0.050299
16 -0.999831 -0.001585 0.362441 0.080053 -0.036331 0.708855 -0.038758 0.012508 -0.143900
17 2.276251 -1.223544 1.616650 0.025175 -0.276388 -1.138552 -0.019403 -0.033475 0.253952
18 -1.408754 -0.025072 -0.396799 0.002443 -0.024882 0.265275 -0.049133 -0.031963 0.022421
19 -0.360921 0.180312 0.210423 -0.069568 0.106273 -0.413475 -0.057662 -0.028236 0.224481
20 -0.141289 0.514612 -0.121319 -0.157390 0.129328 -0.128628 -0.050529 -0.095726 -0.222095
21 -1.166997 0.073242 -0.335954 -0.027229 0.004523 0.183385 -0.049155 0.036769 0.003618
22 -0.122650 1.096541 -1.041608 -0.313345 0.126053 0.727819 -0.020173 0.827626 0.127574
23 1.460097 2.487527 -1.852960 -0.701985 0.311494 1.309800 0.012634 1.446155 -0.014360
24 -0.946799 -0.881295 -0.785667 0.020951 -0.378187 0.442486 0.737853 -0.004200 -0.003864
25 1.138475 1.140884 0.008846 -0.340921 0.286182 -0.397021 -0.046786 -0.427804 -0.140142
26 0.881727 0.147998 1.383569 -0.082421 0.263625 -1.634687 -0.081043 -0.034451 0.299586
27 -0.650213 0.400377 -0.400972 -0.119327 0.066742 0.183415 -0.046020 -0.215735 -0.135525
28 -1.131351 -0.015590 -0.154651 -0.003624 0.009161 0.013357 -0.055160 -0.010657 -0.180092
29 -0.083928 0.124867 0.563577 -0.057118 0.140020 -0.766621 -0.068641 -0.103702 -0.229382
... ... ... ... ... ... ... ... ... ...
4670 0.722817 0.140690 1.301923 -0.067203 0.236319 -1.408897 -0.077394 -0.051549 0.184715
4671 -0.405354 -0.041436 1.478822 0.190295 -0.036741 1.063220 -0.029597 -0.034644 -0.143580
4672 0.773082 -0.302660 -0.145383 -0.111216 -0.269549 0.563908 0.739010 -0.401090 -0.146526
4673 0.881398 -0.270720 -0.450183 -0.189903 -0.208298 -0.100260 0.717823 -1.066264 -0.157351
4674 -0.625633 0.033397 0.210811 -0.026788 0.074055 -0.384007 -0.059543 -0.031593 0.189261
4675 -1.093007 -0.029341 0.130389 0.048188 -0.019568 0.403645 -0.045205 -0.023287 0.008093
4676 -0.520300 0.046162 0.290463 -0.030911 0.086556 -0.467648 -0.062165 -0.040412 0.019113
4677 -1.145866 -0.015556 -0.176292 -0.004846 0.008417 0.032254 -0.052949 -0.013067 0.087052
4678 -1.209663 -0.001543 -0.254954 -0.007023 0.000011 0.112352 -0.051693 -0.026150 0.022664
4679 -0.760366 0.072071 0.021939 -0.035287 0.057264 -0.196192 -0.054451 -0.035325 0.322784
4680 -1.397344 -0.013210 -0.405706 -0.000828 -0.023554 0.272240 -0.048822 -0.030642 0.021823
4681 -0.923983 0.001584 0.000995 -0.012217 0.035664 -0.154543 -0.057288 -0.013795 -0.058692
4682 -1.380430 0.049967 -0.495409 -0.017043 -0.021762 0.351223 -0.046881 -0.167372 0.008268
4683 -1.240795 -0.023094 -0.250366 -0.001391 -0.003423 0.110991 -0.051434 -0.012322 0.103868
4684 -0.577418 0.212426 -0.035844 -0.073279 0.078407 -0.162642 -0.053570 -0.139677 0.101944
4685 -0.633305 0.076438 0.134987 -0.037133 0.072523 -0.313564 -0.058390 -0.112502 0.104284
4686 0.007788 0.076730 0.717948 -0.047845 0.152931 -0.920661 -0.070223 -0.023591 0.035750
4687 -0.192206 0.060342 0.566316 -0.039815 0.127528 -0.757721 -0.067879 -0.020937 -0.042354
4688 0.327672 0.939735 -0.386134 -0.281593 0.185083 0.071610 -0.033342 0.876714 0.356853
4689 -0.895801 0.014698 -0.000116 -0.016880 0.039769 -0.158487 -0.056160 -0.034412 0.095581
4690 -0.957402 -0.042653 0.507000 0.098931 -0.036588 0.700124 -0.037908 -0.023018 0.065775
4691 -1.332369 -0.008278 -0.353029 -0.002858 -0.015654 0.217034 -0.050239 -0.030669 -0.040371
4692 -0.274872 -0.947695 -0.361436 0.003417 -0.348952 -0.036439 0.716385 -0.006497 -0.252809
4693 -0.728136 0.017180 0.153421 -0.019412 0.060133 -0.316722 -0.060339 -0.015430 -0.083249
4694 0.782202 -0.922884 0.371903 -0.036700 -0.241132 -0.844289 0.625493 -0.022154 0.454367
4695 -1.119011 0.009417 -0.193104 -0.011736 0.011637 0.044428 -0.052386 -0.033951 0.090105
4696 -1.433186 -0.038965 -0.395313 0.006489 -0.027956 0.267125 -0.049355 -0.009543 0.005157
4697 -1.431803 -0.038701 -0.395578 0.006211 -0.027644 0.266871 -0.049120 -0.009883 0.039378
4698 0.027735 0.457494 0.117635 -0.147239 0.152071 -0.369028 -0.053718 0.011826 0.057044
4699 -0.483415 0.094434 0.234805 -0.045917 0.092143 -0.423750 -0.058251 -0.038294 0.356845

4700 rows × 9 columns


In [19]:
# Loadings of the fitted payment PCA: one row per component, one column per
# input payment variable.
pmt_comp = pca_pmt.components_

In [20]:
# Wrap the loadings array in a DataFrame for tabular display.
pmt_comp_df = pd.DataFrame(data=pmt_comp)

In [21]:
# Name each loadings column after the payment variable it belongs to.
pmt_comp_df.columns = pmt_norm.columns

In [22]:
# Display the loadings table: rows are principal components, columns are the
# payment variables.
pmt_comp_df


Out[22]:
MEDREIMB_IP BENRES_IP PPPYMT_IP MEDREIMB_OP BENRES_OP PPPYMT_OP MEDREIMB_CAR BENRES_CAR PPPYMT_CAR
0 0.337365 0.330365 0.072971 0.397051 0.398096 0.071513 0.474030 0.458370 0.125170
1 -0.523746 -0.533816 -0.255851 0.409824 0.416485 0.172998 0.041234 0.033643 -0.033103
2 -0.233046 -0.228977 -0.099791 -0.320349 -0.301583 -0.105905 0.338619 0.381598 0.646707
3 0.024607 0.005583 0.199892 -0.110852 -0.116759 0.959683 -0.022558 -0.012622 0.108746
4 -0.198783 -0.222727 0.931866 0.047743 0.045023 -0.168346 0.062359 0.054700 -0.054552
5 0.120723 0.087264 0.102235 0.241168 0.244088 -0.065327 -0.372389 -0.396095 0.741475
6 -0.709089 0.704299 0.019685 0.008366 0.012965 0.010450 -0.001567 -0.011910 0.016629
7 0.007123 0.002564 0.004907 -0.705812 0.708166 0.004286 -0.011378 0.002255 -0.009824
8 -0.003529 -0.013105 -0.007926 -0.028300 -0.014245 0.003778 0.718143 -0.694952 0.006865

In [23]:
# Cumulative share of variance explained by the payment components.
# Uses explained_variance_ratio_ (per-component fractions of total variance)
# rather than the raw explained_variance_, so the curve reads as a proportion
# and is directly comparable with the other cumulative-variance plots in this
# notebook.
plt.plot(np.cumsum(pca_pmt.explained_variance_ratio_));



In [24]:
# Heatmap of the payment PCA loadings. Plot the labelled DataFrame rather than
# the raw ndarray so the x-axis shows the payment variable names instead of
# anonymous column positions.
sns.heatmap(pmt_comp_df, annot=True);



In [ ]: