In [13]:
import pandas as pd
import numpy as np

from sklearn.decomposition import PCA, FactorAnalysis, FastICA
from sklearn.preprocessing import scale

In [14]:
def splitXY(dfXY):
    """Separate a training frame into its feature matrix and label columns.

    Parameters
    ----------
    dfXY : pandas.DataFrame
        Frame holding the feature columns together with the label columns
        'ReactorType', 'CoolingTime', 'Enrichment', 'Burnup' and
        'OrigenReactor'.

    Returns
    -------
    tuple
        ``(features, reactor, cooling, enrichment, burnup)``: a copy of
        ``dfXY`` with all five label columns (and 'total', when present)
        removed, followed by the first four label columns taken from
        ``dfXY``. 'OrigenReactor' is removed from the features but is not
        returned.
    """
    label_names = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']
    features = dfXY.drop(label_names, axis=1)
    # 'total' is optional: strip it only when the frame actually carries it.
    if 'total' in features.columns:
        features = features.drop('total', axis=1)
    # Only the first four labels are handed back, in declaration order.
    targets = tuple(dfXY.loc[:, name] for name in label_names[:4])
    return (features,) + targets

# Not referenced in the visible cells; presumably the number of
# cross-validation folds -- TODO confirm against later cells.
CV = 5
trainset = '../pkl_trainsets/2jul2018/22jul2018_trainset3_nucs_fissact_not-scaled.pkl'
# NOTE(review): read_pickle can execute arbitrary code while unpickling; only
# load training sets that come from a trusted source.
trainXY = pd.read_pickle(trainset)
#trainXY = trainXY.sample(frac=0.5)
X, rY, cY, eY, bY = splitXY(trainXY)
# scale() hands back a bare ndarray, so re-attach both the column labels and
# the row index. Without index=X.index, trainX would get a fresh 0..n-1 index
# and stop lining up with rY/cY/eY/bY whenever trainXY carries a non-default
# index (e.g. after the sample() call above is enabled).
trainX = pd.DataFrame(scale(X), columns=X.columns, index=X.index)

PCA


In [15]:
# Project the scaled features onto three whitened principal components.
# random_state is pinned because PCA's default solver selection may choose the
# randomized SVD for large inputs, which would make the scores differ from
# run to run.
pca = PCA(n_components=3, whiten=True, random_state=0)
# Keep the scores in a variable so later cells can reuse them without
# refitting or reaching for Out[N].
pca_scores = pca.fit_transform(trainX)
pca_scores


Out[15]:
array([[-1.52268789, -0.12209472,  1.11506217],
       [-1.41352098, -0.15883125,  0.91789136],
       [-1.4129673 , -0.1594127 ,  0.91545403],
       ...,
       [-0.77566032,  6.25175067, -1.45741589],
       [-0.76512761,  5.04317696, -1.03613692],
       [-0.77184825,  5.03730189, -1.03431876]])

In [23]:
# Component table: one row per feature column of trainX, one column per
# fitted principal component. The labels are generated from the fitted model
# so the table stays valid if n_components is changed in the fitting cell.
pca_components = pd.DataFrame(
    pca.components_.T,
    index=trainX.columns,
    columns=['PC-{}'.format(i + 1) for i in range(pca.components_.shape[0])],
)
pca_components


Out[23]:
PC-1 PC-2 PC-3
ba138 0.163513 0.021385 0.030964
ce140 0.163368 0.023822 0.039556
ce142 0.163646 0.020039 0.027590
ce144 -0.025167 0.548237 -0.147559
cs133 0.163737 0.007470 -0.003976
cs135 0.024738 -0.045275 -0.350779
cs137 0.161863 0.018446 0.018126
la139 0.163683 0.018771 0.020935
mo100 0.163399 0.021360 0.030769
mo95 0.162908 0.016343 0.020033
mo97 0.163664 0.019289 0.025321
mo98 0.163435 0.021356 0.030823
nd143 0.147896 -0.051646 -0.257466
nd144 0.158064 0.046849 0.136080
nd145 0.163654 0.006336 -0.014421
nd146 0.162229 0.030859 0.060957
pd104 0.020094 0.035238 0.192831
pd105 0.161038 0.007103 0.026505
pd106 0.105777 0.021821 -0.073689
pr141 0.163655 0.018972 0.020654
pu239 0.105729 -0.172523 -0.446679
pu240 0.145501 -0.085304 -0.121522
pu241 0.116994 -0.099343 -0.411869
pu242 0.136422 0.046338 0.188969
rb87 -0.027002 0.566734 -0.147277
rh103 0.161266 -0.017441 -0.081859
ru101 0.163563 0.019267 0.027619
ru102 0.162250 0.026424 0.040135
ru104 0.161058 0.028362 0.047801
sr88 0.163354 0.012852 0.009751
sr90 0.160497 0.007654 -0.011538
tc99 0.163851 0.011177 0.005064
te130 0.014100 0.028215 0.363396
u234 -0.034567 0.551022 -0.113722
u235 -0.156797 0.002257 0.024997
u236 0.145495 -0.045561 -0.129297
u238 -0.160599 -0.008609 0.003417
xe131 0.071933 0.051452 0.301876
xe132 0.161631 0.031278 0.057135
xe134 0.163452 0.021487 0.031337
xe136 0.162921 0.026227 0.055340
y89 0.163162 0.013539 0.012324
zr91 0.163333 0.014876 0.015769
zr92 0.163710 0.015260 0.015633
zr93 0.163778 0.015744 0.017226
zr94 0.163774 0.018042 0.022257
zr96 0.163734 0.018534 0.023325

Factor Analysis


In [20]:
# Fit a three-factor model to the scaled features.
fa = FactorAnalysis(n_components=3)
# Keep the factor scores in a variable so later cells can reuse them without
# refitting or reaching for Out[N].
fa_scores = fa.fit_transform(trainX)
fa_scores


Out[20]:
array([[-1.27234134,  2.03591202, -0.48497343],
       [-1.20059417,  1.57481272, -0.19596514],
       [-1.20038941,  1.57161224, -0.20181028],
       ...,
       [-0.76374927, -0.7028066 ,  1.27830758],
       [-0.7637493 , -0.70280553,  1.27830621],
       [-0.76374937, -0.70280447,  1.27827978]])

In [24]:
# Component table for the factor model: one row per feature column of trainX,
# one column per fitted factor. The labels are generated from the fitted model
# so the table stays valid if n_components is changed in the fitting cell.
# NOTE(review): the 'PC-n' labels are carried over from the PCA table and are
# kept so any code selecting these columns by name still works; these columns
# are factors, not principal components -- consider renaming.
fa_components = pd.DataFrame(
    fa.components_.T,
    index=trainX.columns,
    columns=['PC-{}'.format(i + 1) for i in range(fa.components_.shape[0])],
)
fa_components


Out[24]:
PC-1 PC-2 PC-3
ba138 0.997423 -0.067541 0.024179
ce140 0.997428 -0.059988 0.036402
ce142 0.996282 -0.082523 0.024584
ce144 -0.133783 0.029595 0.097136
cs133 0.984982 -0.171567 0.000495
cs135 0.156359 0.025952 -0.217190
cs137 0.987030 -0.068390 0.012322
la139 0.996522 -0.082218 0.012910
mo100 0.998347 -0.054510 0.018220
mo95 0.984530 -0.132683 0.019981
mo97 0.996427 -0.081959 0.020380
mo98 0.998089 -0.058246 0.020611
nd143 0.853898 -0.409836 -0.287960
nd144 0.979123 0.045750 0.132947
nd145 0.983038 -0.183280 -0.006728
nd146 0.998726 0.002308 0.050240
pd104 0.127154 0.043328 0.220506
pd105 0.996947 0.052002 -0.046833
pd106 0.670033 0.182335 -0.081332
pr141 0.995438 -0.087442 0.013795
pu239 0.585427 -0.405410 -0.484601
pu240 0.822687 -0.522308 -0.021860
pu241 0.687171 -0.204566 -0.583436
pu242 0.849167 0.085791 0.062656
rb87 -0.146132 0.017295 0.118389
rh103 0.957617 -0.252542 -0.106618
ru101 0.997454 -0.068874 0.016231
ru102 0.999838 0.011762 0.009487
ru104 0.998346 0.057497 -0.000532
sr88 0.980351 -0.192587 0.042672
sr90 0.958687 -0.223668 0.033317
tc99 0.988952 -0.147245 0.007250
te130 0.082696 -0.044295 0.269566
u234 -0.182082 0.101413 0.082486
u235 -0.913213 0.400478 -0.074883
u236 0.819925 -0.568565 -0.018996
u238 -0.994143 -0.058018 0.077050
xe131 0.414272 -0.264094 0.392367
xe132 0.999029 0.033637 0.027981
xe134 0.997934 -0.060455 0.021697
xe136 0.997519 -0.039334 0.048955
y89 0.978789 -0.192464 0.041898
zr91 0.982671 -0.170028 0.036798
zr92 0.987712 -0.152225 0.035371
zr93 0.989886 -0.138369 0.031237
zr94 0.993101 -0.113437 0.029682
zr96 0.995232 -0.094695 0.023322

ICA


In [17]:
# Extract three independent components from the scaled features.
# FastICA starts from a random initialisation, so without a fixed random_state
# the order, sign and values of the components change on every run.
# NOTE(review): newer scikit-learn releases expect a string for `whiten`
# instead of True -- confirm against the installed version before upgrading.
ica = FastICA(n_components=3, whiten=True, random_state=0)
# Keep the recovered sources in a variable so later cells can reuse them
# without refitting or reaching for Out[N].
ica_scores = ica.fit_transform(trainX)
ica_scores


Out[17]:
array([[-0.00028448, -0.00442905,  0.01116088],
       [-0.000409  , -0.00334068,  0.01021062],
       [-0.00041041, -0.00332593,  0.01020382],
       ...,
       [ 0.04094845,  0.00120453, -0.00284122],
       [ 0.03303206,  0.00032461, -0.00121934],
       [ 0.0330016 ,  0.0003312 , -0.0011706 ]])

In [43]:
# Component table for the ICA model: one row per feature column of trainX,
# one column per fitted component. The labels are generated from the fitted
# model so the table stays valid if n_components is changed in the fitting cell.
# NOTE(review): per the scikit-learn docs, FastICA.components_ is the operator
# applied to the data to obtain the sources (unmixing); ica.mixing_ holds the
# opposite direction -- confirm which one is wanted here.
# NOTE(review): the 'PC-n' labels are carried over from the PCA table and are
# kept so any code selecting these columns by name still works.
ica_components = pd.DataFrame(
    ica.components_.T,
    index=trainX.columns,
    columns=['PC-{}'.format(i + 1) for i in range(ica.components_.shape[0])],
)
ica_components


Out[43]:
PC-1 PC-2 PC-3
ba138 2.243827e-05 -0.000201 -0.000136
ce140 2.401282e-05 -0.000243 -0.000126
ce142 2.033732e-05 -0.000184 -0.000139
ce144 2.205198e-03 0.000230 -0.000440
cs133 9.879319e-07 -0.000025 -0.000172
cs135 1.368016e-04 0.001677 -0.000439
cs137 2.308598e-05 -0.000138 -0.000148
la139 2.146798e-05 -0.000152 -0.000147
mo100 2.253681e-05 -0.000200 -0.000136
mo95 1.325534e-05 -0.000145 -0.000146
mo97 1.952076e-05 -0.000173 -0.000142
mo98 2.246848e-05 -0.000200 -0.000136
nd143 6.550491e-06 0.001217 -0.000442
nd144 2.597294e-05 -0.000714 -0.000012
nd145 6.024023e-06 0.000025 -0.000185
nd146 3.172883e-05 -0.000349 -0.000102
pd104 -4.246858e-05 -0.000939 0.000202
pd105 -2.703081e-05 -0.000167 -0.000131
pd106 1.281013e-04 0.000303 -0.000209
pr141 2.248493e-05 -0.000151 -0.000147
pu239 -2.732661e-04 0.002217 -0.000574
pu240 -2.410660e-04 0.000609 -0.000253
pu241 -3.018572e-05 0.001989 -0.000579
pu242 -1.898408e-05 -0.000957 0.000076
rb87 2.275101e-03 0.000213 -0.000448
rh103 -2.322141e-05 0.000362 -0.000254
ru101 1.741092e-05 -0.000183 -0.000139
ru102 3.352723e-05 -0.000248 -0.000126
ru104 3.423812e-05 -0.000285 -0.000116
sr88 9.150367e-06 -0.000094 -0.000158
sr90 9.023796e-06 0.000011 -0.000179
tc99 6.908342e-06 -0.000071 -0.000163
te130 -2.197142e-04 -0.001731 0.000424
u234 2.187349e-03 0.000071 -0.000390
u235 1.570194e-05 -0.000083 0.000186
u236 -8.414868e-05 0.000612 -0.000283
u238 -5.376216e-06 0.000028 0.000169
xe131 -8.810092e-05 -0.001476 0.000278
xe132 3.682731e-05 -0.000332 -0.000106
xe134 2.250331e-05 -0.000203 -0.000135
xe136 1.911765e-05 -0.000319 -0.000107
y89 9.486493e-06 -0.000107 -0.000154
zr91 1.143523e-05 -0.000124 -0.000151
zr92 1.293495e-05 -0.000124 -0.000152
zr93 1.332772e-05 -0.000132 -0.000150
zr94 1.752376e-05 -0.000157 -0.000145
zr96 1.843850e-05 -0.000163 -0.000144

U/Pu only


In [40]:
# Every feature column name of trainX, as a plain Python list.
nucs = list(trainX.columns)
# Column names kept for the U/Pu-only subset.
upu = [
    'u234', 'u235', 'u236', 'u238',
    'pu239', 'pu240', 'pu241', 'pu242',
]

In [41]:
# Restrict the scaled features to the columns listed in `upu`.
# DataFrame.filter keeps only the listed labels that exist in the frame.
trainX_upu = trainX.filter(items=upu, axis=1)
# Preview the first five rows.
trainX_upu.head(5)


Out[41]:
u234 u235 u236 u238 pu239 pu240 pu241 pu242
0 -0.177572 2.017822 -2.286353 1.208731 -2.619126 -1.971166 -1.09989 -0.780072
1 -0.177572 1.743173 -1.950815 1.164032 -2.151137 -1.915436 -1.09989 -0.780036
2 -0.177572 1.743173 -1.950815 1.164032 -2.145345 -1.915436 -1.09989 -0.780036
3 -0.177572 1.743173 -1.950815 1.164032 -2.140904 -1.915436 -1.09989 -0.780036
4 -0.177572 1.743173 -1.950815 1.164032 -2.121598 -1.915436 -1.09989 -0.780036

In [ ]: