In [13]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA, FactorAnalysis, FastICA
from sklearn.preprocessing import scale
In [14]:
def splitXY(dfXY):
    """Split a combined training frame into features and four label columns.

    Parameters
    ----------
    dfXY : pandas.DataFrame
        Frame holding both features and labels. It must contain the columns
        'ReactorType', 'CoolingTime', 'Enrichment', 'Burnup' and
        'OrigenReactor'; a 'total' column is optional.

    Returns
    -------
    tuple
        ``(dfX, r_dfY, c_dfY, e_dfY, b_dfY)`` where ``dfX`` is a new frame
        without the five label columns and without 'total' (if it was
        present), and the remaining items are the 'ReactorType',
        'CoolingTime', 'Enrichment' and 'Burnup' columns, in that order.
        'OrigenReactor' is removed from the features but not returned.

    Raises
    ------
    KeyError
        If any of the five label columns is missing from ``dfXY``.
    """
    lbls = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']
    # Each drop returns a new frame, so the caller's dfXY is left untouched.
    # errors='ignore' is applied only to the optional 'total' column; the
    # label columns are still required and raise KeyError when absent.
    dfX = dfXY.drop(lbls, axis=1).drop('total', axis=1, errors='ignore')
    # Only the first four labels are handed back to the caller.
    r_dfY, c_dfY, e_dfY, b_dfY = (dfXY.loc[:, name] for name in lbls[:4])
    return dfX, r_dfY, c_dfY, e_dfY, b_dfY
# Presumably the number of cross-validation folds; it is not used by any of
# the cells visible here -- TODO confirm against later cells.
CV = 5

# Location of the pickled training set, relative to the notebook directory.
# NOTE(review): pd.read_pickle unpickles the file, which can execute code
# embedded in it -- only point this at files from a trusted source.
trainset = '../pkl_trainsets/2jul2018/22jul2018_trainset3_nucs_fissact_not-scaled.pkl'
trainXY = pd.read_pickle(trainset)
# Uncomment to work on a random half of the rows instead of the full set.
#trainXY = trainXY.sample(frac=0.5)
X, rY, cY, eY, bY = splitXY(trainXY)

# scale() returns a bare array, so the result is wrapped back into a frame.
# The original row index is carried over explicitly: without it trainX would
# get a fresh 0..n-1 index and stop lining up with rY/cY/eY/bY whenever the
# source frame's index is not already 0..n-1 (e.g. after sampling).
trainX = pd.DataFrame(scale(X), columns=X.columns, index=X.index)
In [15]:
# Three-component PCA with whitening on the scaled feature frame.
# random_state is pinned because PCA's automatic solver selection can choose
# a randomized SVD for larger inputs; without a seed the fitted components
# could then differ from one run of the notebook to the next.
pca = PCA(n_components=3, whiten=True, random_state=0)
# Last expression of the cell: fits the model and displays the projection.
pca.fit_transform(trainX)
Out[15]:
In [23]:
# Tabulate the fitted PCA components: the transpose puts one row per input
# column of trainX and one column per component.
pca_components = pd.DataFrame(
    data=pca.components_.T,
    index=trainX.columns,
    columns=['PC-1', 'PC-2', 'PC-3'],
)
pca_components
Out[23]:
In [20]:
# Three-factor FactorAnalysis model on the same scaled feature frame.
fa = FactorAnalysis(
    n_components=3,
)
# Last expression of the cell: fits the model and displays the factor scores.
fa.fit_transform(
    trainX,
)
Out[20]:
In [24]:
# Tabulate the fitted FactorAnalysis components, one row per input column of
# trainX and one column per component.
# NOTE(review): the column labels reuse the 'PC-*' naming from the PCA table
# although these are factors; kept unchanged in case later cells index by them.
fa_components = pd.DataFrame(
    data=fa.components_.T,
    index=trainX.columns,
    columns=['PC-1', 'PC-2', 'PC-3'],
)
fa_components
Out[24]:
In [17]:
# Three-component FastICA on the scaled feature frame.
# FastICA draws its initial unmixing matrix at random when no seed is given,
# so random_state is pinned to make the fitted components repeatable across
# notebook runs.
# NOTE(review): newer scikit-learn releases changed the accepted values of
# `whiten`; confirm that `whiten=True` is still valid for the installed
# version before upgrading.
ica = FastICA(n_components=3, whiten=True, random_state=0)
# Last expression of the cell: fits the model and displays the sources.
ica.fit_transform(trainX)
Out[17]:
In [43]:
# Tabulate the fitted FastICA components, one row per input column of trainX
# and one column per component.
# NOTE(review): the column labels reuse the 'PC-*' naming from the PCA table
# although these are independent components; kept unchanged in case later
# cells index by them.
ica_components = pd.DataFrame(
    data=ica.components_.T,
    index=trainX.columns,
    columns=['PC-1', 'PC-2', 'PC-3'],
)
ica_components
Out[43]:
In [40]:
# Names of every feature column in the scaled training frame.
nucs = trainX.columns.tolist()

# Column names used to pick the uranium / plutonium subset of the features.
upu = [
    'u234', 'u235', 'u236', 'u238',
    'pu239', 'pu240', 'pu241', 'pu242',
]
In [41]:
# Restrict the scaled features to the columns listed in `upu`.
# DataFrame.filter keeps only the listed labels that actually exist, so a
# name missing from trainX is skipped rather than raising.
trainX_upu = trainX.filter(items=upu, axis=1)
trainX_upu.head()
Out[41]:
In [ ]: