In [2]:
%matplotlib inline
In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import seaborn
In [4]:
# import data
df = pd.read_excel('All_Isotopes_Generated.xlsx', 'Sheet1')
needed_isotopes = ['Reactor', 'Enrichment', 'u234', 'u235', 'u236', 'u238',
                   'pu238', 'pu239', 'pu240', 'pu241', 'pu242']
df = df[needed_isotopes]
# split data table into feature matrix x and class labels y
x = df.iloc[:, 2:].values   # isotope concentrations
y = df.iloc[:, 0:2].values  # Reactor and Enrichment labels
In [5]:
# standardize each isotope column to zero mean and unit variance
x_std = StandardScaler().fit_transform(x)
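A quick sanity check on the scaling (a minimal sketch, assuming x_std is the array produced in the cell above): every standardized column should come out with mean ≈ 0 and standard deviation ≈ 1.
In [ ]:
# after StandardScaler each isotope column should have mean ~0 and std ~1
print('column means:', np.round(x_std.mean(axis=0), 6))
print('column stds: ', np.round(x_std.std(axis=0), 6))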
In [6]:
# covariance matrix of the standardized data, computed by hand
mean_vec = np.mean(x_std, axis=0)
cov_mat = (x_std - mean_vec).T.dot((x_std - mean_vec)) / (x_std.shape[0] - 1)
print('Covariance matrix \n%s' % cov_mat)
In [7]:
# eigendecomposition of the covariance matrix (np.cov as a cross-check of the manual computation above)
cov_mat = np.cov(x_std.T)
eig_vals, eig_vecs = np.linalg.eig(cov_mat)
print('Eigenvectors \n%s' %eig_vecs)
print('\nEigenvalues \n%s' %eig_vals)
In [8]:
#Eigendecomposition of the standardized data based on the correlation matrix
cor_mat1 = np.corrcoef(x_std.T)
eig_vals, eig_vecs = np.linalg.eig(cor_mat1)
print('Eigenvectors \n%s' %eig_vecs)
print('\nEigenvalues \n%s' %eig_vals)
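Because the data were standardized, the covariance matrix from the previous cells and this correlation matrix should be numerically identical; a small cross-check of that equivalence, assuming cov_mat and cor_mat1 are still in scope.
In [ ]:
# for standardized data the covariance matrix and the correlation matrix coincide
print(np.allclose(cov_mat, cor_mat1))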
In [9]:
#Eigendecomposition of the raw data based on the correlation matrix
cor_mat2 = np.corrcoef(x.T)
eig_vals, eig_vecs = np.linalg.eig(cor_mat2)
print('Eigenvectors \n%s' %eig_vecs)
print('\nEigenvalues \n%s' %eig_vals)
In [10]:
# SVD of the standardized data (transposed so rows are features); u holds the left singular vectors
u, s, v = np.linalg.svd(x_std.T)
u
Out[10]:
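The SVD route should agree with the eigendecomposition: the squared singular values of the standardized data, divided by n - 1, reproduce the correlation-matrix eigenvalues. A minimal cross-check, assuming s and eig_vals are still in scope from the cells above.
In [ ]:
# squared singular values, scaled by (n - 1), recover the eigenvalues (up to ordering)
n_samples = x_std.shape[0]
print('from SVD:', np.sort(s**2 / (n_samples - 1))[::-1])
print('from eig:', np.sort(eig_vals)[::-1])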
In [11]:
# check that every eigenvector has unit length (the columns of eig_vecs)
for ev in eig_vecs.T:
    np.testing.assert_array_almost_equal(1.0, np.linalg.norm(ev))
print('Everything ok!')
In [12]:
# Make a list of (eigenvalue, eigenvector) tuples
eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in range(len(eig_vals))]
# Sort the (eigenvalue, eigenvector) tuples from high to low
# (sort on the eigenvalue only, so ties never fall through to comparing arrays)
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
# Visually confirm that the list is correctly sorted by decreasing eigenvalues
print('Eigenvalues in descending order:')
for pair in eig_pairs:
    print(pair[0])
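An equivalent way to order the eigenpairs, shown here only as an alternative sketch, is to argsort the eigenvalues and reindex the eigenvector columns directly.
In [ ]:
# alternative ordering via argsort instead of (eigenvalue, eigenvector) tuples
order = np.argsort(np.abs(eig_vals))[::-1]   # indices of eigenvalues, largest first
eig_vals_sorted = eig_vals[order]
eig_vecs_sorted = eig_vecs[:, order]         # columns reordered to match
print(eig_vals_sorted)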
In [13]:
tot = sum(eig_vals)
var_exp = [(i / tot) * 100 for i in sorted(eig_vals, reverse=True)]
cum_var_exp = np.cumsum(var_exp)
print(cum_var_exp)
# scree plot: individual and cumulative explained variance per principal component
plt.bar(range(1, 10), var_exp, label='individual explained variance')
plt.plot(range(1, 10), cum_var_exp, label='cumulative explained variance')
plt.xlabel('Principal component')
plt.ylabel('Explained variance (%)')
plt.ylim(0, 100)
plt.legend(loc='best')
plt.show()
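The cumulative curve is what the component count is usually read off; a small sketch of that readout, assuming cum_var_exp from the cell above and an arbitrary 95% threshold.
In [ ]:
# number of principal components needed to reach 95% cumulative explained variance
n_components_95 = int(np.argmax(cum_var_exp >= 95) + 1)
print('components for 95% of the variance:', n_components_95)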
In [14]:
# projection matrix W: stack the top two eigenvectors as columns (9 features -> 2 PCs)
matrix_w = np.hstack((eig_pairs[0][1].reshape(9, 1),
                      eig_pairs[1][1].reshape(9, 1)))
print('Matrix W:\n', matrix_w)
In [18]:
# project the standardized data onto the first two principal components
Y = x_std.dot(matrix_w)
In [19]:
len(Y)
Out[19]:
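A quick shape check on the projection (assuming Y from the cell above): one row per sample, one column per retained component.
In [ ]:
# the projection keeps every sample but reduces the 9 isotope features to 2 components
print(Y.shape)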
In [71]:
# scatter the projected data in the PC1-PC2 plane, one colour per reactor type
for name in ('BWR', 'VVER', 'RBMK'):
    mask = (y[:, 0] == name)
    plt.scatter(Y[mask, 0], Y[mask, 1], label=name)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend(loc='best')
plt.show()
In [30]:
# import data
df = pd.read_excel('All_Isotopes_Generated.xlsx', 'Sheet1')
needed_isotopes = ['Reactor', 'Enrichment', 'u234', 'u235', 'u236', 'u238',
                   'pu238', 'pu239', 'pu240', 'pu241', 'pu242']
df = df[needed_isotopes]
# split data table into feature matrix x and the two label columns
x = df.iloc[:, 2:].values
reactor = df.iloc[:, 0].values
enrichment = df.iloc[:, 1].values
x_std = StandardScaler().fit_transform(x)
#x_std = x
pca_model = PCA(n_components=3)
result = pca_model.fit_transform(x_std)
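scikit-learn's PCA should agree with the manual eigendecomposition up to the sign of each component; a hedged cross-check, assuming pca_model and result from the cell above and Y from the manual projection earlier in the notebook.
In [ ]:
# explained variance per component, as reported by scikit-learn
print('explained variance ratio:', pca_model.explained_variance_ratio_)
# the first two sklearn components should match the manual projection up to a per-column sign flip
print(np.allclose(np.abs(result[:, :2]), np.abs(Y)))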
In [16]:
df_result = pd.DataFrame(result, columns=['PC1', 'PC2', 'PC3'])
df_result['Reactor'] = reactor
df_result['Enrichment'] = enrichment
# move the Reactor and Enrichment columns to the front of the frame
df_result = df_result[df_result.columns[-2:].append(df_result.columns[:-2])]
In [29]:
df_result
Out[29]:
In [17]:
df_result.to_csv('generated_PCA_data.csv')
In [28]:
df_result.plot(kind='scatter', x='PC1', y='PC2')
Out[28]:
In [26]:
df_result.plot(kind='scatter', x='PC1', y='PC3')
Out[26]:
In [27]:
df_result.plot(kind='scatter', x='PC2', y='PC3')
Out[27]:
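Since three components are kept here, the Axes3D import at the top of the notebook can be put to use; a minimal 3D view of the same projection, coloured by reactor type, assuming df_result from the cells above.
In [ ]:
# 3D scatter of the first three principal components, one colour per reactor type
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
for name, group in df_result.groupby('Reactor'):
    ax.scatter(group['PC1'], group['PC2'], group['PC3'], label=name)
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_zlabel('PC3')
ax.legend(loc='best')
plt.show()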
In [7]:
df = pd.read_csv('marakova-table-7-rbmk-spent-fuel-wo-uncertainty.csv')
In [16]:
df
Out[16]:
In [23]:
pd.DataFrame(df.iloc[-1,:]).T
Out[23]:
In [ ]: