In [2]:
%matplotlib inline

In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import seaborn



In [4]:
#import data
df = pd.read_excel('All_Isotopes_Generated.xlsx', 'Sheet1')

needed_isotopes = ['Reactor', 'Enrichment', 'u234', 'u235', 'u236', 'u238',
                   'pu238', 'pu239', 'pu240', 'pu241', 'pu242']
                   
df = df[needed_isotopes]

# split data table into feature matrix x and label columns y (Reactor, Enrichment)
x = df.iloc[:, 2:].values
y = df.iloc[:, 0:2].values

In [5]:
x_std = StandardScaler().fit_transform(x)
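
StandardScaler centers each isotope column to zero mean and scales it to unit variance, so no isotope dominates the covariance simply because of its magnitude. A quick sanity check (a minimal sketch, not part of the original run):

# after standardization every column should have ~zero mean and unit
# (population) standard deviation, since StandardScaler uses ddof=0
print(np.allclose(x_std.mean(axis=0), 0.0))   # expected: True
print(np.allclose(x_std.std(axis=0), 1.0))    # expected: True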

Covariance Matrix


In [6]:
mean_vec = np.mean(x_std, axis=0)
cov_mat = (x_std - mean_vec).T.dot((x_std - mean_vec)) / (x_std.shape[0]-1)
print('Covariance matrix \n%s' %cov_mat)


Covariance matrix 
[[ 1.00136054  0.92767294 -0.14885239 -0.11775597 -0.42614264 -0.12190458
  -0.58252194 -0.44362169 -0.55335077]
 [ 0.92767294  1.00136054 -0.49186034  0.18916481 -0.61973438 -0.33354484
  -0.80283918 -0.65478251 -0.70479229]
 [-0.14885239 -0.49186034  1.00136054 -0.89523878  0.80246902  0.75721837
   0.84568749  0.85166642  0.69320368]
 [-0.11775597  0.18916481 -0.89523878  1.00136054 -0.80977751 -0.71715002
  -0.71502266 -0.78270486 -0.69348626]
 [-0.42614264 -0.61973438  0.80246902 -0.80977751  1.00136054  0.60245834
   0.89680945  0.9019898   0.94348875]
 [-0.12190458 -0.33354484  0.75721837 -0.71715002  0.60245834  1.00136054
   0.67090778  0.83489857  0.44586445]
 [-0.58252194 -0.80283918  0.84568749 -0.71502266  0.89680945  0.67090778
   1.00136054  0.93928643  0.90174966]
 [-0.44362169 -0.65478251  0.85166642 -0.78270486  0.9019898   0.83489857
   0.93928643  1.00136054  0.81376063]
 [-0.55335077 -0.70479229  0.69320368 -0.69348626  0.94348875  0.44586445
   0.90174966  0.81376063  1.00136054]]

In [7]:
#eigendecomposition on the matrix
cov_mat = np.cov(x_std.T)

eig_vals, eig_vecs = np.linalg.eig(cov_mat)

print('Eigenvectors \n%s' %eig_vecs)
print('\nEigenvalues \n%s' %eig_vals)


Eigenvectors 
[[-0.20030876  0.63771992 -0.15332905 -0.13361956  0.08795281 -0.16025946
   0.19704803 -0.34133174 -0.56839392]
 [-0.29116559  0.48459491 -0.14902883  0.36369115  0.05936071 -0.14502893
  -0.01893453 -0.09476705  0.70143648]
 [ 0.3503192   0.25049007  0.05634367 -0.69679685 -0.24047999  0.14134595
  -0.18217282 -0.30491986  0.34908216]
 [-0.3139701  -0.43617018  0.23567336  0.04917234 -0.13647312 -0.13131746
   0.187426   -0.75855499  0.08255381]
 [ 0.37450749  0.03027429 -0.34050778  0.30490781 -0.65867462  0.07657612
   0.45616132 -0.04241849 -0.01773838]
 [ 0.2939705   0.28428875  0.70603595  0.33482561  0.13089802  0.41205683
   0.15174645 -0.10516784 -0.03396947]
 [ 0.38879072 -0.08404245 -0.00095515 -0.16326499  0.49201034 -0.43468783
   0.58843443  0.02957044  0.19226309]
 [ 0.3842011   0.05973259  0.1992037   0.25971811 -0.16573919 -0.67280028
  -0.48505122 -0.101921   -0.12607005]
 [ 0.35826668 -0.11455848 -0.49137433  0.25048928  0.43789823  0.31431211
  -0.28259196 -0.42662413 -0.04375386]]

Eigenvalues 
[  6.37245171e+00   1.78840329e+00   5.53078250e-01   2.09220561e-01
   4.94126408e-02   3.55548154e-02   3.47517099e-03   1.11444849e-04
   5.37017734e-04]

Correlation Matrix


In [8]:
#Eigendecomposition of the standardized data based on the correlation matrix
cor_mat1 = np.corrcoef(x_std.T)

eig_vals, eig_vecs = np.linalg.eig(cor_mat1)

print('Eigenvectors \n%s' %eig_vecs)
print('\nEigenvalues \n%s' %eig_vals)


Eigenvectors 
[[-0.20030876  0.63771992 -0.15332905 -0.13361956  0.08795281 -0.16025946
   0.19704803  0.34133174 -0.56839392]
 [-0.29116559  0.48459491 -0.14902883  0.36369115  0.05936071 -0.14502893
  -0.01893453  0.09476705  0.70143648]
 [ 0.3503192   0.25049007  0.05634367 -0.69679685 -0.24047999  0.14134595
  -0.18217282  0.30491986  0.34908216]
 [-0.3139701  -0.43617018  0.23567336  0.04917234 -0.13647312 -0.13131746
   0.187426    0.75855499  0.08255381]
 [ 0.37450749  0.03027429 -0.34050778  0.30490781 -0.65867462  0.07657612
   0.45616132  0.04241849 -0.01773838]
 [ 0.2939705   0.28428875  0.70603595  0.33482561  0.13089802  0.41205683
   0.15174645  0.10516784 -0.03396947]
 [ 0.38879072 -0.08404245 -0.00095515 -0.16326499  0.49201034 -0.43468783
   0.58843443 -0.02957044  0.19226309]
 [ 0.3842011   0.05973259  0.1992037   0.25971811 -0.16573919 -0.67280028
  -0.48505122  0.101921   -0.12607005]
 [ 0.35826668 -0.11455848 -0.49137433  0.25048928  0.43789823  0.31431211
  -0.28259196  0.42662413 -0.04375386]]

Eigenvalues 
[  6.36379348e+00   1.78597340e+00   5.52326785e-01   2.08936294e-01
   4.93455041e-02   3.55065072e-02   3.47044929e-03   1.11293429e-04
   5.36288090e-04]
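
The eigenvectors are the same as for the covariance matrix (up to arbitrary sign flips per column), and the eigenvalues differ only by the factor n/(n-1) that np.cov applies through its default ddof=1. A quick check, sketched with cov_mat and cor_mat1 from the cells above:

n = x_std.shape[0]
# for standardized data the covariance matrix is the correlation matrix
# scaled by n / (n - 1), because np.cov divides by (n - 1)
print(np.allclose(cov_mat, cor_mat1 * n / (n - 1.0)))   # expected: True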

In [9]:
#Eigendecomposition of the raw data based on the correlation matrix

cor_mat2 = np.corrcoef(x.T)

eig_vals, eig_vecs = np.linalg.eig(cor_mat2)

print('Eigenvectors \n%s' %eig_vecs)
print('\nEigenvalues \n%s' %eig_vals)


Eigenvectors 
[[-0.20030876  0.63771992 -0.15332905 -0.13361956  0.08795281 -0.16025946
   0.19704803 -0.34133174 -0.56839392]
 [-0.29116559  0.48459491 -0.14902883  0.36369115  0.05936071 -0.14502893
  -0.01893453 -0.09476705  0.70143648]
 [ 0.3503192   0.25049007  0.05634367 -0.69679685 -0.24047999  0.14134595
  -0.18217282 -0.30491986  0.34908216]
 [-0.3139701  -0.43617018  0.23567336  0.04917234 -0.13647312 -0.13131746
   0.187426   -0.75855499  0.08255381]
 [ 0.37450749  0.03027429 -0.34050778  0.30490781 -0.65867462  0.07657612
   0.45616132 -0.04241849 -0.01773838]
 [ 0.2939705   0.28428875  0.70603595  0.33482561  0.13089802  0.41205683
   0.15174645 -0.10516784 -0.03396947]
 [ 0.38879072 -0.08404245 -0.00095515 -0.16326499  0.49201034 -0.43468783
   0.58843443  0.02957044  0.19226309]
 [ 0.3842011   0.05973259  0.1992037   0.25971811 -0.16573919 -0.67280028
  -0.48505122 -0.101921   -0.12607005]
 [ 0.35826668 -0.11455848 -0.49137433  0.25048928  0.43789823  0.31431211
  -0.28259196 -0.42662413 -0.04375386]]

Eigenvalues 
[  6.36379348e+00   1.78597340e+00   5.52326785e-01   2.08936294e-01
   4.93455041e-02   3.55065072e-02   3.47044929e-03   1.11293429e-04
   5.36288090e-04]
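
Pearson correlation is invariant to shifting and scaling each column, so the correlation matrix of the raw data is identical to that of the standardized data and this cell reproduces the previous eigenpairs. A one-line check (sketch):

# per-column standardization does not change the correlation matrix
print(np.allclose(np.corrcoef(x.T), np.corrcoef(x_std.T)))   # expected: True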

Singular Value Decomposition


In [10]:
u,s,v = np.linalg.svd(x_std.T)
u


Out[10]:
array([[-0.20030876, -0.63771992, -0.15332905, -0.13361956,  0.08795281,
         0.16025946, -0.19704803, -0.56839392,  0.34133174],
       [-0.29116559, -0.48459491, -0.14902883,  0.36369115,  0.05936071,
         0.14502893,  0.01893453,  0.70143648,  0.09476705],
       [ 0.3503192 , -0.25049007,  0.05634367, -0.69679685, -0.24047999,
        -0.14134595,  0.18217282,  0.34908216,  0.30491986],
       [-0.3139701 ,  0.43617018,  0.23567336,  0.04917234, -0.13647312,
         0.13131746, -0.187426  ,  0.08255381,  0.75855499],
       [ 0.37450749, -0.03027429, -0.34050778,  0.30490781, -0.65867462,
        -0.07657612, -0.45616132, -0.01773838,  0.04241849],
       [ 0.2939705 , -0.28428875,  0.70603595,  0.33482561,  0.13089802,
        -0.41205683, -0.15174645, -0.03396947,  0.10516784],
       [ 0.38879072,  0.08404245, -0.00095515, -0.16326499,  0.49201034,
         0.43468783, -0.58843443,  0.19226309, -0.02957044],
       [ 0.3842011 , -0.05973259,  0.1992037 ,  0.25971811, -0.16573919,
         0.67280028,  0.48505122, -0.12607005,  0.101921  ],
       [ 0.35826668,  0.11455848, -0.49137433,  0.25048928,  0.43789823,
        -0.31431211,  0.28259196, -0.04375386,  0.42662413]])
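
The columns of u are the same principal directions (again up to sign), and the squared singular values divided by n - 1 reproduce the covariance-matrix eigenvalues printed earlier. A small check, sketched with the s returned above:

# squared singular values of x_std.T, scaled by 1 / (n - 1), equal the
# eigenvalues of the covariance matrix of the standardized data
print(s**2 / (x_std.shape[0] - 1.0))
print(np.allclose(np.sort(s**2 / (x_std.shape[0] - 1.0)),
                  np.sort(np.linalg.eigvalsh(np.cov(x_std.T)))))   # expected: True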

Selecting Principal Components


In [11]:
# confirm that every eigenvector has unit length before sorting the eigenpairs

for ev in eig_vecs.T:
    np.testing.assert_array_almost_equal(1.0, np.linalg.norm(ev))
print('Everything ok!')


Everything ok!

In [12]:
# Make a list of (eigenvalue, eigenvector) tuples
eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:,i]) for i in range(len(eig_vals))]

# Sort the (eigenvalue, eigenvector) tuples from high to low
eig_pairs.sort()
eig_pairs.reverse()

# Visually confirm that the list is correctly sorted by decreasing eigenvalues
print('Eigenvalues in descending order:')
for i in eig_pairs:
    print(i[0])


Eigenvalues in descending order:
6.36379348365
1.78597339529
0.552326784917
0.208936294005
0.0493455040835
0.03550650724
0.00347044929353
0.000536288090161
0.000111293428871

Explained Variance


In [13]:
tot = sum(eig_vals)
var_exp = [(i / tot)*100 for i in sorted(eig_vals, reverse=True)]
cum_var_exp = np.cumsum(var_exp)

print(cum_var_exp)

plt.bar(range(1, 10), var_exp,
        label='individual explained variance')

plt.plot(range(1, 10), cum_var_exp,
         label='cumulative explained variance')

plt.xlabel('Principal component')
plt.ylabel('Explained variance (%)')
plt.ylim(0, 100)
plt.legend()
plt.show()


[  70.70881649   90.55296532   96.6899296    99.01144398   99.55972735
   99.9542441    99.99280465   99.99876341  100.        ]
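
The first two components explain about 90% of the variance and three reach roughly 97%, which is why two (and later three) components are kept. If the dimensionality were instead chosen by a variance threshold, it could be done like this (sketch using cum_var_exp from above):

# smallest number of components whose cumulative explained variance
# reaches a chosen threshold (95% here)
threshold = 95.0
n_keep = int(np.argmax(cum_var_exp >= threshold) + 1)
print(n_keep)   # 3 for this data set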

Projection Matrix


In [14]:
matrix_w = np.hstack((eig_pairs[0][1].reshape(9,1), 
                      eig_pairs[1][1].reshape(9,1)))

print('Matrix W:\n%s' % matrix_w)


Matrix W:
[[-0.20030876  0.63771992]
 [-0.29116559  0.48459491]
 [ 0.3503192   0.25049007]
 [-0.3139701  -0.43617018]
 [ 0.37450749  0.03027429]
 [ 0.2939705   0.28428875]
 [ 0.38879072 -0.08404245]
 [ 0.3842011   0.05973259]
 [ 0.35826668 -0.11455848]]

Project Onto the New Feature Space


In [18]:
Y = x_std.dot(matrix_w)

In [19]:
len(Y)


Out[19]:
736
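
The manual projection should agree with scikit-learn's PCA on the same standardized data, except that each principal component's sign is arbitrary, so the comparison has to ignore signs. A sketch of that check:

# sklearn PCA and the manual eigendecomposition give the same scores up to
# a possible sign flip per component
Y_sklearn = PCA(n_components=2).fit_transform(x_std)
print(np.allclose(np.abs(Y), np.abs(Y_sklearn)))   # expected: True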

In [71]:
# per-reactor scatter of the first two principal components; y has two
# columns (Reactor, Enrichment), so the class mask uses the first column
for name in ('BWR', 'VVER', 'RBMK'):
    mask = (y[:, 0] == name)
    plt.scatter(Y[mask, 0], Y[mask, 1], label=name)

plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()
plt.show()

In [ ]:


In [ ]:

With scikit-learn


In [30]:
#import data
df = pd.read_excel('All_Isotopes_Generated.xlsx', 'Sheet1')

needed_isotopes = ['Reactor', 'Enrichment', 'u234', 'u235', 'u236', 'u238',
                   'pu238', 'pu239', 'pu240', 'pu241', 'pu242']
                   
df = df[needed_isotopes]

# split data table into feature matrix x and label columns
x = df.iloc[:, 2:].values
reactor = df.iloc[:, 0].values
enrichment = df.iloc[:, 1].values

x_std = StandardScaler().fit_transform(x)
#x_std = x

pca_model = PCA(n_components=3)
result = pca_model.fit_transform(x_std)
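
The fitted model also reports the explained variance directly, which should match the percentages computed by hand above. A quick look (sketch):

# fraction of total variance captured by each retained component
print(pca_model.explained_variance_ratio_)
print(pca_model.explained_variance_ratio_.cumsum())   # ~ [0.707, 0.906, 0.967]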

In [16]:
df_result = pd.DataFrame(result, columns=['PC1', 'PC2', 'PC3'])
df_result['Reactor'] = reactor
df_result['Enrichment'] = enrichment
df_result = df_result[df_result.columns[-2:].append(df_result.columns[:-2])]

In [29]:
df_result


Out[29]:
Reactor Enrichment PC1 PC2 PC3
0 BWR 3.0 2.324359 0.640660 -0.300365
1 BWR 3.0 2.159858 0.648795 -0.368817
2 BWR 3.0 1.995028 0.657830 -0.426216
3 BWR 3.0 1.833563 0.669921 -0.476845
4 BWR 3.0 1.672584 0.685542 -0.519843
5 BWR 3.0 1.512188 0.702172 -0.555975
6 BWR 3.0 1.350975 0.717030 -0.583907
7 BWR 3.0 1.190739 0.734377 -0.604427
8 BWR 3.0 1.034071 0.755563 -0.618856
9 BWR 3.0 0.876613 0.775045 -0.625761
10 BWR 3.0 3.949015 0.800050 1.022931
11 BWR 3.0 0.720793 0.796400 -0.625925
12 BWR 3.0 0.565559 0.818481 -0.620283
13 BWR 3.0 0.411894 0.841181 -0.608189
14 BWR 3.0 0.259031 0.865188 -0.591048
15 BWR 3.0 0.107417 0.888677 -0.568201
16 BWR 3.0 -0.042793 0.913976 -0.539875
17 BWR 3.0 -0.195127 0.937305 -0.504308
18 BWR 3.0 -0.342444 0.963160 -0.465800
19 BWR 3.0 -0.489711 0.987047 -0.421423
20 BWR 3.0 -0.639949 1.019683 -0.356087
21 BWR 3.0 3.754034 0.752252 0.788187
22 BWR 3.0 -0.784879 1.044659 -0.302301
23 BWR 3.0 -0.927280 1.069391 -0.244097
24 BWR 3.0 -1.068761 1.094976 -0.182645
25 BWR 3.0 -1.208528 1.119957 -0.117579
26 BWR 3.0 -1.346245 1.145846 -0.049227
27 BWR 3.0 -1.484914 1.168502 0.023970
28 BWR 3.0 -1.619247 1.193115 0.098966
29 BWR 3.0 -1.752029 1.218626 0.176324
... ... ... ... ... ...
706 RBMK 2.1 3.049474 1.970356 0.074828
707 RBMK 2.1 2.884668 1.969819 -0.019237
708 RBMK 2.1 2.721502 1.974948 -0.097467
709 RBMK 2.1 2.561166 1.987070 -0.164178
710 RBMK 2.1 2.398534 1.997586 -0.215412
711 RBMK 2.2 2.283920 1.887063 -0.218731
712 RBMK 2.2 2.126278 1.906205 -0.252654
713 RBMK 2.2 1.967978 1.925333 -0.275000
714 RBMK 2.2 1.812187 1.949016 -0.289522
715 RBMK 2.2 1.657888 1.974483 -0.295175
716 RBMK 2.2 1.500750 2.001793 -0.284874
717 RBMK 2.2 1.345983 2.028279 -0.272827
718 RBMK 2.2 1.192853 2.055181 -0.252694
719 RBMK 2.2 1.040778 2.083635 -0.225352
720 RBMK 2.2 0.887588 2.109318 -0.189953
721 RBMK 2.2 3.751601 1.906035 0.702952
722 RBMK 2.2 0.737900 2.139102 -0.149031
723 RBMK 2.2 0.587487 2.166720 -0.100587
724 RBMK 2.2 0.438428 2.194643 -0.046017
725 RBMK 2.2 0.290670 2.222871 0.014398
726 RBMK 2.2 0.144822 2.250604 0.080486
727 RBMK 2.2 -0.002176 2.276324 0.152460
728 RBMK 2.2 3.582091 1.876077 0.521399
729 RBMK 2.2 3.413796 1.854529 0.368643
730 RBMK 2.2 3.249941 1.843803 0.237489
731 RBMK 2.2 3.087085 1.841046 0.124566
732 RBMK 2.2 2.923876 1.840793 0.029620
733 RBMK 2.2 2.761145 1.846384 -0.051267
734 RBMK 2.2 2.602539 1.857791 -0.119532
735 RBMK 2.2 2.442587 1.871499 -0.174776

736 rows × 5 columns


In [17]:
df_result.to_csv('generated_PCA_data.csv')

In [28]:
df_result.plot(kind='scatter', x='PC1', y='PC2')


Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x1174413d0>

In [26]:
df_result.plot(kind='scatter', x='PC1', y='PC3')


Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x117171f10>

In [27]:
df_result.plot(kind='scatter', x='PC2', y='PC3')


Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x1172cb4d0>
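
With three components retained, the reactor classes can also be inspected in a single 3D scatter plot; Axes3D was already imported at the top of the notebook. A sketch:

# 3D scatter of the first three principal components, one color per reactor
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
for name, group in df_result.groupby('Reactor'):
    ax.scatter(group['PC1'], group['PC2'], group['PC3'], label=name)
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_zlabel('PC3')
ax.legend()
plt.show()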

In [7]:
df = pd.read_csv('marakova-table-7-rbmk-spent-fuel-wo-uncertainty.csv')

In [16]:
df


Out[16]:
Sample no. u234 u235 u236 u238 pu238 pu239 pu240 pu241 pu242
0 1 0.0886 4.865 2.070 969.8 0.02920 2.615 1.644 0.511 0.2220
1 2 0.0844 2.597 2.320 965.5 0.04750 2.374 2.069 0.607 0.4290
2 3 0.0810 2.675 2.300 964.7 0.05280 2.595 2.226 0.650 0.4550
3 4 0.0906 3.570 2.180 970.2 0.03260 2.351 1.728 0.498 0.2730
4 5 0.0980 5.050 2.020 971.8 0.02450 2.505 1.526 0.455 0.1820
5 6 0.0840 2.230 2.330 967.6 0.04790 2.338 2.035 0.584 0.4340
6 7 0.0900 2.440 2.470 963.0 0.06470 2.405 2.134 0.634 0.4660
7 8 0.0960 4.050 2.140 970.7 0.03050 2.292 1.518 0.440 0.2091
8 9 0.1030 7.280 1.740 974.7 0.01280 2.290 1.165 0.315 0.0861
9 10 0.1160 11.236 1.080 977.7 0.00306 2.136 0.518 0.117 0.0130
10 11 0.1140 10.480 1.218 976.5 0.00528 2.267 0.650 0.156 0.0200
11 12 0.0980 5.010 2.034 971.8 0.02950 2.541 1.598 0.466 0.2020
12 13 0.1000 5.938 1.910 972.5 0.02480 2.551 1.508 0.401 0.1370
13 14 0.1020 5.880 1.900 972.3 0.02290 2.570 1.443 0.381 0.1380
14 15 0.1010 4.846 2.024 968.1 0.03210 2.866 1.855 0.535 0.2260
15 16 0.1010 6.104 1.883 972.9 0.02370 2.615 1.448 0.426 0.1490
16 17 0.0990 5.052 2.016 972.2 0.02530 2.450 1.493 0.430 0.1750
17 18 0.0870 2.030 2.470 965.8 0.05670 2.293 2.187 0.617 0.5110
18 19 0.0880 2.330 2.400 967.2 0.05450 2.317 2.071 0.570 0.4370
19 20 0.1470 13.770 1.184 976.4 0.00143 1.577 0.336 0.075 0.0069
20 21 0.1420 11.750 1.485 975.1 0.00294 2.024 0.588 0.142 0.0189
21 22 0.1400 11.720 1.503 974.7 0.00332 2.106 0.641 0.155 0.0229
22 23 0.1430 12.760 1.340 976.8 0.00262 2.024 0.495 0.113 0.0132
23 24 0.1440 12.720 1.340 975.2 0.00286 2.069 0.518 0.121 0.0141
24 25 0.1460 13.720 1.200 976.2 0.00200 1.614 0.355 0.080 0.0100
25 26 0.1080 3.411 2.700 967.2 0.04460 2.349 1.917 0.578 0.3690
26 27 0.1080 3.419 2.720 966.7 0.04350 2.369 1.925 0.568 0.3660
27 28 0.1840 10.640 2.881 973.7 0.01490 2.428 0.804 0.216 0.0380
28 29 0.1440 3.419 3.840 962.5 0.10310 2.642 2.227 0.702 0.4710
29 30 0.1390 3.419 3.840 961.6 0.11200 2.603 2.309 0.720 0.5320
30 31 0.1380 3.419 3.840 963.6 0.08980 2.431 2.074 0.624 0.4620
31 32 0.1630 3.419 3.840 968.3 0.03920 2.630 1.420 0.424 0.1520
32 33 0.1320 3.419 3.840 961.2 0.10210 2.451 2.192 0.667 0.5250
33 34 0.1480 3.419 3.840 964.6 0.07210 2.550 1.841 0.539 0.2900
34 35 0.2550 3.419 3.840 969.7 0.02140 2.423 0.724 0.202 0.0308
35 36 0.2190 3.419 3.840 964.4 0.07460 2.712 1.544 0.499 0.1840
36 37 0.2070 3.419 3.840 962.0 0.10140 2.612 1.811 0.580 0.2960
37 38 0.1970 3.419 3.840 960.9 0.10100 2.525 1.881 0.578 0.3400
38 39 0.2140 3.419 3.840 963.6 0.08220 2.688 1.667 0.490 0.2090
39 40 0.2220 3.419 3.840 967.7 0.03890 2.617 1.098 0.323 0.0770
40 41 0.2390 3.419 3.840 969.5 0.02280 2.477 0.769 0.214 0.0350

In [23]:
pd.DataFrame(df.iloc[-1,:]).T


Out[23]:
Sample no. u234 u235 u236 u238 pu238 pu239 pu240 pu241 pu242
40 41 0.239 3.419 3.84 969.5 0.0228 2.477 0.769 0.214 0.035
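
To place these measured RBMK samples in the same PC space as the generated data, the measurements have to go through a scaler and PCA fitted on the generated data; the cells above only kept the transformed array, not the fitted scaler. A sketch of that step, assuming x still holds the generated isotope matrix and df now holds the measured table:

# refit scaler and PCA on the generated data, keeping both objects, then
# project the measured isotope vectors into the same principal-component space
isotope_cols = ['u234', 'u235', 'u236', 'u238',
                'pu238', 'pu239', 'pu240', 'pu241', 'pu242']

scaler = StandardScaler().fit(x)                        # generated data
pca_gen = PCA(n_components=3).fit(scaler.transform(x))

measured_pcs = pca_gen.transform(scaler.transform(df[isotope_cols].values))
print(measured_pcs[:3])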

In [ ]: