In [2]:
import pandas as pd
import numpy as np

In [3]:
val=np.matrix([[  8.6411,   8.7376,   6.5835,   8.0363,   5.8235,   6.9572],
       [  9.1225,   7.8232,   6.7591,   7.8883,   5.8564,   7.4644],
       [ 11.7064,   8.3946,   6.7469,   8.0737,   5.9594,   6.9672],
       [  9.4505,   8.7498,   6.7086,   8.0391,   5.8791,   7.3993],
       [ 10.7963,   8.235 ,   6.7443,   7.864 ,   5.9691,   6.9801],
       [  9.086 ,   9.0479,   6.5651,   7.8576,   5.9568,   7.1211],
       [  9.5048,   8.473 ,   6.6788,   7.8939,   5.9607,   7.6634],
       [  8.7645,   8.4184,   6.6414,   7.7059,   5.9074,   8.0684],
       [  9.9437,   9.0894,   6.7041,   7.9346,   5.9644,   6.9568],
       [  9.7562,   8.669 ,   6.7211,   7.8148,   5.9673,   7.5553]])  
val=np.matrix([[-4.8927, -7.5595, -6.1081, -7.6419, -6.987 , -7.3919, -7.1961,
        -6.2051, -7.0551, -6.5356],
       [-4.9939, -5.2557, -4.7379, -5.7602, -5.1277, -5.5271, -5.5584,
        -4.4296, -5.3498, -5.3201]])

In [4]:
val  #data already in the form of sample:probes


Out[4]:
matrix([[-4.8927, -7.5595, -6.1081, -7.6419, -6.987 , -7.3919, -7.1961,
         -6.2051, -7.0551, -6.5356],
        [-4.9939, -5.2557, -4.7379, -5.7602, -5.1277, -5.5271, -5.5584,
         -4.4296, -5.3498, -5.3201]])

In [5]:
#t_val=np.transpose(val)
t_val=val
mean_val=np.mean(t_val,axis=0)
print(t_val)
print(mean_val)


[[-4.8927 -7.5595 -6.1081 -7.6419 -6.987  -7.3919 -7.1961 -6.2051 -7.0551
  -6.5356]
 [-4.9939 -5.2557 -4.7379 -5.7602 -5.1277 -5.5271 -5.5584 -4.4296 -5.3498
  -5.3201]]
[[-4.9433  -6.4076  -5.423   -6.70105 -6.05735 -6.4595  -6.37725 -5.31735
  -6.20245 -5.92785]]

In [6]:
new_val=t_val-mean_val
print(new_val)


[[ 0.0506  -1.1519  -0.6851  -0.94085 -0.92965 -0.9324  -0.81885 -0.88775
  -0.85265 -0.60775]
 [-0.0506   1.1519   0.6851   0.94085  0.92965  0.9324   0.81885  0.88775
   0.85265  0.60775]]

In [7]:
covMat=np.cov(new_val,rowvar=0)
print(covMat)
print(new_val.T.dot(new_val) / (val.shape[0]-1))


[[ 0.00512072 -0.11657228 -0.06933212 -0.09521402 -0.09408058 -0.09435888
  -0.08286762 -0.0898403  -0.08628818 -0.0615043 ]
 [-0.11657228  2.65374722  1.57833338  2.16753023  2.14172767  2.14806312
   1.88646663  2.04519845  1.96433507  1.40013445]
 [-0.06933212  1.57833338  0.93872402  1.28915267  1.27380643  1.27757448
   1.12198827  1.21639505  1.16830103  0.83273905]
 [-0.09521402  2.16753023  1.28915267  1.77039744  1.7493224   1.75449708
   1.54083005  1.67047917  1.6044315   1.14360317]
 [-0.09408058  2.14172767  1.27380643  1.7493224   1.72849825  1.73361132
   1.52248781  1.65059358  1.58533215  1.12998957]
 [-0.09435888  2.14806312  1.27757448  1.75449708  1.73361132  1.73873952
   1.52699148  1.6554762   1.59002172  1.1333322 ]
 [-0.08286762  1.88646663  1.12198827  1.54083005  1.52248781  1.52699148
   1.34103065  1.45386818  1.39638491  0.99531218]
 [-0.0898403   2.04519845  1.21639505  1.67047917  1.65059358  1.6554762
   1.45386818  1.57620013  1.51388008  1.07906012]
 [-0.08628818  1.96433507  1.16830103  1.6044315   1.58533215  1.59002172
   1.39638491  1.51388008  1.45402405  1.03639607]
 [-0.0615043   1.40013445  0.83273905  1.14360317  1.12998957  1.1333322
   0.99531218  1.07906012  1.03639607  0.73872012]]
[[ 0.00512072 -0.11657228 -0.06933212 -0.09521402 -0.09408058 -0.09435888
  -0.08286762 -0.0898403  -0.08628818 -0.0615043 ]
 [-0.11657228  2.65374722  1.57833338  2.16753023  2.14172767  2.14806312
   1.88646663  2.04519845  1.96433507  1.40013445]
 [-0.06933212  1.57833338  0.93872402  1.28915267  1.27380643  1.27757448
   1.12198827  1.21639505  1.16830103  0.83273905]
 [-0.09521402  2.16753023  1.28915267  1.77039744  1.7493224   1.75449708
   1.54083005  1.67047917  1.6044315   1.14360317]
 [-0.09408058  2.14172767  1.27380643  1.7493224   1.72849825  1.73361132
   1.52248781  1.65059358  1.58533215  1.12998957]
 [-0.09435888  2.14806312  1.27757448  1.75449708  1.73361132  1.73873952
   1.52699148  1.6554762   1.59002172  1.1333322 ]
 [-0.08286762  1.88646663  1.12198827  1.54083005  1.52248781  1.52699148
   1.34103065  1.45386818  1.39638491  0.99531218]
 [-0.0898403   2.04519845  1.21639505  1.67047917  1.65059358  1.6554762
   1.45386818  1.57620013  1.51388008  1.07906012]
 [-0.08628818  1.96433507  1.16830103  1.6044315   1.58533215  1.59002172
   1.39638491  1.51388008  1.45402405  1.03639607]
 [-0.0615043   1.40013445  0.83273905  1.14360317  1.12998957  1.1333322
   0.99531218  1.07906012  1.03639607  0.73872012]]

In [8]:
eigVals,eigVects=np.linalg.eig(np.mat(covMat))
#eigVals,eigVects=np.linalg.eig(covMat)
print(eigVals)
print(eigVects)


[  0.00000000e+00 +0.00000000e+00j   1.39452021e+01 +0.00000000e+00j
   1.11177868e-16 +4.83488098e-16j   1.11177868e-16 -4.83488098e-16j
  -3.49546341e-16 +0.00000000e+00j   2.59702685e-16 +0.00000000e+00j
   1.33875015e-16 +0.00000000e+00j  -6.09050724e-17 +8.08990986e-17j
  -6.09050724e-17 -8.08990986e-17j  -8.45384677e-17 +0.00000000e+00j]
[[-0.99981638+0.j          0.01916254+0.j          0.01540968+0.00159667j
   0.01540968-0.00159667j  0.02276174+0.j          0.01343802+0.j
  -0.01774661+0.j         -0.03531335-0.02959337j -0.03531335+0.02959337j
   0.04440172+0.j        ]
 [-0.00836085+0.j         -0.43623185+0.j         -0.01694185-0.2491858j
  -0.01694185+0.2491858j   0.06956636+0.j         -0.08204824+0.j
  -0.02644851+0.j          0.04380163-0.01855428j  0.04380163+0.01855428j
   0.04714933+0.j        ]
 [-0.00497267+0.j         -0.25945172+0.j          0.34907779-0.0068812j
   0.34907779+0.0068812j  -0.24143295+0.j          0.49783102+0.j
   0.36135329+0.j         -0.06524090-0.0784916j  -0.06524090+0.0784916j
   0.02849480+0.j        ]
 [-0.00682898+0.j         -0.35630587+0.j          0.45711413+0.j
   0.45711413-0.j          0.21568167+0.j          0.08948800+0.j
   0.31011188+0.j         -0.11833728+0.39495019j -0.11833728-0.39495019j
   0.26040203+0.j        ]
 [-0.00674769+0.j         -0.35206436+0.j         -0.39079841-0.10240991j
  -0.39079841+0.10240991j -0.15468576+0.j         -0.00570299+0.j
  -0.02187824+0.j         -0.09529248+0.08735765j -0.09529248-0.08735765j
   0.36455300+0.j        ]
 [-0.00676765+0.j         -0.35310580+0.j         -0.24083424+0.14483647j
  -0.24083424-0.14483647j  0.31023467+0.j         -0.52539990+0.j
  -0.14686493+0.j         -0.08558324-0.42032785j -0.08558324+0.42032785j
  -0.16269356+0.j        ]
 [-0.00594347+0.j         -0.31010370+0.j         -0.07592634+0.02170508j
  -0.07592634-0.02170508j -0.46874146+0.j         -0.01838867+0.j
  -0.20847050+0.j         -0.14532526+0.22852059j -0.14532526-0.22852059j
  -0.62343481+0.j        ]
 [-0.00644356+0.j         -0.33619657+0.j         -0.42122071+0.04154067j
  -0.42122071-0.04154067j -0.31837374+0.j         -0.40329066+0.j
  -0.61580478+0.j         -0.02864062-0.17217217j -0.02864062+0.17217217j
   0.43266680+0.j        ]
 [-0.00618880+0.j         -0.32290397+0.j          0.30622986+0.2233798j
   0.30622986-0.2233798j   0.63214564+0.j          0.52556597+0.j
   0.02029926+0.j         -0.02024856-0.02523384j -0.02024856+0.02523384j
  -0.43806841+0.j        ]
 [-0.00441124+0.j         -0.23015879+0.j          0.18746143+0.01131444j
   0.18746143-0.01131444j -0.22129669+0.j          0.14820597+0.j
   0.57193046+0.j          0.71389375+0.j          0.71389375-0.j
  -0.00638122+0.j        ]]

In [9]:
n=3
eigValIndice=np.argsort(eigVals)            #对特征值从小到大排序  
n_eigValIndice=eigValIndice[-1:-(n+1):-1]   #最大的n个特征值的下标  
n_eigVect=eigVects[:,n_eigValIndice]        #最大的n个特征值对应的特征向量  
lowDDataMat=new_val*n_eigVect              #低维特征空间的数据  
reconMat=(lowDDataMat*n_eigVect.T)+mean_val  #重构数据
print(new_val)
print(n_eigVect)


[[ 0.0506  -1.1519  -0.6851  -0.94085 -0.92965 -0.9324  -0.81885 -0.88775
  -0.85265 -0.60775]
 [-0.0506   1.1519   0.6851   0.94085  0.92965  0.9324   0.81885  0.88775
   0.85265  0.60775]]
[[ 0.01916254+0.j  0.01343802+0.j -0.01774661+0.j]
 [-0.43623185+0.j -0.08204824+0.j -0.02644851+0.j]
 [-0.25945172+0.j  0.49783102+0.j  0.36135329+0.j]
 [-0.35630587+0.j  0.08948800+0.j  0.31011188+0.j]
 [-0.35206436+0.j -0.00570299+0.j -0.02187824+0.j]
 [-0.35310580+0.j -0.52539990+0.j -0.14686493+0.j]
 [-0.31010370+0.j -0.01838867+0.j -0.20847050+0.j]
 [-0.33619657+0.j -0.40329066+0.j -0.61580478+0.j]
 [-0.32290397+0.j  0.52556597+0.j  0.02029926+0.j]
 [-0.23015879+0.j  0.14820597+0.j  0.57193046+0.j]]

In [10]:
lowDDataMat


Out[10]:
matrix([[  2.64056832e+00+0.j,  -3.19189120e-16+0.j,  -5.55111512e-16+0.j],
        [ -2.64056832e+00+0.j,   6.38378239e-16+0.j,   8.32667268e-16+0.j]])

In [11]:
from sklearn.decomposition import PCA   
pca=PCA(n_components=n)  
newData=pca.fit_transform(val)
#print(val)
print((newData))
np.cov(val.transpose())


[[ -2.64056832e+00   1.31597728e-15]
 [  2.64056832e+00   1.31597728e-15]]
Out[11]:
array([[ 0.00512072, -0.11657228, -0.06933212, -0.09521402, -0.09408058,
        -0.09435888, -0.08286762, -0.0898403 , -0.08628818, -0.0615043 ],
       [-0.11657228,  2.65374722,  1.57833338,  2.16753023,  2.14172767,
         2.14806312,  1.88646663,  2.04519845,  1.96433507,  1.40013445],
       [-0.06933212,  1.57833338,  0.93872402,  1.28915267,  1.27380643,
         1.27757448,  1.12198827,  1.21639505,  1.16830103,  0.83273905],
       [-0.09521402,  2.16753023,  1.28915267,  1.77039744,  1.7493224 ,
         1.75449708,  1.54083005,  1.67047917,  1.6044315 ,  1.14360317],
       [-0.09408058,  2.14172767,  1.27380643,  1.7493224 ,  1.72849825,
         1.73361132,  1.52248781,  1.65059358,  1.58533215,  1.12998957],
       [-0.09435888,  2.14806312,  1.27757448,  1.75449708,  1.73361132,
         1.73873952,  1.52699148,  1.6554762 ,  1.59002172,  1.1333322 ],
       [-0.08286762,  1.88646663,  1.12198827,  1.54083005,  1.52248781,
         1.52699148,  1.34103065,  1.45386818,  1.39638491,  0.99531218],
       [-0.0898403 ,  2.04519845,  1.21639505,  1.67047917,  1.65059358,
         1.6554762 ,  1.45386818,  1.57620013,  1.51388008,  1.07906012],
       [-0.08628818,  1.96433507,  1.16830103,  1.6044315 ,  1.58533215,
         1.59002172,  1.39638491,  1.51388008,  1.45402405,  1.03639607],
       [-0.0615043 ,  1.40013445,  0.83273905,  1.14360317,  1.12998957,
         1.1333322 ,  0.99531218,  1.07906012,  1.03639607,  0.73872012]])

In [38]:
#from matplotlib.mlab import PCA as mlabPCA

#mlab_pca = mlabPCA(val)
#print(mlab_pca)

In [39]:
#mlab_pca.Y

In [13]:
import matplotlib.pyplot as plt
#newData from sklearn
x=np.array(newData[:,0]).flatten()
y=np.array(newData[:,1]).flatten()
plt.scatter(x,y,marker=(5,3),c='r',alpha=0.5,lw=2) 

#lowDataMat from step by step
xx=np.array(lowDDataMat[:,0]).flatten()
yy=np.array(lowDDataMat[:,1]).flatten()
plt.scatter(xx,yy,marker=(5,3),c='b',alpha=0.5,lw=2) 
plt.show()


C:\Users\user\Miniconda3\envs\cvenv\lib\site-packages\numpy\core\numeric.py:533: ComplexWarning: Casting complex values to real discards the imaginary part
  return array(a, dtype, copy=False, order=order, subok=True)

In [12]:
X_reduced = PCA(n_components=3).fit_transform(t_val)
X_reduced


Out[12]:
array([[ -2.64056832e+00,   1.31597728e-15],
       [  2.64056832e+00,   1.31597728e-15]])

In [ ]: