In [2]:
import pandas as pd
import numpy as np
In [3]:
# First data set: a 10 x 6 matrix literal.
# NOTE(review): this value is overwritten by the second `val` assignment
# directly below, so it never reaches any later cell. Confirm which of the
# two data sets is the intended input.
val=np.matrix([[ 8.6411, 8.7376, 6.5835, 8.0363, 5.8235, 6.9572],
[ 9.1225, 7.8232, 6.7591, 7.8883, 5.8564, 7.4644],
[ 11.7064, 8.3946, 6.7469, 8.0737, 5.9594, 6.9672],
[ 9.4505, 8.7498, 6.7086, 8.0391, 5.8791, 7.3993],
[ 10.7963, 8.235 , 6.7443, 7.864 , 5.9691, 6.9801],
[ 9.086 , 9.0479, 6.5651, 7.8576, 5.9568, 7.1211],
[ 9.5048, 8.473 , 6.6788, 7.8939, 5.9607, 7.6634],
[ 8.7645, 8.4184, 6.6414, 7.7059, 5.9074, 8.0684],
[ 9.9437, 9.0894, 6.7041, 7.9346, 5.9644, 6.9568],
[ 9.7562, 8.669 , 6.7211, 7.8148, 5.9673, 7.5553]])
# Second data set: a 2 x 10 matrix literal. This is the value of `val` that
# the rest of the notebook actually works with.
val=np.matrix([[-4.8927, -7.5595, -6.1081, -7.6419, -6.987 , -7.3919, -7.1961,
-6.2051, -7.0551, -6.5356],
[-4.9939, -5.2557, -4.7379, -5.7602, -5.1277, -5.5271, -5.5584,
-4.4296, -5.3498, -5.3201]])
In [4]:
val #data is already laid out as sample:probes (presumably rows = samples, columns = probes - TODO confirm), so it is used without transposing
Out[4]:
In [5]:
# No transpose is applied: `val` is used as-is, and the axis=0 mean below
# treats every column as one variable.
t_val = val
# Per-column means; these are subtracted in the centring step.
mean_val = t_val.mean(axis=0)
print(t_val)
print(mean_val)
In [6]:
# Centre the data: the 1 x n_columns row of means is broadcast over all rows,
# so every column of new_val has zero mean.
new_val = t_val - mean_val
print(new_val)
In [7]:
# Covariance between columns (rowvar=False: each column is a variable,
# each row an observation).
covMat = np.cov(new_val, rowvar=False)
print(covMat)

# Cross-check by hand: X^T X / (rows - 1) on the centred data should print
# the same matrix as np.cov above.
dof = val.shape[0] - 1
manual_cov = np.dot(new_val.T, new_val) / dof
print(manual_cov)
In [8]:
# Eigen-decomposition of the covariance matrix.
#
# * np.asmatrix replaces np.mat, which was removed in NumPy 2.0. The matrix
#   wrapper is kept so that eigVects stays an np.matrix and the `*` products
#   in the projection cell remain matrix multiplications.
# * covMat is symmetric, so the symmetric/Hermitian solver eigh is used
#   instead of eig: it returns real eigenvalues (in ascending order) and
#   orthonormal eigenvectors, whereas eig may return complex values when the
#   covariance matrix is rank-deficient.
eigVals, eigVects = np.linalg.eigh(np.asmatrix(covMat))
print(eigVals)
print(eigVects)
In [9]:
n = 3  # number of principal components to keep

# argsort orders the eigenvalues from smallest to largest ...
eigValIndice = np.argsort(eigVals)
# ... so reverse that order and keep the first n: indices of the n largest eigenvalues.
n_eigValIndice = eigValIndice[::-1][:n]
# Eigenvectors (columns) that belong to the n largest eigenvalues.
n_eigVect = eigVects[:, n_eigValIndice]

# Project the centred data into the low-dimensional feature space.
lowDDataMat = new_val * n_eigVect
# Reconstruct the data: map back to the original space, then add the mean back.
back_projection = lowDDataMat * n_eigVect.T
reconMat = back_projection + mean_val

print(new_val)
print(n_eigVect)
In [10]:
# Display the centred data projected onto the selected eigenvectors.
lowDDataMat
Out[10]:
In [11]:
from sklearn.decomposition import PCA

# Reference result from scikit-learn, for comparison with the manual PCA.
# scikit-learn's input validation rejects np.matrix, so hand it a plain ndarray.
val_arr = np.asarray(val)
# PCA can extract at most min(n_samples, n_features) components; clamp the
# requested n so the cell does not raise ValueError on small inputs.
pca = PCA(n_components=min(n, *val_arr.shape))
newData = pca.fit_transform(val_arr)
print(newData)
# Covariance between the columns of val (rows of the transpose are the variables).
np.cov(val_arr.T)
Out[11]:
In [38]:
#from matplotlib.mlab import PCA as mlabPCA
#mlab_pca = mlabPCA(val)
#print(mlab_pca)
In [39]:
#mlab_pca.Y
In [13]:
import matplotlib.pyplot as plt

# Overlay the first two components from both approaches for a visual comparison.
# The tuple marker (5, 3) (circle style) is not accepted by current matplotlib;
# 'o' draws a circle in every version.

# newData from sklearn
x = np.asarray(newData[:, 0]).ravel()
y = np.asarray(newData[:, 1]).ravel()
plt.scatter(x, y, marker='o', c='r', alpha=0.5, lw=2, label='sklearn PCA')

# lowDDataMat from the step-by-step computation
xx = np.asarray(lowDDataMat[:, 0]).ravel()
yy = np.asarray(lowDDataMat[:, 1]).ravel()
plt.scatter(xx, yy, marker='o', c='b', alpha=0.5, lw=2, label='step-by-step PCA')

# Label the figure so it can be read on its own.
plt.xlabel('component 1')
plt.ylabel('component 2')
plt.legend()
plt.show()
In [12]:
# Same reduction in a single expression, this time on t_val.
# scikit-learn rejects np.matrix input, so convert to a plain ndarray first.
t_val_arr = np.asarray(t_val)
# PCA cannot return more than min(n_samples, n_features) components, so the
# requested 3 is clamped to avoid a ValueError on small inputs.
X_reduced = PCA(n_components=min(3, *t_val_arr.shape)).fit_transform(t_val_arr)
X_reduced
Out[12]:
In [ ]: