In [228]:
%pylab inline
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.patches import FancyArrowPatch
from mpl_toolkits.mplot3d import proj3d
# np.random.seed only accepts integers in [0, 2**32 - 1], so the original seed is reduced modulo 2**32
np.random.seed(234234782384239784 % 2**32)
In [229]:
mu_vec1 = np.array([0,0,0])
cov_mat1 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20)
mu_vec2 = np.array([1,1,1])
cov_mat2 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 20)
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111, projection='3d')
mpl.rcParams['legend.fontsize'] = 10
ax.plot(class1_sample[:,0], class1_sample[:,1], class1_sample[:,2], 'o', markersize=10, color='blue', alpha=0.5, label='class1')
ax.plot(class2_sample[:,0], class2_sample[:,1], class2_sample[:,2], '^', markersize=10, alpha=0.5, color='red', label='class2')
plt.title('Samples for class 1 and class 2')
ax.legend(loc = 'upper right')
plt.draw()
plt.show()
In [230]:
samples = np.concatenate((class1_sample, class2_sample), axis=0)
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111, projection='3d')
ax.plot(samples[:,0], samples[:,1], samples[:,2], 'o', markersize=10, color='green', alpha=0.5)
ax.set_xlabel('x_values')
ax.set_ylabel('y_values')
ax.set_zlabel('z_values')
plt.title('All samples')
plt.draw()
plt.show()
$ \Sigma_i = \Bigg[ \begin{array}{ccc} \sigma_{11}^2 & \sigma_{12}^2 & \sigma_{13}^2\\ \sigma_{21}^2 & \sigma_{22}^2 & \sigma_{23}^2\\ \sigma_{31}^2 & \sigma_{32}^2 & \sigma_{33}^2\\ \end{array} \Bigg]$
In [231]:
cov_mat = np.cov([samples[:,0],samples[:,1],samples[:,2]])
print('Covariance Matrix:\n', cov_mat)
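As a quick check of the formula above, the covariance matrix can also be computed by hand from the mean-centered samples (a sketch; like np.cov, it uses the unbiased estimator with N-1 in the denominator):
# manual covariance computation (sketch): center the data, then average the outer products
centered = samples - samples.mean(axis=0)
manual_cov = centered.T.dot(centered) / (samples.shape[0] - 1)
print('Manual covariance matrix:\n', manual_cov)  # should match cov_mat up to floating-point error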
In [232]:
eig_val, eig_vec = np.linalg.eig(cov_mat)
for i in range(len(eig_val)):
    eigv = eig_vec[:,i].reshape(1,3).T
    print('Eigenvector {}: \n{}'.format(i+1, eigv))
    print('Eigenvalue {}: {}'.format(i+1, eig_val[i]))
    print(40 * '-')
Checking the Eigenvector and Eigenvalue calculation
We expect $ \Sigma\vec{v} = \lambda\vec{v} $
where
$ \Sigma = \text{Covariance matrix}, \; \vec{v} = \text{Eigenvector}, \; \lambda = \text{Eigenvalue}$
In [233]:
for i in range(len(eig_val)):
    eigv = eig_vec[:,i].reshape(1,3).T
    print('{}\n = \n{}'.format(cov_mat.dot(eigv), eig_val[i] * eigv))
    np.testing.assert_array_almost_equal(cov_mat.dot(eigv), eig_val[i] * eigv,
                                         decimal=6, err_msg='', verbose=True)
    print(40 * '-')
In [234]:
mean_x = np.mean(samples[:,0])
mean_y = np.mean(samples[:,1])
mean_z = np.mean(samples[:,2])
class Arrow3D(FancyArrowPatch):
    def __init__(self, xs, ys, zs, *args, **kwargs):
        FancyArrowPatch.__init__(self, (0,0), (0,0), *args, **kwargs)
        self._verts3d = xs, ys, zs

    def draw(self, renderer):
        xs3d, ys3d, zs3d = self._verts3d
        xs, ys, zs = proj3d.proj_transform(xs3d, ys3d, zs3d, renderer.M)
        self.set_positions((xs[0],ys[0]),(xs[1],ys[1]))
        FancyArrowPatch.draw(self, renderer)
fig = plt.figure(figsize=(15,15))
ax = fig.add_subplot(111, projection='3d')
ax.plot(samples[:,0], samples[:,1], samples[:,2], 'o', markersize=10, color='green', alpha=0.2)
ax.plot([mean_x], [mean_y], [mean_z], 'o', markersize=10, color='red', alpha=0.5)
# np.linalg.eig returns the eigenvectors as columns, so iterate over eig_vec.T
# and draw each arrow from the sample mean to mean + eigenvector
for v in eig_vec.T:
    #ax.plot([mean_x, v[0]], [mean_y, v[1]], [mean_z, v[2]], color='red', alpha=0.8, lw=3)
    a = Arrow3D([mean_x, mean_x + v[0]], [mean_y, mean_y + v[1]], [mean_z, mean_z + v[2]], mutation_scale=20, lw=3, arrowstyle="-|>", color="r")
    ax.add_artist(a)
ax.set_xlabel('x_values')
ax.set_ylabel('y_values')
ax.set_zlabel('z_values')
plt.title('Eigenvectors')
plt.draw()
plt.show()
In [235]:
import operator
max_index, max_eig_val = max(enumerate(eig_val), key=operator.itemgetter(1))
print('max. eigenvalue:', max_eig_val)
print('max. eigenvector:\n', eig_vec[:,max_index].reshape(1,3).T)
min_index, min_eig_val = min(enumerate(eig_val), key=operator.itemgetter(1))
print('\nmin. eigenvalue:', min_eig_val)
print('min. eigenvector:\n', eig_vec[:,min_index].reshape(1,3).T)
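Since we are about to drop the eigenvector with the smallest eigenvalue, it is useful to know how much of the total variance the remaining two components retain; the proportion is simply the sum of the kept eigenvalues divided by the sum of all eigenvalues (a small sketch, not part of the original steps):
# proportion of total variance captured by the two components we keep (sketch)
total_var = eig_val.sum()
retained = (total_var - min_eig_val) / total_var
print('variance retained by the 2 remaining components: {:.2%}'.format(retained))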
Instead of ordering the eigenvectors from highest to lowest eigenvalue and dropping the last column, I chose a slightly different approach, which I found easier to implement since we only want to drop one eigenvector. If we want to drop more than one eigenvector, we can either repeat this step, or we can sort the eigenvectors by eigenvalue and keep the $k$ eigenvectors with the highest eigenvalues (sketched below). Both approaches are equivalent in terms of the PCA result.
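For completeness, here is a minimal sketch of the sort-and-select alternative mentioned above; the names eig_pairs and matrix_w are illustrative and not used in the rest of this notebook:
# sort (eigenvalue, eigenvector) pairs by eigenvalue in descending order and keep the top k columns (sketch)
eig_pairs = [(np.abs(eig_val[i]), eig_vec[:,i]) for i in range(len(eig_val))]
eig_pairs.sort(key=lambda x: x[0], reverse=True)
k = 2
matrix_w = np.hstack([eig_pairs[i][1].reshape(3,1) for i in range(k)])
# matrix_w spans the same subspace as the feature vector constructed below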
Form a feature vector (a matrix of eigenvectors) from all eigenvectors except the one with the lowest eigenvalue, where each eigenvector represents one column of the matrix.
In [236]:
feature_vect = np.zeros(shape=(eig_vec.shape[0], eig_vec.shape[1]-1))
index = 0
# copy every eigenvector (column) except the one with the smallest eigenvalue
for i in range(eig_vec.shape[1]):
    if i != min_index:
        feature_vect[:,index] = eig_vec[:,i]
        index += 1
print('Feature vector matrix:\n', feature_vect)
In [237]:
adjusted_samples = np.zeros(shape=(samples.shape[0], samples.shape[1]))
# mean-center each feature (column) of the dataset
for i in range(samples.shape[1]):
    adjusted_samples[:,i] = samples[:,i] - np.mean(samples[:,i])
print('Adjusted dataset:\n', adjusted_samples)
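As with the eigenvector check above, we can assert that the mean adjustment worked, i.e. that every column of the adjusted dataset now has (approximately) zero mean (a small sketch):
# each column mean of the adjusted data should be ~0
np.testing.assert_array_almost_equal(adjusted_samples.mean(axis=0), np.zeros(samples.shape[1]), decimal=6)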
In [238]:
print(feature_vect.shape)
print(adjusted_samples.shape)
In [209]:
transformed_samples = adjusted_samples.dot(feature_vect)
print('Transformed samples:\n', transformed_samples )
In [239]:
plt.plot(transformed_samples[:,0], transformed_samples[:,1], 'o', markersize=7, color='green', alpha=0.5)
plt.xlabel('x_values')
plt.ylabel('y_values')
plt.xlim([-4,6])
plt.ylim([-4,6])
plt.title('Transformed samples')
plt.draw()
plt.show()
In [240]:
plt.plot(transformed_samples[0:20,0], transformed_samples[0:20,1], 'o', markersize=7, color='blue', alpha=0.5, label='class1')
plt.plot(transformed_samples[20:40,0], transformed_samples[20:40,1], '^', markersize=7, color='red', alpha=0.5, label='class2')
plt.xlabel('x_values')
plt.ylabel('y_values')
plt.xlim([-4,6])
plt.ylim([-4,6])
plt.legend()
plt.title('Transformed samples with class labels')
plt.draw()
plt.show()
In [241]:
# note: matplotlib.mlab.PCA was deprecated in matplotlib 2.2 and removed in 3.1,
# so this cell requires an older matplotlib version
from matplotlib.mlab import PCA
results = PCA(samples)
# this will return an array of the data projected into PCA space
print(results.Y)
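If only a newer matplotlib is available, scikit-learn's PCA class produces an equivalent projection (a sketch, assuming scikit-learn is installed; the signs of the components may be flipped relative to the manual result):
# equivalent projection with scikit-learn (sketch); component signs may differ
from sklearn.decomposition import PCA as sklearnPCA
sklearn_pca = sklearnPCA(n_components=2)
sklearn_transf = sklearn_pca.fit_transform(samples)
print(sklearn_transf)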
In [242]:
plt.plot(results.Y[0:20,0], results.Y[0:20,1], 'o', markersize=7, color='blue', alpha=0.5, label='class1')
plt.plot(results.Y[20:40,0], results.Y[20:40,1], '^', markersize=7, color='red', alpha=0.5, label='class2')
plt.xlabel('x_values')
plt.ylabel('y_values')
plt.xlim([-4,6])
plt.ylim([-4,6])
plt.legend()
plt.title('Transformed samples with class labels')
plt.draw()
plt.show()
In [ ]: