In [1]:
#import libs, load data
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df_load = pd.read_csv('https://nthu-datalab.github.io/ml/labs/02_EDA_PCA/gen_dataset.csv')
X_load = df_load.drop('Class label', 1)
Y_load = df_load['Class label']
df_load.head()
Out[1]:
In [4]:
import numpy as np
from sklearn.preprocessing import StandardScaler
# Z-normalize data
sc = StandardScaler()
Z = sc.fit_transform(X_load)
# Estimate the correlation matrix
R = np.dot(Z.T, Z) / df_load.shape[0]
#calculate the eigen values, eigen vectors
eigen_vals, eigen_vecs = np.linalg.eigh(R)
# Make a list of (eigenvalue, eigenvector) tuples
eigen_pairs = [(np.abs(eigen_vals[i]), eigen_vecs[:,i]) for i in range(len(eigen_vals))]
# Sort the (eigenvalue, eigenvector) tuples from high to low
eigen_pairs.sort(reverse=True)
#form the projection matrix
W_2D = np.hstack((eigen_pairs[0][1][:, np.newaxis],
eigen_pairs[1][1][:, np.newaxis]))
#you should form a projection matrix which projects from raw-data dimension to 3 dimension here
W_3D = np.hstack((eigen_pairs[0][1][:, np.newaxis],
eigen_pairs[1][1][:, np.newaxis],
eigen_pairs[2][1][:, np.newaxis]))
In [8]:
import os
import seaborn as sns
sns.set(style='whitegrid', context='notebook')
#import Axes3D for plottin 3d scatter
from mpl_toolkits.mplot3d import Axes3D
#cacculate z_pca(2d and 3d)
Z_pca2 = Z.dot(W_2D)
Z_pca3 = Z.dot(W_3D)
#plot settings
colors = ['r', 'b', 'g']
markers = ['s', 'x', 'o']
fig = plt.figure(figsize=(12,6))
#plot 2D
plt2 = fig.add_subplot(1,2,1)
for l, c, m in zip(np.unique(Y_load), colors, markers):
plt2.scatter(Z_pca2[Y_load==l, 0],
Z_pca2[Y_load==l, 1],
c=c, label=l, marker=m)
plt.title('Z_pca 2D')
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.legend(loc='lower left')
plt.tight_layout()
#plot 3D
plt3 = fig.add_subplot(1,2,2, projection='3d')
#you should plot a 3D scatter using plt3.scatter here (see Axes3D.scatter in matplotlib)
for l, c, m in zip(np.unique(Y_load), colors, markers):
plt3.scatter(Z_pca3[Y_load==l, 0],
Z_pca3[Y_load==l, 1],
Z_pca3[Y_load==l, 2],
c=c, label=l, marker=m)
plt3.set_xlabel('PC 1')
plt3.set_ylabel('PC 2')
plt3.set_zlabel('PC 3')
plt.title('Z_pca 3D')
plt.legend(loc='lower right')
plt.tight_layout()
if not os.path.exists('./output'):
os.makedirs('./output')
plt.savefig('./output/fig-pca-2-3-z.png', dpi=300)
plt.show()
In [ ]:
In [ ]: