In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from numpy import linalg as LA
from sklearn import datasets
In [2]:
# Load the Iris dataset bundled with scikit-learn. Later cells read
# iris.data, iris.feature_names, iris.target and iris.target_names from it.
iris = datasets.load_iris()
First, we need the correlation matrix of the features; its eigenvectors will give the principal components.
In [3]:
# Tabulate the measurements with one named column per feature.
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Pairwise correlation between the feature columns (DataFrame.corr defaults
# to the Pearson coefficient). This matrix is what gets decomposed below.
corr = df.corr()
# Display the stored matrix instead of recomputing it, so the table shown
# here is exactly the object the following cells use.
corr
Out[3]:
In [4]:
# Heatmap of the feature correlation matrix.
# The colour scale is pinned to [-1, 1] (the full range of a correlation
# coefficient) so colours are comparable across datasets, and each cell is
# annotated with its value so the figure can be read on its own.
fig, ax = plt.subplots()
sns.heatmap(corr, vmin=-1, vmax=1, annot=True, fmt=".2f", ax=ax)
ax.set_title("Iris feature correlation matrix");  # trailing ';' hides the repr
Out[4]:
In [5]:
# Eigendecomposition of the correlation matrix.
# A correlation matrix is symmetric, so use the symmetric solver `eigh`:
# it guarantees real eigenvalues and orthonormal eigenvectors, whereas the
# general-purpose `eig` gives no such guarantee and returns the eigenvalues
# in no particular order.
eig_vals, eig_vecs = LA.eigh(np.asarray(corr))

# `eigh` returns eigenvalues in ascending order; flip to descending so that
# column i of `eig_vecs` is the (i+1)-th principal axis and the tables shown
# in the following cells line up with PC1, PC2, ...
descending = np.argsort(eig_vals)[::-1]
eig_vals = eig_vals[descending]
eig_vecs = eig_vecs[:, descending]

# (|eigenvalue|, eigenvector) pairs, largest first. The eigenvector for
# eigenvalue i is the i-th *column* of `eig_vecs`.
eig_pairs = [(np.abs(value), eig_vecs[:, i]) for i, value in enumerate(eig_vals)]
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
Eigenvalues
In [6]:
# Render the eigenvalues as a one-row table; the columns follow the order of
# `eig_vals` as produced by the decomposition cell.
pd.DataFrame(data=[eig_vals])
Out[6]:
Eigenvectors as principal components (one eigenvector per column)
In [7]:
# Render the eigenvector matrix as a table. Each column is one eigenvector,
# matching the `eig_vecs[:, i]` indexing used when the pairs were built.
pd.DataFrame(data=eig_vecs)
Out[7]:
Create the projection matrix for the new two-dimensional space from the two eigenvectors with the largest eigenvalues.
In [8]:
# Projection matrix W: the eigenvectors of the two leading pairs placed side
# by side as columns, giving one row per feature and one column per component.
first_axis = eig_pairs[0][1]
second_axis = eig_pairs[1][1]
matrix_w = np.column_stack((first_axis, second_axis))
pd.DataFrame(matrix_w, columns=['PC1', 'PC2'])
Out[8]:
In [9]:
# Project the samples onto the two principal axes.
# The axes in `matrix_w` are eigenvectors of the *correlation* matrix, i.e.
# of the covariance of the standardized features. The data therefore has to
# be centred and scaled to unit variance before projecting; multiplying the
# raw measurements by `matrix_w` does not yield principal-component scores.
features = np.asarray(iris.data, dtype=float)
# ddof=1 matches the sample statistics behind the correlation matrix, so the
# variance of each projected column equals the corresponding eigenvalue.
standardized = (features - features.mean(axis=0)) / features.std(axis=0, ddof=1)
new_dim = np.dot(standardized, matrix_w)

# NOTE: this rebinds `df` (previously the raw feature table) to the projected
# coordinates; the plotting cell reads `df.X`, `df.Y` and `df.label`.
df = pd.DataFrame(new_dim, columns=['X', 'Y'])
df['label'] = iris.target
df.head()
Out[9]:
In [10]:
# Scatter plot of the projected samples, one colour per Iris class.
fig = plt.figure()
fig.suptitle('PCA with Eigenvector', fontsize=14, fontweight='bold')
ax = fig.add_subplot(111)

# One scatter call per class instead of three copy-pasted lines. The class id
# is the position in `iris.target_names`, which is what `df.label` stores.
class_colors = ('red', 'blue', 'green')
for class_id, (color, class_name) in enumerate(zip(class_colors, iris.target_names)):
    members = df[df.label == class_id]
    ax.scatter(members.X, members.Y, color=color, label=class_name)

# Label the axes so the figure is readable without the surrounding cells.
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
# Anchor the legend outside the plotting area so it does not cover any points.
ax.legend(bbox_to_anchor=(1.25, 1));  # trailing ';' hides the Legend repr
Out[10]: