PCA (Principal Component Analysis)


In [ ]:
%matplotlib inline

In [ ]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

mu = np.zeros(2)
cov = np.array([[1.07, 0.63],
                [0.63, 0.64]])
print("Cov:", cov)

# Compute theoretical eigenvectors ####################

theoretical_eigen_val, theoretical_eigen_vect = np.linalg.eig(cov)

print("Theoretical eigenvectors:", theoretical_eigen_vect)
print("Theoretical eigenvalues:", theoretical_eigen_val)

# Make samples ########################################

X = np.random.multivariate_normal(mean=mu, cov=cov, size=100)
#print("X:", X)

# Standardize data ####################################

# Standardization gives each feature zero mean and unit variance, so the
# principal directions are not dominated by the feature with the largest
# scale (here the first coordinate, whose variance is 1.07 vs. 0.64).
X = StandardScaler().fit_transform(X)
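
# For reference, StandardScaler is equivalent to the manual transform
#   X = (X_raw - X_raw.mean(axis=0)) / X_raw.std(axis=0)
# (X_raw is a hypothetical name for the unstandardized samples). After
# scaling, each column has zero mean and unit (population) variance:
assert np.allclose(X.mean(axis=0), 0.0) and np.allclose(X.std(axis=0), 1.0)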

# Compute the covariance matrix #######################

empirical_cov = np.cov(X.T)
print("Empirical cov:", empirical_cov)

# Compute empirical eigenvectors ######################

empirical_eigen_val, empirical_eigen_vect = np.linalg.eig(empirical_cov)
print("Empirical eigenvectors:", empirical_eigen_vect)
print("Empirical eigenvalues:", empirical_eigen_val)

# Project the data into the new basis #################

projected_data = np.dot(X, empirical_eigen_vect)
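
# Cross-check against scikit-learn (an added comparison, not part of the
# original notebook). PCA sorts components by decreasing variance and the
# signs of components may flip, so compare absolute values after matching
# the column order.
from sklearn.decomposition import PCA
order = np.argsort(empirical_eigen_val)[::-1]
sk_projected = PCA(n_components=2).fit_transform(X)
assert np.allclose(np.abs(sk_projected),
                   np.abs(projected_data[:, order]), atol=1e-8)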

# Plot ################################################

fig, ax = plt.subplots(figsize=(8, 8))

ax.plot(*X.T, '.r')

p1x = X[:, 0].min()
p2x = X[:, 0].max()

# Draw an arrow along each principal axis. The eigenvectors are the
# *columns* of the matrix returned by np.linalg.eig, hence the transpose;
# each arrow is scaled by the standard deviation of the data along its axis.
for std, axis in zip(projected_data.std(axis=0), empirical_eigen_vect.T):
    start, end = mu, mu + std * axis
    ax.annotate(
        '', xy=end, xycoords='data',
        xytext=start, textcoords='data',
        arrowprops=dict(facecolor='red', width=2.0))

# Draw the line spanned by each eigenvector; a vector (ex, ey) through the
# origin has slope ey / ex.
for (ex, ey), style in zip(empirical_eigen_vect.T, ('-b', ':b')):
    ax.plot([p1x, p2x], [ey * p1x / ex, ey * p2x / ex], style)

ax.axis('equal');

In [ ]:
# Scatter of the data projected onto the eigenvector basis.
plt.plot(*projected_data.T, '.r')
plt.axis('equal');
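
In the eigenvector basis the components should be uncorrelated: a quick numerical check (an added step, not in the original) is that the covariance of the projected data is diagonal, with the eigenvalues on the diagonal.

In [ ]:
projected_cov = np.cov(projected_data.T)
print("Projected cov:", projected_cov)
assert np.allclose(projected_cov, np.diag(empirical_eigen_val), atol=1e-8)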